Class: List::Matcher
- Inherits:
-
Object
- Object
- List::Matcher
- Defined in:
- lib/list_matcher.rb
Defined Under Namespace
Classes: Alternate, CharClass, Leaf, Node, Sequence, Special, SpecialPattern
Constant Summary collapse
# Matches (and captures) any single character that Regexp.quote would escape,
# with the exception of "-", which only needs quoting inside character classes.
# It is the basis for a replacement of Regexp.quote that leaves such characters alone.
QRX = begin
  escapable = (1..255).map(&:chr).reject { |ch| Regexp.quote(ch) == ch }
  members = (escapable - ['-']).map { |ch| Regexp.quote(ch) }
  Regexp.new("([" + members.join + "])")
end
Instance Attribute Summary collapse
-
#atomic ⇒ Object
readonly
Returns the value of attribute atomic.
-
#backtracking ⇒ Object
readonly
Returns the value of attribute backtracking.
-
#bound ⇒ Object
readonly
Returns the value of attribute bound.
-
#case_insensitive ⇒ Object
readonly
Returns the value of attribute case_insensitive.
-
#left_bound ⇒ Object
readonly
Returns the value of attribute left_bound.
-
#multiline ⇒ Object
readonly
Returns the value of attribute multiline.
-
#name ⇒ Object
readonly
Returns the value of attribute name.
-
#normalize_whitespace ⇒ Object
readonly
Returns the value of attribute normalize_whitespace.
-
#right_bound ⇒ Object
readonly
Returns the value of attribute right_bound.
-
#strip ⇒ Object
readonly
Returns the value of attribute strip.
-
#vet ⇒ Object
readonly
Returns the value of attribute vet.
-
#word_test ⇒ Object
readonly
Returns the value of attribute word_test.
Class Method Summary collapse
-
.pattern(list, opts = {}) ⇒ Object
convenience method for one-off regexen where there’s no point in keeping around a pattern generator.
- .quote(s) ⇒ Object
-
.rx(list, opts = {}) ⇒ Object
like self.pattern, but returns a regex rather than a string.
Instance Method Summary collapse
-
#bud(opts = {}) ⇒ Object
returns a new pattern matcher differing from the original only in the options specified.
-
#initialize(atomic: true, backtracking: true, bound: false, strip: false, case_insensitive: false, multiline: false, normalize_whitespace: false, symbols: {}, name: false, vet: false) ⇒ Matcher
constructor
A new instance of Matcher.
- #modifiers ⇒ Object
-
#pattern(list, opts = {}) ⇒ Object
converts list into a string representing a regex pattern suitable for inclusion in a larger regex.
- #pfx ⇒ Object
- #qmark ⇒ Object
- #quote(s) ⇒ Object
-
#rx(list, opts = {}) ⇒ Object
like pattern but it returns a regex instead of a string.
- #tree(list, symbols) ⇒ Object
- #wrap(s) ⇒ Object
- #wrap_size ⇒ Object
Constructor Details
#initialize(atomic: true, backtracking: true, bound: false, strip: false, case_insensitive: false, multiline: false, normalize_whitespace: false, symbols: {}, name: false, vet: false) ⇒ Matcher
Returns a new instance of Matcher.
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/list_matcher.rb', line 21
#
# Creates a pattern generator.
#
# Options (all keyword arguments):
# atomic::               stored for #atomic
# backtracking::         stored for #backtracking
# bound::                false (no boundaries); true or :word (\b on both sides, word test /\w/);
#                        :string (\A ... \z); :line (^ ... $); or a Hash supplying :test, :left and :right
# strip::                stored for #strip; forced on whenever normalize_whitespace is set
# case_insensitive::     stored for #case_insensitive
# multiline::            stored for #multiline
# normalize_whitespace:: when set, ' ' is additionally registered as a symbol with the pattern \s++
# symbols::              hash whose keys must be Strings, Symbols or Regexps; a deep_dup of it is kept
# name::                 String or Symbol to use as a named-capture name
# vet::                  when set, the symbols are checked with Special#verify
#
# Raises RuntimeError for a name that is neither a String nor a Symbol, for an
# unrecognised bound value, or for a symbols key of the wrong type; raises
# SyntaxError when a bound Hash lacks :test, :left or :right. Compiling the trial
# named-capture regex may also raise if the name cannot be used in a capture.
def initialize(
  atomic: true,
  backtracking: true,
  bound: false,
  strip: false,
  case_insensitive: false,
  multiline: false,
  normalize_whitespace: false,
  symbols: {},
  name: false,
  vet: false
)
  @atomic               = atomic
  @backtracking         = backtracking
  @strip                = strip || normalize_whitespace
  @case_insensitive     = case_insensitive
  @multiline            = multiline
  @symbols              = deep_dup symbols
  @_bound               = bound   # raw option, kept so #bud can pass it on unchanged
  @bound                = !!bound
  @normalize_whitespace = normalize_whitespace
  @vet                  = vet

  if name
    unless name.is_a?(String) || name.is_a?(Symbol)
      raise "name must be a String or Symbol, got #{name.inspect}"
    end
    # stir up any errors that might arise from using this name in a named capture
    Regexp.new "(?<#{name}>.*)"
    @name = name
  end

  if bound == :string
    @word_test   = /./
    @left_bound  = '\A'
    @right_bound = '\z'
  elsif bound == :line
    @word_test   = /./
    @left_bound  = '^'
    @right_bound = '$'
  elsif bound.is_a? Hash
    @word_test   = bound[:test]  || raise(SyntaxError.new('no boundary test provided'))
    @left_bound  = bound[:left]  || raise(SyntaxError.new('no left boundary expression provided'))
    @right_bound = bound[:right] || raise(SyntaxError.new('no right boundary expression provided'))
  elsif bound === true || bound == :word
    @word_test   = /\w/
    @left_bound  = '\b'
    @right_bound = '\b'
  elsif !( bound === false )
    raise "unfamiliar value for :bound option: #{bound.inspect}"
  end

  if normalize_whitespace
    @symbols[' '] = { pattern: '\s++' }
  end

  symbols.keys.each do |k|
    raise "symbols variable #{k} is neither a string, a symbol, nor a regex" unless k.is_a?(String) || k.is_a?(Symbol) || k.is_a?(Regexp)
  end

  if vet
    Special.new( self, @symbols, [] ).verify
  end
end
Instance Attribute Details
#atomic ⇒ Object (readonly)
Returns the value of attribute atomic.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def atomic @atomic end |
#backtracking ⇒ Object (readonly)
Returns the value of attribute backtracking.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def backtracking @backtracking end |
#bound ⇒ Object (readonly)
Returns the value of attribute bound.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def bound @bound end |
#case_insensitive ⇒ Object (readonly)
Returns the value of attribute case_insensitive.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def case_insensitive @case_insensitive end |
#left_bound ⇒ Object (readonly)
Returns the value of attribute left_bound.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def left_bound @left_bound end |
#multiline ⇒ Object (readonly)
Returns the value of attribute multiline.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def multiline @multiline end |
#name ⇒ Object (readonly)
Returns the value of attribute name.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def name @name end |
#normalize_whitespace ⇒ Object (readonly)
Returns the value of attribute normalize_whitespace.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def normalize_whitespace @normalize_whitespace end |
#right_bound ⇒ Object (readonly)
Returns the value of attribute right_bound.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def right_bound @right_bound end |
#strip ⇒ Object (readonly)
Returns the value of attribute strip.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def strip @strip end |
#vet ⇒ Object (readonly)
Returns the value of attribute vet.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def vet @vet end |
#word_test ⇒ Object (readonly)
Returns the value of attribute word_test.
5 6 7 |
# File 'lib/list_matcher.rb', line 5 def word_test @word_test end |
Class Method Details
.pattern(list, opts = {}) ⇒ Object
convenience method for one-off regexen where there’s no point in keeping around a pattern generator
9 10 11 |
# File 'lib/list_matcher.rb', line 9
# One-shot convenience: builds a throwaway generator from +opts+ and returns
# whatever its #pattern produces for +list+.
def self.pattern(list, opts = {})
  generator = new(**opts)
  generator.pattern(list)
end
.quote(s) ⇒ Object
203 204 205 |
# File 'lib/list_matcher.rb', line 203
# Escapes +s+ for use in a regex, touching only the characters matched by QRX.
def self.quote(s)
  s.gsub(QRX) { |special| Regexp.escape(special) }
end
.rx(list, opts = {}) ⇒ Object
like self.pattern, but returns a regex rather than a string
14 15 16 |
# File 'lib/list_matcher.rb', line 14
# Like self.pattern, but hands back what the throwaway generator's #rx returns
# (a regex) rather than a pattern string.
def self.rx(list, opts = {})
  generator = new(**opts)
  generator.rx(list)
end
Instance Method Details
#bud(opts = {}) ⇒ Object
returns a new pattern matcher differing from the original only in the options specified
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/list_matcher.rb', line 80
# Returns a new matcher of the same class that differs from this one only in
# the options given in +opts+; every other option is inherited from this
# instance's state.
#
# Vetting is inherited only when this matcher vets AND new symbols are supplied.
# The inherited flag is coerced to a real boolean so that the symbols hash
# itself is never stored as the new matcher's +vet+ value. An explicit :vet in
# +opts+ still takes precedence.
def bud(opts={})
  inherited = {
    atomic:               @atomic,
    backtracking:         @backtracking,
    bound:                @_bound,   # the raw bound option, not the boolean exposed by #bound
    strip:                @strip,
    case_insensitive:     @case_insensitive,
    multiline:            @multiline,
    normalize_whitespace: @normalize_whitespace,
    symbols:              @symbols,
    name:                 @name,
    vet:                  !!(@vet && opts[:symbols])
  }
  self.class.new(**inherited.merge(opts))
end
#modifiers ⇒ Object
126 127 128 129 130 131 132 |
# File 'lib/list_matcher.rb', line 126
# Returns the inline regex flags implied by the options: 'i' when
# case_insensitive, 'm' when multiline, 'im' when both, nil when neither.
#
# The result is memoized inside a one-element array so that a nil answer is
# cached as well. The flag string must be wrapped in that array too: indexing
# a bare String with [0] would return only its first character and silently
# drop 'm' when both flags are set.
def modifiers
  @modifiers ||= begin
    flags = [('i' if case_insensitive), ('m' if multiline)].compact.join
    [flags.empty? ? nil : flags]
  end
  @modifiers[0]
end
#pattern(list, opts = {}) ⇒ Object
converts list into a string representing a regex pattern suitable for inclusion in a larger regex
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/list_matcher.rb', line 97
# Converts +list+ into a string holding a regex pattern suitable for embedding
# in a larger regex. Returns nil when no non-empty items remain after cleanup.
#
# list:: items to match; nils are dropped and everything else is stringified
# opts:: when non-empty, the work is delegated to a budded matcher built with these options
def pattern(list, opts = {})
  return bud(opts).pattern(list) unless opts.empty?

  # clean up the input: drop nils and blanks, then apply the whitespace options
  words = list.compact.map(&:to_s).reject(&:empty?)
  words = words.map(&:strip).reject(&:empty?) if strip
  words = words.map { |w| w.gsub(/\s++/, ' ') } if normalize_whitespace
  return nil if words.empty?

  # build the tree and render it
  specializer = Special.new(self, @symbols, words)
  root = tree(specializer.normalize, specializer)
  root.root = true
  root.flatten
  expr = root.convert

  # each of these wrappers already groups the expression
  grouped = false
  flags = modifiers
  if flags
    expr = "(?#{flags}:#{expr})"
    grouped = true
  end
  if name
    expr = "(?<#{name}>#{expr})"
    grouped = true
  end

  return expr if grouped && backtracking
  return wrap(expr) if atomic && !root.atomic?

  expr
end
#pfx ⇒ Object
139 140 141 |
# File 'lib/list_matcher.rb', line 139
# Opening delimiter used when wrapping an expression: '(?:' when backtracking
# is enabled, '(?>' otherwise. The answer is cached in @pfx.
def pfx
  return @pfx if @pfx

  @pfx = if backtracking
           '(?:'
         else
           '(?>'
         end
end
#qmark ⇒ Object
143 144 145 |
# File 'lib/list_matcher.rb', line 143
# Optionality quantifier: '?' when backtracking is enabled, '?+' otherwise.
# The answer is cached in @qmark.
def qmark
  return @qmark if @qmark

  @qmark = if backtracking
             '?'
           else
             '?+'
           end
end
#quote(s) ⇒ Object
207 208 209 |
# File 'lib/list_matcher.rb', line 207 def quote(s) self.class.quote s end |
#rx(list, opts = {}) ⇒ Object
like pattern but it returns a regex instead of a string
135 136 137 |
# File 'lib/list_matcher.rb', line 135
# Like #pattern, but returns a Regexp instead of a string.
#
# list:: items to match
# opts:: per-call options, passed straight through to #pattern
#
# #pattern returns nil when the list contains nothing to match. Regexp.new(nil)
# would raise a TypeError, so in that case a regex that can never match is
# returned instead.
def rx(list, opts={})
  source = pattern(list, opts)
  return /(?!)/ if source.nil?

  Regexp.new source
end
#tree(list, symbols) ⇒ Object
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
# File 'lib/list_matcher.rb', line 155 def tree(list, symbols) if list.size == 1 leaves = list[0].chars.map do |c| symbols.symbols(c) || Leaf.new( self, c ) end if leaves.length == 1 leaves.first else Sequence.new self, *leaves end elsif list.all?{ |w| w.length == 1 } chars = list.select{ |w| !symbols.symbols(w) } if chars.size > 1 list -= chars c = CharClass.new self, chars end a = Alternate.new self, symbols, list unless list.empty? a.children.unshift c if a && c a || c elsif c = best_prefix(list) # found a fixed-width prefix pattern if optional = c[1].include?('') c[1].reject!{ |w| w == '' } end c1 = tree c[0], symbols c2 = tree c[1], symbols c2.optional = optional Sequence.new self, c1, c2 elsif c = best_suffix(list) # found a fixed-width suffix pattern if optional = c[0].include?('') c[0].reject!{ |w| w == '' } end c1 = tree c[0], symbols c1.optional = optional c2 = tree c[1], symbols Sequence.new self, c1, c2 else grouped = list.group_by{ |w| w[0] } chars = grouped.select{ |_, w| w.size == 1 && w[0].size == 1 && !symbols.symbols(w[0]) }.map{ |v, _| v } if chars.size > 1 list -= chars c = CharClass.new self, chars end a = Alternate.new self, symbols, list a.children.unshift c if c a end end |
#wrap(s) ⇒ Object
147 148 149 |
# File 'lib/list_matcher.rb', line 147 def wrap(s) pfx + s + ')' end |
#wrap_size ⇒ Object
151 152 153 |
# File 'lib/list_matcher.rb', line 151
# Number of characters #wrap adds around an expression: the length of #pfx
# plus one for the closing parenthesis. The answer is cached in @wrap_size.
def wrap_size
  return @wrap_size if @wrap_size

  closing_paren = 1
  @wrap_size = pfx.length + closing_paren
end