Module: CharacterSet::Parser
- Defined in:
- lib/character_set/parser.rb
Class Method Summary
- .codepoints_from_bracket_expression(string) ⇒ Object
- .codepoints_from_enumerable(object) ⇒ Object
- .eval_escapes(string) ⇒ Object
- .strip_brackets(string) ⇒ Object
Class Method Details
.codepoints_from_bracket_expression(string) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/character_set/parser.rb', line 19

# Expands a simple bracket expression (e.g. "[a-cx]") into codepoints.
#
# The surrounding brackets and a leading "^" are removed via strip_brackets
# (the "^" is not interpreted as negation here), escapes are resolved via
# eval_escapes, and "x-y" ranges are expanded. Codepoints are returned in
# the order encountered; duplicates are not removed.
#
# @param string [String] the bracket expression, with or without brackets
# @return [Array<Integer>] the codepoints described by the expression
# @raise [ArgumentError] if +string+ is not a String, or if it contains a
#   backslash not followed by u/U/x, a "[" after the first character,
#   or "&&"
def codepoints_from_bracket_expression(string)
  raise ArgumentError, 'pass a String' unless string.is_a?(String)
  raise ArgumentError, 'advanced syntax' if string =~ /\\[^uUx]|[^\\]\[|&&/

  content = strip_brackets(string)
  literal_content = eval_escapes(content)

  prev_chr = nil   # last character that was emitted as a literal / range end
  in_range = false # true after a range-forming "-" until the range end is read

  codepoints = literal_content.each_char.flat_map do |chr|
    if chr == '-' && prev_chr && prev_chr != '\\' && prev_chr != '-'
      # Hyphen after a literal: remember it and wait for the range end.
      in_range = true
      []
    else
      # prev_chr itself was already emitted, so a range starts one above it.
      result = in_range ? ((prev_chr.ord + 1)..chr.ord).to_a : [chr.ord]
      in_range = false
      prev_chr = chr
      result
    end
  end

  # A hyphen with no range end after it (e.g. "[a-]") is a literal hyphen;
  # without this it would be silently dropped.
  codepoints << '-'.ord if in_range
  codepoints
end
.codepoints_from_enumerable(object) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 |
# File 'lib/character_set/parser.rb', line 5

# Converts an enumerable of codepoints or single-character strings into
# something that enumerates codepoints.
#
# Only the FIRST element is inspected to decide how to treat the whole
# collection:
# - an Integer in 0...0x110000: +object+ is returned unchanged;
# - a String of length 1: all elements are joined, encoded as UTF-8 and
#   returned as an Array of codepoints;
# - anything else: ArgumentError.
# An empty enumerable is returned unchanged.
#
# @param object [#each] the collection to convert
# @return [Object, Array<Integer>] +object+ itself, or an Array of codepoints
# @raise [ArgumentError] if +object+ does not respond to #each, or if its
#   first element is neither a valid codepoint nor a one-character String
def codepoints_from_enumerable(object)
  raise ArgumentError, 'pass an Enumerable' unless object.respond_to?(:each)

  # Use #each to check first element (only this works for all Enumerables).
  # Every path through the block leaves the method, so it runs at most once.
  object.each do |el| # rubocop:disable Lint/UnreachableLoop
    return object if el.is_a?(Integer) && el >= 0 && el < 0x110000
    return object.to_a.join.encode('utf-8').codepoints if el.is_a?(String) && el.length == 1

    raise ArgumentError, "#{el.inspect} is not valid as a codepoint"
  end

  # Reached only when there were no elements. Return the input explicitly
  # rather than whatever #each happened to return (which may be nil).
  object
end
.eval_escapes(string) ⇒ Object
46 47 48 49 50 |
# File 'lib/character_set/parser.rb', line 46

# Replaces hexadecimal codepoint notations in +string+ with the UTF-8
# character they denote. Recognized forms: \UXXXXXXXX (8 hex digits),
# \uXXXX (4 hex digits), U+X… (1+ hex digits), \xXX (2 hex digits) and
# \u{X…} (1+ hex digits). All other text is left untouched.
#
# @param string [String] text possibly containing escape notations
# @return [String] a new string with the notations replaced
def eval_escapes(string)
  string.gsub(/\\U(\h{8})|\\u(\h{4})|U\+(\h+)|\\x(\h{2})|\\u\{(\h+)\}/) do
    # Exactly one alternative matched, so exactly one capture is non-nil.
    hex = Regexp.last_match.captures.compact.first
    hex.to_i(16).chr('utf-8')
  end
end
.strip_brackets(string) ⇒ Object
42 43 44 |
# File 'lib/character_set/parser.rb', line 42

# Returns the content of a bracket expression without its enclosing "[" and
# "]" and without a leading "^". If +string+ is not wrapped in brackets, a
# copy of it is returned instead.
#
# @param string [String] e.g. "[abc]", "[^abc]" or "abc"
# @return [String] the inner content, or a duplicate of +string+
def strip_brackets(string)
  # The /m flag lets `.` match newlines, so expressions whose content
  # contains a line break (e.g. "[a\nb]") are still unwrapped.
  string[/\A\[\^?(.*)\]\z/m, 1] || string.dup
end