Module: CharacterSet::Parser

Defined in:
lib/character_set/parser.rb

Class Method Summary

Class Method Details

.codepoints_from_bracket_expression(string) ⇒ Object

Raises:

  • (ArgumentError)


15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/character_set/parser.rb', line 15

# Converts a bracket expression such as `[a-c]` (or its bare content, `a-c`)
# into the list of codepoints it names.
#
# The outer brackets and a leading `^` are removed by strip_brackets, escape
# sequences are resolved by eval_escapes, and `x-y` ranges are expanded.
# A hyphen that cannot act as a range operator (leading, directly after a
# literal backslash or hyphen, or trailing) is kept as a literal `-`.
#
# @param string [String] bracket expression, with or without outer brackets
# @return [Array<Integer>] codepoints in order of appearance; duplicates are
#   not removed
# @raise [ArgumentError] if +string+ is not a String, or if it contains a
#   backslash followed by anything other than `u`, `U` or `x`, a `[` that is
#   neither the first character nor preceded by a backslash, or `&&`
def codepoints_from_bracket_expression(string)
  raise ArgumentError, 'pass a String'   unless string.is_a?(String)
  raise ArgumentError, 'advanced syntax' if string =~ /\\[^uUx]|[^\\]\[|&&/

  content = strip_brackets(string)
  literal_content = eval_escapes(content)

  prev_chr = nil
  in_range = false

  codepoints = literal_content.each_char.flat_map do |chr|
    if chr == '-' && prev_chr && prev_chr != '\\' && prev_chr != '-'
      # Range operator: emit nothing yet, the next character closes the range.
      in_range = true
      []
    else
      # The range start was already emitted on its own, so begin one past it.
      result = in_range ? ((prev_chr.ord + 1)..chr.ord).to_a : [chr.ord]
      in_range = false
      prev_chr = chr
      result
    end
  end

  # A hyphen at the very end has no range end to pair with, so it stands for
  # itself instead of being silently dropped.
  codepoints << '-'.ord if in_range
  codepoints
end

.codepoints_from_enumerable(object) ⇒ Object

Raises:

  • (ArgumentError)


5
6
7
8
9
10
11
12
13
# File 'lib/character_set/parser.rb', line 5

# Normalizes a collection of codepoints or single characters to codepoints.
#
# Only the first element yielded by #each is inspected; the remaining
# elements are not validated here.
#
# @param object [#each] collection of Integers or one-character Strings
# @return [Object] +object+ itself when its first element is an Integer in
#   0..0x10FFFF; the result of object.map(&:ord) when its first element is a
#   one-character String; whatever object.each returns when nothing is yielded
# @raise [ArgumentError] if +object+ does not respond to #each, or if its
#   first element is neither of the above
def codepoints_from_enumerable(object)
  unless object.respond_to?(:each)
    raise ArgumentError, 'pass an Enumerable'
  end

  # Peek at the first element through #each (the only approach that works for
  # all Enumerables); every branch below leaves the loop immediately.
  object.each do |head| # rubocop:disable Lint/UnreachableLoop
    if head.is_a?(Integer) && head >= 0 && head <= 0x10FFFF
      return object
    elsif head.is_a?(String) && head.length == 1
      return object.map(&:ord)
    else
      raise ArgumentError, "#{head.inspect} is not valid as a codepoint"
    end
  end
end

.eval_escapes(string) ⇒ Object



42
43
44
45
46
# File 'lib/character_set/parser.rb', line 42

# Replaces codepoint escape sequences in +string+ with the characters they
# denote, leaving all other text untouched.
#
# Recognized forms (hex digits only): `\UXXXXXXXX`, `\uXXXX`, `U+X...`,
# `\xXX` and `\u{X...}`.
#
# @param string [String] text that may contain escape sequences
# @return [String] a new string with each escape replaced by its character
def eval_escapes(string)
  escape_pattern = /\\U(\h{8})|\\u(\h{4})|U\+(\h+)|\\x(\h{2})|\\u\{(\h+)\}/

  string.gsub(escape_pattern) do
    # Exactly one alternative matched, so exactly one capture group is set.
    hex_digits = Regexp.last_match.captures.compact.first
    hex_digits.to_i(16).chr('utf-8')
  end
end

.strip_brackets(string) ⇒ Object



38
39
40
# File 'lib/character_set/parser.rb', line 38

# Returns the content of a bracket expression without its outer brackets.
#
# A leading `[` (optionally followed by `^`) and a trailing `]` are removed
# when both are present; otherwise a copy of the whole string is returned.
# The `^` is discarded here, not interpreted.
#
# @param string [String] bracket expression or bare content
# @return [String] a new string holding the inner content
def strip_brackets(string)
  # /m lets `.` match newlines, so content spanning several lines is still
  # unwrapped instead of being returned with its brackets intact.
  string[/\A\[\^?(.*)\]\z/m, 1] || string.dup
end