Module: CharacterSet::Parser

Defined in:
lib/character_set/parser.rb

Class Method Summary collapse

Class Method Details

.codepoints_from_bracket_expression(string) ⇒ Object

Raises:

  • (ArgumentError)


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/character_set/parser.rb', line 19

# Expands a simple bracket expression such as "[a-c\\x41]" into the list of
# codepoints it names.
#
# Surrounding brackets (and a leading "^") are removed via strip_brackets and
# the escapes supported by eval_escapes are turned into literal characters.
# An unescaped "-" between two characters denotes an inclusive range; a "-"
# at the very start or the very end of the content is a literal hyphen.
#
# @param string [String] the bracket expression (brackets optional)
# @return [Array<Integer>] codepoints in order of appearance; not de-duplicated
# @raise [ArgumentError] if +string+ is not a String, or if it contains a
#   backslash escape other than \u, \U or \x, a nested "[" or "&&"
def codepoints_from_bracket_expression(string)
  raise ArgumentError, 'pass a String'   unless string.is_a?(String)
  raise ArgumentError, 'advanced syntax' if string =~ /\\[^uUx]|[^\\]\[|&&/

  literal_content = eval_escapes(strip_brackets(string))

  prev_chr = nil
  in_range = false

  codepoints = literal_content.each_char.flat_map do |chr|
    if chr == '-' && prev_chr && prev_chr != '\\' && prev_chr != '-'
      # Hyphen after a regular character: the next character closes a range.
      in_range = true
      []
    else
      # The range start was already emitted on its own, so begin one past it.
      result = in_range ? ((prev_chr.ord + 1)..chr.ord).to_a : [chr.ord]
      in_range = false
      prev_chr = chr
      result
    end
  end

  # A hyphen that was never followed by a range end (e.g. "[a-]") is a
  # literal hyphen and must not be dropped.
  codepoints << '-'.ord if in_range
  codepoints
end

.codepoints_from_enumerable(object) ⇒ Object

Raises:

  • (ArgumentError)


5
6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/character_set/parser.rb', line 5

# Turns an enumerable of codepoints or single-character Strings into
# something that enumerates codepoints.
#
# Only the first element is inspected to decide how to treat the whole
# collection; later elements are not validated here.
#
# @param object [#each] Integers in 0...0x110000 or one-character Strings
# @return [Object, Array<Integer>] +object+ itself when its first element is
#   a valid Integer codepoint or when it is empty; otherwise the codepoints
#   of all elements joined into a single UTF-8 String
# @raise [ArgumentError] if +object+ does not respond to #each, or if its
#   first element is neither a valid codepoint nor a one-character String
def codepoints_from_enumerable(object)
  raise ArgumentError, 'pass an Enumerable' unless object.respond_to?(:each)

  # Use #each to check first element (only this works for all Enumerables)
  object.each do |el| # rubocop:disable Lint/UnreachableLoop
    if el.is_a?(Integer) && el >= 0 && el < 0x110000
      return object
    elsif el.is_a?(String) && el.length == 1
      return object.to_a.join.encode('utf-8').codepoints
    end
    raise ArgumentError, "#{el.inspect} is not valid as a codepoint"
  end

  # Nothing was yielded: return the empty input itself instead of relying on
  # the return value of #each, which is not the receiver for every
  # enumerable (an empty Enumerator.new { } yields nil, for instance).
  object
end

.eval_escapes(string) ⇒ Object



46
47
48
49
50
# File 'lib/character_set/parser.rb', line 46

# Replaces codepoint escapes in +string+ with the characters they denote.
#
# Recognized forms: \UXXXXXXXX (8 hex digits), \uXXXX (4 hex digits),
# U+X... (any number of hex digits), \xXX (2 hex digits) and \u{X...}.
#
# @param string [String] text possibly containing escapes
# @return [String] a new String with each escape replaced by its character
def eval_escapes(string)
  string.gsub(/\\U(\h{8})|\\u(\h{4})|U\+(\h+)|\\x(\h{2})|\\u\{(\h+)\}/) do
    # Exactly one alternative matched, so exactly one capture group is set.
    hex_digits = Regexp.last_match.captures.compact.first
    hex_digits.to_i(16).chr('utf-8')
  end
end

.strip_brackets(string) ⇒ Object



42
43
44
# File 'lib/character_set/parser.rb', line 42

# Returns the content of a bracket expression without its enclosing "[" and
# "]" and without a leading "^" negation marker.
#
# The pattern uses multiline mode so that content containing literal newline
# characters (e.g. "[a\nb]") is still recognized as a bracket expression.
#
# @param string [String] a bracket expression or bare content
# @return [String] the inner content, or a copy of +string+ when it is not
#   wrapped in brackets
def strip_brackets(string)
  string[/\A\[\^?(.*)\]\z/m, 1] || string.dup
end