Class: Dentaku::Tokenizer

Inherits: Object
Defined in:
lib/dentaku/tokenizer.rb

Constant Summary collapse

LPAREN =
TokenMatcher.new(:grouping, :open)
RPAREN =
TokenMatcher.new(:grouping, :close)

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#aliases ⇒ Object (readonly)

Returns the value of attribute aliases.



7
8
9
# File 'lib/dentaku/tokenizer.rb', line 7

# Reader for the alias table currently in effect (set by #tokenize).
def aliases
  instance_variable_get(:@aliases)
end

#case_sensitive ⇒ Object (readonly)

Returns the value of attribute case_sensitive.



7
8
9
# File 'lib/dentaku/tokenizer.rb', line 7

# Reader for the case-sensitivity flag captured by #tokenize.
def case_sensitive
  instance_variable_get(:@case_sensitive)
end

Instance Method Details

#alias_regex ⇒ Object



80
81
82
83
# File 'lib/dentaku/tokenizer.rb', line 80

# Build a case-insensitive regex matching any known alias when it appears
# as a function name, i.e. preceded by punctuation/whitespace/start-of-string
# and immediately followed by "(".
#
# Each alias is passed through Regexp.escape so that metacharacters in an
# alias name ("." , "+", "(", ...) are matched literally instead of being
# interpreted as pattern syntax — unescaped values could produce false
# matches or a RegexpError.
def alias_regex
  values = @aliases.values.flatten.map { |name| Regexp.escape(name) }.join('|')
  /(?<=\p{Punct}|[[:space:]]|\A)(#{values})(?=\()/i
end

#last_token ⇒ Object



36
37
38
# File 'lib/dentaku/tokenizer.rb', line 36

# Most recently accumulated token, or nil when nothing has been scanned yet.
# Scanners use this as one-token lookbehind context.
def last_token
  @tokens[-1]
end

#replace_aliases(string) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/dentaku/tokenizer.rb', line 66

# Replace every alias occurrence in +string+ with its canonical name and
# return the resulting string. When no aliases are configured the input is
# returned unchanged.
#
# Uses non-destructive String#gsub instead of gsub!: the original code
# mutated the caller's argument in place, which raises FrozenError on
# frozen input (e.g. under `# frozen_string_literal: true`) and surprises
# callers sharing the string. Callers already use the return value
# (`input = replace_aliases(input)`), so this is drop-in compatible.
def replace_aliases(string)
  return string unless @aliases.any?

  string.gsub(alias_regex) do |match|
    # Find the canonical name whose alias list contains the matched text,
    # comparing case-insensitively to mirror alias_regex's /i flag.
    match_regex = /^#{Regexp.escape(match)}$/i

    @aliases.detect do |(_key, aliases)|
      !aliases.grep(match_regex).empty?
    end.first
  end
end

#scan(string, scanner) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/dentaku/tokenizer.rb', line 40

# Try a single scanner against the head of +string+.
#
# Returns a two-element array [matched, remainder]: [false, string] when the
# scanner did not match, otherwise [true, <input with the matched prefix
# removed>]. Matched tokens are appended to @tokens (whitespace excluded)
# and parenthesis nesting is tracked in @nesting.
def scan(string, scanner)
  matched = scanner.scan(string, last_token)
  return [false, string] unless matched

  matched.each do |token|
    # A zero-width match would never consume input and would loop forever.
    if token.empty?
      fail! :unexpected_zero_width_match,
            token_category: token.category, at: string
    end

    @nesting += 1 if LPAREN == token
    @nesting -= 1 if RPAREN == token
    fail! :too_many_closing_parentheses if @nesting < 0

    @tokens << token unless token.is?(:whitespace)
  end

  consumed = matched.map(&:length).inject(:+)
  [true, string[consumed..-1]]
end

#strip_comments(input) ⇒ Object



62
63
64
# File 'lib/dentaku/tokenizer.rb', line 62

# Remove C-style /* ... */ comments (including embedded '*' runs) from the
# input and return the stripped copy; the argument is not mutated.
def strip_comments(input)
  comment = %r{ /\* [^*]* \*+ (?: [^*/] [^*]* \*+ )* / }x
  input.gsub(comment, '')
end

#tokenize(string, options = {}) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/dentaku/tokenizer.rb', line 12

# Tokenize a formula string into the array of accumulated tokens (@tokens).
#
# string  - the expression to tokenize; converted with #to_s and dup'ed so
#           the caller's object is not mutated here.
# options - :aliases (defaults to global_aliases): hash of canonical name =>
#           list of accepted alternatives, consumed by #replace_aliases —
#           presumably function-name aliases, given alias_regex requires a
#           following "(".
#           :case_sensitive (defaults to false): forwarded to the scanners.
def tokenize(string, options = {})
  @nesting = 0
  @tokens  = []
  @aliases = options.fetch(:aliases, global_aliases)
  input    = strip_comments(string.to_s.dup)
  input    = replace_aliases(input)
  @case_sensitive = options.fetch(:case_sensitive, false)

  # Repeatedly try each scanner against the head of the remaining input.
  # #scan returns [matched, remainder]; the block reassigns the outer
  # `input` binding, so a successful scanner shrinks the string in place.
  until input.empty?
    scanned = TokenScanner.scanners(case_sensitive: case_sensitive).any? do |scanner|
      scanned, input = scan(input, scanner)
      scanned
    end

    # No scanner consumed anything: the input is unrecognizable from here.
    unless scanned
      fail! :parse_error, at: input
    end

  end

  # #scan already rejects negative nesting; a positive balance left over
  # means at least one group was never closed.
  fail! :too_many_opening_parentheses if @nesting > 0

  @tokens
end