Class: Dendroid::Utils::BaseTokenizer

Inherits: Object
Defined in:
lib/dendroid/utils/base_tokenizer.rb

Overview

A basic tokenizer whose responsibility is to break the input text into a sequence of token objects. This class defines a simple DSL for building a tokenizer.
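
A minimal usage sketch (the patterns, terminal names, and the mapping below are illustrative assumptions, not part of the class):

  require 'dendroid/utils/base_tokenizer'

  tokenizer = Dendroid::Utils::BaseTokenizer.new do
    skip_ws(/[ \t]+/)                # ignore spaces and tabs
    skip_nl(/\r?\n/)                 # newlines advance the line counter
    scan_verbatim(/[-+*\/()]/)       # operators and parentheses tokenized verbatim
    scan_value(/\d+/, int_terminal, ->(txt) { txt.to_i })  # int_terminal: a Dendroid::Syntax::Terminal supplied by the caller
    map_verbatim2terminal('+' => 'PLUS', '-' => 'MINUS', '*' => 'STAR',
                          '/' => 'SLASH', '(' => 'LPAREN', ')' => 'RPAREN')
  end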

Instance Attribute Summary

Instance Method Summary

Constructor Details

#initialize(&aBlock) ⇒ BaseTokenizer

Constructor

Parameters:

  • aBlock (Proc)


# File 'lib/dendroid/utils/base_tokenizer.rb', line 28

def initialize(&aBlock)
  @scanner = StringScanner.new('')
  @actions = { skip: [], scan_verbatim: [], scan_value: [] }
  defaults
  return unless block_given?

  instance_exec(&aBlock)
end

Instance Attribute Details

#actions ⇒ Hash{Symbol => Array<Regexp>} (readonly)

Returns the registered tokenization actions, grouped by action kind.

Returns:

  • (Hash{Symbol => Array<Regexp>})

    The registered tokenization actions, grouped by action kind


# File 'lib/dendroid/utils/base_tokenizer.rb', line 24

def actions
  @actions
end

#line_start ⇒ Integer (readonly)

Returns the position of the last start of line in the input string.

Returns:

  • (Integer)

    Position of last start of line in the input string



# File 'lib/dendroid/utils/base_tokenizer.rb', line 21

def line_start
  @line_start
end

#lineno ⇒ Integer (readonly)

Returns the current line number.

Returns:

  • (Integer)

    The current line number



# File 'lib/dendroid/utils/base_tokenizer.rb', line 18

def lineno
  @lineno
end

#scanner ⇒ StringScanner (readonly)

Returns the low-level input scanner.

Returns:

  • (StringScanner)

    Low-level input scanner



# File 'lib/dendroid/utils/base_tokenizer.rb', line 15

def scanner
  @scanner
end

Instance Method Details

#input=(source) ⇒ Object

Reset the tokenizer and set the new text to tokenize.

Parameters:

  • source (String)


# File 'lib/dendroid/utils/base_tokenizer.rb', line 39

def input=(source)
  reset
  scanner.string = source
end
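
For example, assuming a tokenizer configured as in the overview sketch:

  tokenizer.input = '2 + 3'   # resets line tracking and hands the text to the scanner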

#map_verbatim2terminal(mapping) ⇒ Object

Set the mapping from verbatim text to the corresponding terminal symbol name.

Parameters:

  • mapping (Hash{String => String})


# File 'lib/dendroid/utils/base_tokenizer.rb', line 108

def map_verbatim2terminal(mapping)
  @verbatim2terminal = mapping
end
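
A possible mapping, assuming the keys are the verbatim texts and the values the terminal symbol names:

  tokenizer.map_verbatim2terminal('+' => 'PLUS', '-' => 'MINUS', '*' => 'STAR')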

#next_token ⇒ Dendroid::Lexical::Token, NilClass

Return the next token (if any) from the input stream.

Returns:

  • (Dendroid::Lexical::Token, NilClass)

    The next token, or nil if the end of input was reached

# File 'lib/dendroid/utils/base_tokenizer.rb', line 116

def next_token
  token = nil

  # Loop until end of input reached or token found
  until scanner.eos?
    if scanner.skip(actions[:skip_nl])
      next_line_scanned
      next
    end

    next if scanner.skip(actions[:skip_ws]) # Skip whitespaces

    if (text = scanner.scan(actions[:scan_verbatim]))
      token = verbatim_scanned(text)
      break
    end

    tuple = actions[:scan_value].find do |(pattern, _terminal, _conversion)|
      scanner.check(pattern)
    end
    if tuple
      (pattern, terminal, conversion) = tuple
      text = scanner.scan(pattern)
      token = value_scanned(text, terminal, conversion)
      break
    end

    # Unknown token
    col = scanner.pos - line_start + 1
    erroneous = scanner.peek(1).nil? ? '' : scanner.scan(/./)
    raise StandardError, "Error: [line #{lineno}:#{col}]: Unexpected character #{erroneous}."
  end

  token
end
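
A sketch of a driving loop, assuming the tokenizer was configured and fed input as in the earlier examples:

  tokenizer.input = '2 + 3'
  while (token = tokenizer.next_token)
    puts token.inspect   # each token is a Dendroid::Lexical::Token
  end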

#reset ⇒ Object

Reset the tokenizer.



# File 'lib/dendroid/utils/base_tokenizer.rb', line 45

def reset
  @lineno = 1
  @line_start = 0
  scanner.reset
end

#scan_value(pattern, terminal, conversion) ⇒ Object

Associate the provided pattern with the action of tokenizing the matching text as an instance of the given terminal symbol and converting the matched text into a value with the given conversion proc.

Parameters:

  • pattern (Regexp)
  • terminal (Dendroid::Syntax::Terminal)
  • conversion (Proc)

    a Proc (lambda) that takes a String argument and returns a value.



# File 'lib/dendroid/utils/base_tokenizer.rb', line 96

def scan_value(pattern, terminal, conversion)
  patt = normalize_pattern(pattern)
  tuple = [patt, terminal, conversion]
  if actions[:scan_value].empty?
    actions[:scan_value] = [tuple]
  else
    actions[:scan_value] << tuple
  end
end
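
For instance, inside the configuration block, integer literals could be tokenized as follows (int_terminal stands in for a Dendroid::Syntax::Terminal provided by the caller):

  scan_value(/\d+/, int_terminal, ->(txt) { txt.to_i })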

#scan_verbatim(pattern) ⇒ Object

Associate the provided pattern with the action of tokenizing the matching text verbatim.

Parameters:

  • pattern (Regexp)


# File 'lib/dendroid/utils/base_tokenizer.rb', line 80

def scan_verbatim(pattern)
  patt = normalize_pattern(pattern)
  if actions[:scan_verbatim].empty?
    actions[:scan_verbatim] = patt
  else
    new_pattern = actions[:scan_verbatim].union(patt)
    actions[:scan_verbatim] = new_pattern
  end
end
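
For instance, inside the configuration block, arithmetic operators and parentheses could be tokenized verbatim with:

  scan_verbatim(/[-+*\/()]/)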

#skip(pattern) ⇒ Object

Associate the provided pattern with the action to skip the matching text.

Parameters:

  • pattern (Regexp)


# File 'lib/dendroid/utils/base_tokenizer.rb', line 69

def skip(pattern)
  if actions[:skip].empty?
    actions[:skip] = pattern
  else
    new_pattern = actions[:skip].union(pattern)
    actions[:skip] = new_pattern
  end
end
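
For instance, inside the configuration block, a pattern for line comments (illustrative syntax) could be registered for skipping:

  skip(/#[^\r\n]*/)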

#skip_nl(pattern) ⇒ Object

Associate the provided pattern with the action of skipping a newline and incrementing the line counter.

Parameters:

  • pattern (Regexp)


# File 'lib/dendroid/utils/base_tokenizer.rb', line 57

def skip_nl(pattern)
  actions[:skip_nl] = pattern
end
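
For instance, to accept both Unix and Windows line endings:

  skip_nl(/\r?\n/)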

#skip_ws(pattern) ⇒ Object

Associate the provided pattern with the action to skip whitespace(s).

Parameters:

  • pattern (Regexp)


# File 'lib/dendroid/utils/base_tokenizer.rb', line 63

def skip_ws(pattern)
  actions[:skip_ws] = pattern
end
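
For instance, to skip runs of spaces and tabs:

  skip_ws(/[ \t]+/)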