Class: Lex::Lexer

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
DSL
Defined in:
lib/lex/lexer.rb,
lib/lex/lexer/dsl.rb,
lib/lex/lexer/rule_dsl.rb

Overview

An abstract lexer that doesn’t provide any lexing rules.

Direct Known Subclasses

Lex::Lexers::HTML

Defined Under Namespace

Modules: DSL Classes: RuleDSL

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from DSL

included

Constructor Details

#initialize(options = {}, &block) ⇒ Lexer

Returns a new instance of Lexer.



28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/lex/lexer.rb', line 28

# Builds a lexer positioned at the start of input, with the state machine in
# :initial and the rule set taken from the class-level DSL object.
#
# @param [Hash] options
# @option options [Object] :debug
#   stored as-is and exposed through #debug
#
# @yield an optional block, evaluated in the context of the DSL object
def initialize(options = {}, &block)
  # Position bookkeeping
  @current_line = 1
  @current_pos = 1 # Position in input
  @char_pos_in_line = 0

  # State machine
  @current_state = :initial
  @state_stack = []

  # Collaborators and configuration
  @logger = Lex::Logger.new
  @linter = Lex::Linter.new
  @debug = options[:debug]
  @dsl = self.class.dsl

  # Per-instance rules are applied before the linter inspects the lexer.
  @dsl.instance_eval(&block) unless block.nil?
  @linter.lint(self)
end

Instance Attribute Details

#current_line ⇒ Object (readonly)



13
14
15
# File 'lib/lex/lexer.rb', line 13

# Line counter of the lexer. It starts at 1 in #initialize and is increased
# by #advance_line.
#
# @return [Integer]
def current_line
  @current_line
end

#current_state ⇒ Object (readonly)



13
14
15
# File 'lib/lex/lexer.rb', line 13

# Name of the state the lexer is currently in. It is :initial after
# #initialize and is reassigned by #begin.
#
# @return [Symbol]
def current_state
  @current_state
end

#debug ⇒ Object (readonly)



13
14
15
# File 'lib/lex/lexer.rb', line 13

# The value passed as options[:debug] to #initialize. When truthy, #lex and
# #stream_tokens write diagnostic messages to #logger.
#
# @return [Object, nil]
def debug
  @debug
end

#input ⇒ Object (readonly)



13
14
15
# File 'lib/lex/lexer.rb', line 13

# The input most recently handed to #lex. #initialize does not assign it, so
# it is nil until #lex has been called.
#
# @return [String, nil]
def input
  @input
end

#logger ⇒ Object (readonly)



13
14
15
# File 'lib/lex/lexer.rb', line 13

# The Lex::Logger instance created in #initialize; it receives the debug
# messages emitted while lexing.
#
# @return [Lex::Logger]
def logger
  @logger
end

Instance Method Details

#advance_column(value) ⇒ Object



191
192
193
# File 'lib/lex/lexer.rb', line 191

# Moves the in-line character position forward.
#
# @param [Integer] value
#   how far to move the position
#
# @return [Integer] the updated position within the line
def advance_column(value)
  @char_pos_in_line = @char_pos_in_line + value
end

#advance_line(value) ⇒ Object



186
187
188
189
# File 'lib/lex/lexer.rb', line 186

# Moves the line counter forward and restarts the in-line character position.
#
# @param [Integer] value
#   how many lines to add to the line counter
#
# @return [Integer] the reset in-line position (always 0)
def advance_line(value)
  @current_line = @current_line + value
  # A new line starts, so the column count begins again from zero.
  @char_pos_in_line = 0
end

#begin(state) ⇒ Object

Switches the state

Parameters:

  • state (Symbol)

    the name of the state



151
152
153
154
155
156
# File 'lib/lex/lexer.rb', line 151

# Switches the lexer to the given state.
#
# A state that is not a key of the DSL's state_info is reported through
# #complain before the assignment is made.
#
# @param [Symbol] state
#   the name of the state
#
# @return [Symbol] the state that was assigned
def begin(state)
  known = @dsl.state_info.key?(state)
  complain("Undefined state: #{state}") unless known
  @current_state = state
end

#lex(input) ⇒ Enumerator

Tokenizes input and returns all tokens

Parameters:

  • input (String)

Returns:

  • (Enumerator)

    the tokens found



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/lex/lexer.rb', line 51

# Tokenizes input, yielding each token to the given block.
#
# The input is remembered (see #input) whether or not a block is given.
# Without a block an Enumerator over the same call is returned instead.
# When #debug is truthy, the DSL's tokens, states, ignore sets and error
# handlers are logged before lexing starts.
#
# @param [String] input
#
# @return [Enumerator]
#   the tokens found, when called without a block
def lex(input)
  @input = input
  return to_enum(:lex, input) unless block_given?

  if debug
    logger.info "lex: tokens   = #{@dsl.lex_tokens}"
    logger.info "lex: states   = #{@dsl.state_info}"
    logger.info "lex: ignore   = #{@dsl.state_ignore}"
    logger.info "lex: error    = #{@dsl.state_error}"
  end

  stream_tokens(input) { |token| yield token }
end

#pop_state ⇒ Object

Restore previous state

Takes no arguments: the state to restore is the one most recently saved by #push_state.



175
176
177
# File 'lib/lex/lexer.rb', line 175

# Restores the state most recently saved on the state stack.
#
# The popped value is handed to #begin, so an empty stack results in
# #begin being called with nil.
#
# @return [Object] whatever #begin returns
def pop_state
  previous_state = @state_stack.pop
  self.begin(previous_state)
end

#push_state(state) ⇒ Object

Enter new state and save old one on stack

Parameters:

  • state (Symbol)

    the name of the state



164
165
166
167
# File 'lib/lex/lexer.rb', line 164

# Saves the current state on the state stack, then switches to a new one.
#
# @param [Symbol] state
#   the name of the state to enter
#
# @return [Object] whatever #begin returns
def push_state(state)
  @state_stack.push(@current_state)
  self.begin(state)
end

#rewind ⇒ Object

Reset the internal state of the lexer



197
198
199
200
201
# File 'lib/lex/lexer.rb', line 197

# Reset the internal state of the lexer
#
# Restores the counters and the state machine to the values #initialize
# gives them: line 1, input position 1, column 0, the :initial state and an
# empty state stack.
#
# @return [Array] the new, empty state stack
def rewind
  @current_line     = 1
  @current_pos      = 1
  @char_pos_in_line = 0
  @current_state    = :initial
  @state_stack      = []
end

#skip(n) ⇒ Object

Skip ahead n characters



182
183
184
# File 'lib/lex/lexer.rb', line 182

# Skip ahead n characters by moving the stored input position forward.
#
# @param [Integer] n
#   how far to move the position
#
# @return [Integer] the updated position
def skip(n)
  @current_pos = @current_pos + n
end

#stream_tokens(input) {|Lex::Token| ... } ⇒ Object

Advances through input and streams tokens

Parameters:

  • input (String)

Yields:



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/lex/lexer.rb', line 75

# Advances through input and streams tokens
#
# At each scan position:
#
# * a character contained in the current state's ignore set is skipped;
# * otherwise every lexeme of the current state is tried and the longest
#   match wins (the earliest lexeme wins a tie);
# * a matched token with an action is passed to that action; unless the
#   action returns a Token the match is consumed silently;
# * with no match, the current state's error handler (if any) is called
#   with an :error token for the offending character, and only a Token
#   returned by the handler is emitted; without a handler the character is
#   reported through #complain.
#
# @param [String] input
#
# @yield [Lex::Token]
def stream_tokens(input, &block)
  scanner = StringScanner.new(input)
  until scanner.eos?
    current_char = scanner.peek(1)
    if @dsl.state_ignore[current_state].include?(current_char)
      scanner.pos += current_char.bytesize
      @char_pos_in_line += current_char.size
      next
    end

    if debug
      logger.info "lex: [#{current_state}]: lexemes = #{@dsl.state_lexemes[current_state].map(&:name)}"
    end

    # Look for regex match; a later lexeme only replaces an earlier one when
    # its match is strictly longer.
    longest_token = nil
    @dsl.state_lexemes[current_state].each do |lexeme|
      match = lexeme.match(scanner)
      next if match.nil?

      if longest_token.nil? || match.value.length > longest_token.value.length
        longest_token = match
      end
    end

    if longest_token
      # Measure the matched text before the action runs: an action may
      # replace token.value (e.g. with an Integer), which must not change
      # how much input is consumed.
      matched_text = longest_token.value.to_s
      chars_matched = matched_text.length
      # StringScanner#pos is a byte offset, so multibyte text needs bytesize.
      bytes_matched = matched_text.bytesize

      if longest_token.action
        new_token = longest_token.action.call(self, longest_token)
        # No token returned from action, move on to the next token
        unless new_token.is_a?(Token)
          scanner.pos += bytes_matched
          @char_pos_in_line += chars_matched unless longest_token.name == :newline
          next
        end
      end

      start_char_pos_in_token = @char_pos_in_line + current_char.size
      longest_token.update_line(current_line, start_char_pos_in_token)
      advance_column(chars_matched)
      scanner.pos += bytes_matched
    else
      # No match, check in errors
      error_handler = @dsl.state_error[current_state]
      if error_handler
        token = Token.new(:error, current_char)
        start_char_pos_in_token = @char_pos_in_line + current_char.size
        token.update_line(current_line, start_char_pos_in_token)
        new_token = error_handler.call(self, token)
        advance_column(current_char.length)
        scanner.pos += current_char.bytesize
        # Only a Token is emitted, mirroring the rule-action branch above;
        # any other return value just skips the character.
        next unless new_token.is_a?(Token)

        longest_token = new_token
      else
        complain("Illegal character `#{current_char}`")
      end
    end

    logger.info "lex: #{longest_token}" if debug
    block.call(longest_token)
  end
end