Class: Lex::Lexer
- Inherits:
-
Object
- Object
- Lex::Lexer
- Extended by:
- Forwardable
- Includes:
- DSL
- Defined in:
- lib/lex/lexer.rb,
lib/lex/lexer/dsl.rb,
lib/lex/lexer/rule_dsl.rb
Overview
An abstract lexer that doesn’t provide any lexing rules.
Direct Known Subclasses
Defined Under Namespace
Instance Attribute Summary collapse
- #current_line ⇒ Object readonly
- #current_state ⇒ Object readonly
- #debug ⇒ Object readonly
- #input ⇒ Object readonly
- #logger ⇒ Object readonly
Instance Method Summary collapse
- #advance_column(value) ⇒ Object
- #advance_line(value) ⇒ Object
-
#begin(state) ⇒ Object
Switches the state.
-
#initialize(options = {}, &block) ⇒ Lexer
constructor
A new instance of Lexer.
-
#lex(input) ⇒ Enumerator
Tokenizes input and returns all tokens.
-
#pop_state ⇒ Object
Restore previous state.
-
#push_state(state) ⇒ Object
Enter new state and save old one on stack.
-
#rewind ⇒ Object
Reset the internal state of the lexer.
-
#skip(n) ⇒ Object
Skip ahead n characters.
-
#stream_tokens(input) {|Lex::Token| ... } ⇒ Object
Advances through input and streams tokens.
Methods included from DSL
Constructor Details
#initialize(options = {}, &block) ⇒ Lexer
Returns a new instance of Lexer.
28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/lex/lexer.rb', line 28
#
# Builds a lexer in its starting configuration and lints it.
#
# @param options [Hash] configuration; only the :debug key is read here
# @yield optional block, evaluated with instance_eval on the DSL object
#   returned by self.class.dsl
# @return [Lexer] a new instance of Lexer
def initialize(options = {}, &block)
  @current_line     = 1
  @current_pos      = 1 # Position in input
  @char_pos_in_line = 0
  @current_state    = :initial
  @state_stack      = []
  @logger           = Lex::Logger.new
  @linter           = Lex::Linter.new
  @debug            = options[:debug]
  @dsl              = self.class.dsl
  @dsl.instance_eval(&block) if block
  # Linting runs last so that rules added by the block are included.
  @linter.lint(self)
end
Instance Attribute Details
#current_line ⇒ Object (readonly)
13 14 15 |
# File 'lib/lex/lexer.rb', line 13
#
# Reader for the line counter (set to 1 by the constructor and increased by
# #advance_line).
#
# @return [Object] the value stored in @current_line
def current_line
  @current_line
end
#current_state ⇒ Object (readonly)
13 14 15 |
# File 'lib/lex/lexer.rb', line 13
#
# Reader for the active lexer state (:initial after construction, changed
# by #begin).
#
# @return [Object] the value stored in @current_state
def current_state
  @current_state
end
#debug ⇒ Object (readonly)
13 14 15 |
# File 'lib/lex/lexer.rb', line 13
#
# Reader for the debug flag; the other methods log through #logger when it
# is truthy.
#
# @return [Object] the value stored in @debug
def debug
  @debug
end
#input ⇒ Object (readonly)
13 14 15 |
# File 'lib/lex/lexer.rb', line 13
#
# Reader for the text most recently handed to #lex.
#
# @return [Object] the value stored in @input (nil until #lex is called)
def input
  @input
end
#logger ⇒ Object (readonly)
13 14 15 |
# File 'lib/lex/lexer.rb', line 13
#
# Reader for the logger created by the constructor.
#
# @return [Object] the value stored in @logger
def logger
  @logger
end
Instance Method Details
#advance_column(value) ⇒ Object
191 192 193 |
# File 'lib/lex/lexer.rb', line 191
#
# Moves the column counter forward.
#
# @param value [Integer] amount added to @char_pos_in_line
# @return [Integer] the updated column counter
def advance_column(value)
  @char_pos_in_line = @char_pos_in_line + value
end
#advance_line(value) ⇒ Object
186 187 188 189 |
# File 'lib/lex/lexer.rb', line 186
#
# Moves the line counter forward and restarts the column counter.
#
# @param value [Integer] amount added to @current_line
# @return [Integer] 0, the value the column counter is reset to
def advance_line(value)
  @current_line = @current_line + value
  # A new line starts at column zero.
  @char_pos_in_line = 0
end
#begin(state) ⇒ Object
Switches the state
151 152 153 154 155 156 |
# File 'lib/lex/lexer.rb', line 151
#
# Switches the lexer to +state+.
#
# A state that is not a key of @dsl.state_info is reported through
# #complain first; the assignment below still runs if #complain returns.
#
# @param state [Object] the state to make current
# @return [Object] the state that was assigned
def begin(state)
  complain("Undefined state: #{state}") unless @dsl.state_info.key?(state)
  @current_state = state
end
#lex(input) ⇒ Enumerator
Tokenizes input and returns all tokens
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/lex/lexer.rb', line 51
#
# Tokenizes +input+.
#
# The input is remembered in @input. Without a block an Enumerator over this
# same method is returned; with a block every token produced by
# #stream_tokens is yielded to it.
#
# @param input [String] text to tokenize
# @yield [token] each token produced by #stream_tokens
# @return [Enumerator] when no block is given
def lex(input)
  @input = input
  return to_enum(:lex, input) unless block_given?

  # In debug mode, dump the DSL configuration before lexing starts.
  if debug
    logger.info "lex: tokens = #{@dsl.lex_tokens}"
    logger.info "lex: states = #{@dsl.state_info}"
    logger.info "lex: ignore = #{@dsl.state_ignore}"
    logger.info "lex: error = #{@dsl.state_error}"
  end

  stream_tokens(input) { |token| yield token }
end
#pop_state ⇒ Object
Restore previous state
175 176 177 |
# File 'lib/lex/lexer.rb', line 175
#
# Restores the state most recently saved by #push_state.
#
# The top entry of @state_stack is removed and handed to #begin. With an
# empty stack Array#pop yields nil, so #begin is called with nil.
#
# @return [Object] whatever #begin returns
def pop_state
  previous_state = @state_stack.pop
  self.begin(previous_state)
end
#push_state(state) ⇒ Object
Enter new state and save old one on stack
164 165 166 167 |
# File 'lib/lex/lexer.rb', line 164
#
# Enters +state+, remembering the state being left.
#
# The current state is appended to @state_stack before #begin switches to
# the new one, so #pop_state can return to it later.
#
# @param state [Object] the state to enter
# @return [Object] whatever #begin returns
def push_state(state)
  @state_stack.push(@current_state)
  self.begin(state)
end
#rewind ⇒ Object
Reset the internal state of the lexer
197 198 199 200 201 |
# File 'lib/lex/lexer.rb', line 197
#
# Resets the lexer's position and state tracking to the values the
# constructor assigns, so the same instance can lex another input from a
# clean start.
#
# @return [Array] an empty array (return value unchanged from earlier versions)
def rewind
  # These are the variables the lexing methods read and write.
  @current_line     = 1
  @current_pos      = 1
  @char_pos_in_line = 0
  @current_state    = :initial
  @state_stack      = []

  # NOTE(review): earlier versions assigned only the three variables below,
  # which no method visible here reads. They are kept in case code outside
  # this listing depends on them -- confirm and remove if unused.
  @line   = 1
  @column = 1
  @stack  = []
end
#skip(n) ⇒ Object
Skip ahead n characters
182 183 184 |
# File 'lib/lex/lexer.rb', line 182
#
# Skips ahead by adding +n+ to the @current_pos counter.
#
# @param n [Integer] number of characters to skip
# @return [Integer] the updated @current_pos
def skip(n)
  @current_pos = @current_pos + n
end
#stream_tokens(input) {|Lex::Token| ... } ⇒ Object
Advances through input and streams tokens
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# File 'lib/lex/lexer.rb', line 75
#
# Advances through +input+ with a StringScanner and hands each resulting
# token to the block.
#
# On every iteration, for the current state:
# 1. a character listed in @dsl.state_ignore is stepped over;
# 2. otherwise every lexeme in @dsl.state_lexemes is matched at the scanner
#    position and the longest match wins (the earliest lexeme wins a tie);
# 3. a matched token's action, if any, is called with the lexer and the
#    token -- when it does not return a Token the text is consumed and
#    nothing is emitted;
# 4. with no match, the handler in @dsl.state_error (if any) receives an
#    :error token for the current character; without a handler the
#    character is reported through #complain.
#
# @param input [String] text to scan
# @yield [Lex::Token] each token that is emitted
# @return [nil]
def stream_tokens(input, &block)
  scanner = StringScanner.new(input)

  until scanner.eos?
    current_char = scanner.peek(1)
    char_width   = current_char.size

    # Ignored characters only move the scanner and the column counter.
    if @dsl.state_ignore[current_state].include?(current_char)
      scanner.pos       += char_width
      @char_pos_in_line += char_width
      next
    end

    lexemes = @dsl.state_lexemes[current_state]
    logger.info "lex: [#{current_state}]: lexemes = #{lexemes.map(&:name)}" if debug

    # Longest match wins; `>` (not `>=`) keeps the earlier lexeme on a tie.
    candidates    = lexemes.map { |lexeme| lexeme.match(scanner) }.compact
    longest_token = candidates.reduce do |best, candidate|
      candidate.value.length > best.value.length ? candidate : best
    end

    if longest_token
      action = longest_token.action
      if action
        new_token = action.call(self, longest_token)
        # The action produced no token: consume the text and move on.
        unless new_token.is_a?(Token)
          chars_to_skip = longest_token.value.to_s.length
          scanner.pos += chars_to_skip
          @char_pos_in_line += chars_to_skip unless longest_token.name == :newline
          next
        end
      end

      # Measured after the action has run, since the action receives the
      # token and may have replaced its value.
      move_by = longest_token.value.to_s.length
      longest_token.update_line(current_line, @char_pos_in_line + char_width)
      advance_column(move_by)
      scanner.pos += move_by
    elsif longest_token.nil?
      error_handler = @dsl.state_error[current_state]
      if error_handler
        token = Token.new(:error, current_char)
        token.update_line(current_line, @char_pos_in_line + char_width)
        new_token = error_handler.call(self, token)
        advance_column(current_char.length)
        scanner.pos += current_char.length
        # A handler that returns nil swallows the character.
        next if new_token.nil?

        longest_token = new_token
      else
        complain("Illegal character `#{current_char}`")
      end
    end

    logger.info "lex: #{longest_token}" if debug
    block.call(longest_token)
  end
end