Module: EBNF::LL1::Parser

Defined in:
lib/ebnf/ll1/parser.rb

Overview

A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).

Defined Under Namespace

Modules: ClassMethods Classes: Error

Constant Summary collapse

DEBUG_LEVEL =

level above which debug messages are suppressed

10

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#linenoInteger (readonly)

Returns line number of current token.

Returns:

  • (Integer)

    line number of current token



15
16
17
# File 'lib/ebnf/ll1/parser.rb', line 15

# Line number of the current token.
#
# @return [Integer] the value currently held in @lineno
def lineno; @lineno; end

Class Method Details

.included(base) ⇒ Object



17
18
19
# File 'lib/ebnf/ll1/parser.rb', line 17

# Mixin hook: extends the including class or module with ClassMethods so
# that the methods defined there become callable on +base+ itself.
#
# @param base [Module] the class or module doing the `include`
# @return [Object] +base+, as returned by `extend`
def self.included(base)
  base.extend ClassMethods
end

Instance Method Details

#add_prod_data(sym, *values) ⇒ Object

Add values to production data, values arranged as an array



364
365
366
367
368
369
370
# File 'lib/ebnf/ll1/parser.rb', line 364

##
# Add values to production data, values arranged as an array.
#
# Nothing happens when no values are given or when every value is nil.
# Otherwise all of +values+ (nil entries included) are appended to the array
# stored under +sym+ in the current production data, creating that array
# first if needed.
#
# The debug trace reports the array as it was *before* the append, matching
# the trace emitted by #add_prod_datum.
#
# @param sym [Object] key into the current #prod_data hash
# @param values [Array<Object>] values to append
# @return [Object, nil] nil when nothing was added, otherwise the result of #debug
def add_prod_data(sym, *values)
  return if values.compact.empty?

  # `+` builds a new array, so `existing` keeps the pre-append contents and
  # the trace below shows a truthful "before += added" message.
  existing = (prod_data[sym] ||= [])
  prod_data[sym] = existing + values
  debug("add_prod_data(#{sym})") {"#{existing.inspect} += #{values.inspect}"}
end

#add_prod_datum(sym, values) ⇒ Object

Add a single value to prod_data, allows for values to be an array



348
349
350
351
352
353
354
355
356
357
358
359
360
361
# File 'lib/ebnf/ll1/parser.rb', line 348

##
# Add a single value to the current production data; +values+ may also be
# an array, in which case its elements are concatenated.
#
# * nil    -- ignored; nothing is created or logged
# * Array  -- elements are concatenated onto the array stored under +sym+
# * other  -- pushed as a single element onto the array stored under +sym+
#
# @param sym [Object] key into the current #prod_data hash
# @param values [Array, Object, nil] datum (or data) to record
# @return [Array, nil] the array now stored under +sym+, or nil when +values+ is nil
def add_prod_datum(sym, values)
  return if values.nil?

  prod_data[sym] ||= []
  if values.is_a?(Array)
    debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
    prod_data[sym] += values
  else
    debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} << #{values.inspect}"}
    prod_data[sym] << values
  end
end

#depthObject



342
# File 'lib/ebnf/ll1/parser.rb', line 342

# Number of entries currently held in @productions.
#
# @return [Integer] length of @productions, or 0 when it has not been set
def depth
  @productions ? @productions.length : 0
end

#parse(input = nil, prod = nil, options = {}) {|context, *data| ... } ⇒ EBNF::LL1::Parser

Initializes a new parser instance.

Attempts to recover from errors.

Examples:

require 'rdf/ll1/parser'

class MyParser
  include EBNF::LL1::Parser

  branch      MyParser::BRANCH

  ##
  # Defines a production called during different phases of parsing
  # with data from previous production along with data defined for the
  # current production
  #
  # Yield to generate a triple
  production :object do |parser, phase, input, current|
    object = current[:resource]
    yield :statement, RDF::Statement.new(input[:subject], input[:predicate], object)
  end

  ##
  # Defines the pattern for a terminal node
  terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |parser, production, token, input|
    input[:BLANK_NODE_LABEL] = RDF::Node.new(token)
  end

  ##
  # Iterates the given block for each RDF statement in the input.
  #
  # @yield  [statement]
  # @yieldparam [RDF::Statement] statement
  # @return [void]
  def each_statement(&block)
    @callback = block

    parse(START.to_sym) do |context, *data|
      case context
      when :statement
        yield *data
      end
    end
  end

end

Parameters:

  • input (String, #to_s) (defaults to: nil)
  • prod (Symbol, #to_s) (defaults to: nil)

    The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.

  • options (Hash{Symbol => Object}) (defaults to: {})

Options Hash (options):

  • :branch (Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}})

    LL1 branch table.

  • :first (Hash{Symbol,String => Array<Symbol,String>}) — default: {}

    Lists valid terminals that can precede each production (for error recovery).

  • :follow (Hash{Symbol,String => Array<Symbol,String>}) — default: {}

    Lists valid terminals that can follow each production (for error recovery).

  • :validate (Boolean) — default: false

    whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.

  • :progress (Boolean)

    Show progress of parser productions

  • :debug (Boolean)

    Detailed debug output

Yields:

  • (context, *data)

    Yields to return data to the parser

Yield Parameters:

  • context (:statement, :trace)

    Context for block

  • *data (Symbol)

    Data specific to the call

Returns:

Raises:

See Also:



204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# File 'lib/ebnf/ll1/parser.rb', line 204

##
# Table-driven parse of +input+, starting at production +prod+.
#
# The method keeps an explicit stack (+todo_stack+) of
# `{:prod => ..., :terms => ...}` entries. For the entry on top it looks up
# the term sequence for the current token in the branch table, then works
# through those terms: terminals are matched with #accept, anything else is
# pushed as a new stack entry. Finished entries are popped and #onFinish is
# called for each. Problems are reported through #error and, once the whole
# input has been handled, a non-empty @error_log is raised as a single Error.
#
# @param input [Lexer, Object] used as-is when it is a Lexer, otherwise handed to Lexer.new
# @param prod [Symbol, #split] starting production; a non-Symbol is split on '#'
#   and its last segment is converted to a Symbol
# @param options [Hash] :branch, :first and :follow are read here; a copy of the
#   hash is passed to Lexer.new
# @yield the block is stored in @parse_callback; it is not invoked directly by this method
# @return [nil] when no errors were logged
# @raise [Error] when the branch table is missing or empty, when +prod+ is nil,
#   or when @error_log is non-empty at the end of the parse
def parse(input = nil, prod = nil, options = {}, &block)
  # Keep a private copy of the options; the copy is also what the Lexer receives.
  @options = options.dup
  @branch  = options[:branch]
  # NOTE(review): `||=` is applied to the caller's `options` hash (not the copy
  # above), so a missing :first/:follow key is written back into that hash.
  @first  = options[:first] ||= {}
  @follow  = options[:follow] ||= {}
  # Reuse a ready-made Lexer, otherwise build one from the class-level terminal patterns.
  @lexer   = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, @options)
  @productions = []
  @parse_callback = block
  @recovering = false
  @error_log = []
  terminals = self.class.patterns.map(&:type)  # Get defined terminals to help with branching

  # Unrecoverable errors
  raise Error, "Branch table not defined" unless @branch && @branch.length > 0
  raise Error, "Starting production not defined" unless prod

  # @prod_data is a stack of hashes; #prod_data reads the top one.
  @prod_data = [{}]
  # A non-Symbol production name is reduced to the part after the last '#'.
  prod = prod.split('#').last.to_sym unless prod.is_a?(Symbol)
  # :terms => nil marks a stack entry whose term sequence has not been looked up yet.
  todo_stack = [{:prod => prod, :terms => nil}]

  while !todo_stack.empty?
    # Set when a non-terminal is pushed below; it suppresses the pop loop for this pass.
    pushed = false
    if todo_stack.last[:terms].nil?
      todo_stack.last[:terms] = []
      cur_prod = todo_stack.last[:prod]

      # Get this first valid token appropriate for the stacked productions,
      # skipping invalid tokens until either a valid token is found (from @first),
      # or a token appearing in @follow appears.
      token = skip_until_valid(todo_stack)
      
      # At this point, token is either nil, in the first set of the production,
      # or in the follow set of this production or any previous production
      debug("parse(production)") do
        "token #{token ? token.representation.inspect : 'nil'}, " + 
        "prod #{cur_prod.inspect}, " + 
        "depth #{depth}"
      end
      
      # Got an opened production
      onStart(cur_prod)
      # No token available: leave the main loop; the remaining stack entries
      # are unwound by the loop after it.
      break if token.nil?
      
      if prod_branch = @branch[cur_prod]
        @recovering = false
        # Term sequence selected by the current token; nil when the branch has no entry for it.
        sequence = prod_branch[token.representation]
        debug("parse(production)", :level => 2) do
          "token #{token.representation.inspect} " +
          "prod #{cur_prod.inspect}, " + 
          "prod_branch #{prod_branch.keys.inspect}, " +
          "sequence #{sequence.inspect}"
        end

        if sequence.nil?
          if prod_branch.has_key?(:_empty)
            debug("parse(production)", :level => 2) {"empty sequence for _empty"}
          else
            # If there is no sequence for this production, we're
            # in error recovery, and _token_ has been advanced to
            # the point where it can reasonably follow this production
          end
        end
        # Queue the selected terms; with no sequence the entry keeps its empty term list.
        todo_stack.last[:terms] += sequence if sequence
      else
        # Is this a fatal error?
        error("parse(fatal?)", "No branches found for #{cur_prod.inspect}",
          :production => cur_prod, :token => token)
      end
    end
    
    debug("parse(terms)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}"}
    # `to_a` lets this loop tolerate a nil :terms value.
    while !todo_stack.last[:terms].to_a.empty?
      begin
        # Get the next term in this sequence
        term = todo_stack.last[:terms].shift
        debug("parse(token)") {"accept #{term.inspect}"}
        # A truthy result from #accept is treated as the matched token
        # (presumably consumed from the lexer -- confirm in #accept).
        if token = accept(term)
          @recovering = false
          debug("parse(token)") {"token #{token.inspect}, term #{term.inspect}"}
          onToken(term, token)
        elsif terminals.include?(term)
          # If term is a terminal, then it is an error if token does not
          # match it
          skip_until_valid(todo_stack)
        else
          # If it's not a string (a symbol), it is a non-terminal and we push the new state
          todo_stack << {:prod => term, :terms => nil}
          debug("parse(push)", :level => 2) {"term #{term.inspect}, depth #{depth}"}
          pushed = true
          break
        end
      end
    end
    
    # After completing the last production in a sequence, pop down until we find a production
    #
    # If in recovery mode, continue popping until we find a term with a follow list
    while !pushed &&
          !todo_stack.empty? &&
          ( (terms = todo_stack.last.fetch(:terms, [])).empty? ||
            (@recovering && @follow.fetch(terms.last, []).none? {|t| token == t}))
      debug("parse(pop)", :level => 2) {"todo #{todo_stack.last.inspect}, depth #{depth}"}
      if terms.empty?
        # NOTE(review): `prod` is reassigned here but not read again in this method.
        prod = todo_stack.last[:prod]
        todo_stack.pop
        onFinish
      else
        # Stop recovering when we a production which starts with the term
        # Clearing @recovering makes the loop condition false on its next check
        # (terms is non-empty here), which ends the pop loop.
        debug("parse(pop)", :level => 2) {"recovery complete"}
        @recovering = false
      end
    end
  end

  # Any token still available from the lexer at this point is reported as an error.
  error("parse(eof)", "Finished processing before end of file", :token => @lexer.first) if @lexer.first

  # Continue popping contexts off of the stack
  while !todo_stack.empty?
    debug("parse(eof)", :level => 2) {"stack #{todo_stack.last.inspect}, depth #{depth}"}
    # There can't be anything left to do, or if there is, it must be optional
    last_terms = todo_stack.last[:terms]
    # Report an error when terms remain and none of them lists :_eps in its @first entry.
    if last_terms.length > 0 && last_terms.none? {|t|
      @first.has_key?(t) && @first[t].include?(:_eps)
    }
      error("parse(eof)",
        "End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}"
      )
    end
    todo_stack.pop
    onFinish
  end
  
  # When all is said and done, raise the error log
  # (presumably populated by #error -- confirm there); entries are joined into one message.
  unless @error_log.empty?
    raise Error, @error_log.join("\n\t") 
  end
end

#prod_dataObject

Current ProdData element



345
# File 'lib/ebnf/ll1/parser.rb', line 345

# Current ProdData element: the last entry of the @prod_data stack.
#
# @return [Object] the last element of @prod_data
def prod_data
  @prod_data.last
end