Module: EBNF::LL1::Parser

Defined in:
lib/ebnf/ll1/parser.rb

Overview

A Generic LL1 parser using a lexer and branch tables defined using the SWAP tool chain (modified).

Defined Under Namespace

Modules: ClassMethods Classes: Error

Constant Summary collapse

DEBUG_LEVEL =

level above which debug messages are supressed

10

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#linenoInteger (readonly)

Returns line number of current token.

Returns:

  • (Integer)

    line number of current token



14
15
16
# File 'lib/ebnf/ll1/parser.rb', line 14

def lineno
  @lineno
end

Class Method Details

.included(base) ⇒ Object



16
17
18
# File 'lib/ebnf/ll1/parser.rb', line 16

def self.included(base)
  base.extend(ClassMethods)
end

Instance Method Details

#add_prod_data(sym, *values) ⇒ Object

Add values to production data, values aranged as an array



451
452
453
454
455
456
457
# File 'lib/ebnf/ll1/parser.rb', line 451

def add_prod_data(sym, *values)
  return if values.compact.empty?

  prod_data[sym] ||= []
  prod_data[sym] += values
  debug("add_prod_data(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
end

#add_prod_datum(sym, values) ⇒ Object

Add a single value to prod_data, allows for values to be an array



435
436
437
438
439
440
441
442
443
444
445
446
447
448
# File 'lib/ebnf/ll1/parser.rb', line 435

def add_prod_datum(sym, values)
  case values
  when Array
    prod_data[sym] ||= []
    debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} += #{values.inspect}"}
    prod_data[sym] += values
  when nil
    return
  else
    prod_data[sym] ||= []
    debug("add_prod_datum(#{sym})") {"#{prod_data[sym].inspect} << #{values.inspect}"}
    prod_data[sym] << values
  end
end

#depthObject



429
# File 'lib/ebnf/ll1/parser.rb', line 429

def depth; (@productions || []).length; end

#parse(input = nil, start = nil, options = {}) {|context, *data| ... } ⇒ EBNF::LL1::Parser

Initializes a new parser instance.

Attempts to recover from errors.

Examples:

require 'rdf/ll1/parser'

class MyParser
  include EBNF::LL1::Parser

  branch      MyParser::BRANCH

  ##
  # Defines a production called during before parsing a non-terminal
  # with data from previous production along with data defined for the
  # current production
  #
  start_production :object do |input, current, callback|
    # Note production as triples for blankNodePropertyList
    # to set :subject instead of :resource
    current[:triples] = true
  end

  ##
  # Defines a production called during after parsing a non-terminal
  # with data from previous production along with data defined for the
  # current production
  #
  # callback to processor block
  production :object do |input, current, callback|
    object = current[:resource]
    callback.call :statement, RDF::Statement.new(input[:subject], input[:predicate], object)
  end

  ##
  # Defines the pattern for a terminal node
  terminal :BLANK_NODE_LABEL, %r(_:(#{PN_LOCAL})) do |production, token, input|
    input[:BLANK_NODE_LABEL] = RDF::Node.new(token)
  end

  ##
  # Iterates the given block for each RDF statement in the input.
  #
  # @yield  [statement]
  # @yieldparam [RDF::Statement] statement
  # @return [void]
  def each_statement(&block)
    @callback = block

    parse(START.to_sym) do |context, *data|
      case context
      when :statement
        yield *data
      end
    end
  end

end

Parameters:

  • input (String, #to_s) (defaults to: nil)
  • start (Symbol, #to_s) (defaults to: nil)

    The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.

  • options (Hash{Symbol => Object}) (defaults to: {})

Options Hash (options):

  • :branch (Hash{Symbol,String => Hash{Symbol,String => Array<Symbol,String>}})

    LL1 branch table.

  • :first (HHash{Symbol,String => Array<Symbol,String>}) — default: {}

    Lists valid terminals that can precede each production (for error recovery).

  • :follow (Hash{Symbol,String => Array<Symbol,String>}) — default: {}

    Lists valid terminals that can follow each production (for error recovery).

  • :validate (Boolean) — default: false

    whether to validate the parsed statements and values. If not validating, the parser will attempt to recover from errors.

  • :progress (Boolean)

    Show progress of parser productions

  • :debug (Boolean)

    Detailed debug output

  • :reset_on_start (Boolean)

    Reset the parser state if the start token set with ‘prod` is found in a production. This reduces the production stack depth growth, which is appropriate for some grammars.

Yields:

  • (context, *data)

    Yields for to return data to parser

Yield Parameters:

  • context (:statement, :trace)

    Context for block

  • *data (Symbol)

    Data specific to the call

Returns:

Raises:

  • (Exception)

    Raises exceptions for parsing errors or errors raised during processing callbacks. Internal errors are raised using Error.

See Also:



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
# File 'lib/ebnf/ll1/parser.rb', line 220

def parse(input = nil, start = nil, options = {}, &block)
  @options = options.dup
  @options[:debug] ||= case
  when @options[:progress] then 2
  when @options[:validate] then 1
  end
  @branch  = options[:branch]
  @first  = options[:first] ||= {}
  @follow  = options[:follow] ||= {}
  @lexer   = input.is_a?(Lexer) ? input : Lexer.new(input, self.class.patterns, @options)
  @productions = []
  @parse_callback = block
  @recovering = false
  @error_log = []
  terminals = self.class.patterns.map(&:type)  # Get defined terminals to help with branching

  # Unrecoverable errors
  raise Error, "Branch table not defined" unless @branch && @branch.length > 0
  raise Error, "Starting production not defined" unless start

  @prod_data = [{}]
  start = start.split('#').last.to_sym unless start.is_a?(Symbol)
  todo_stack = [{:prod => start, :terms => nil}]

  while !todo_stack.empty?
    begin
      @recovering = false
      pushed = false
      if todo_stack.last[:terms].nil?
        todo_stack.last[:terms] = []
        cur_prod = todo_stack.last[:prod]

        # If cur_prod is the starting production, we can reset the stack
        # to the beginning to avoid excessive growth in the production
        # stack
        if options[:reset_on_start] && cur_prod == start
          todo_stack = [{:prod => start, :terms => []}]
          @productions = []
          @prod_data = [{}]
        end

        # Fetch the current token
        token = get_token(:recover)

        # At this point, token is either nil, in the first set of the production,
        # or in the follow set of this production or any previous production
        debug("parse(production)") do
          "token #{token ? token.representation.inspect : 'nil'}, " +
          "prod #{cur_prod.inspect}, " +
          "depth #{depth}"
        end

        # Got an opened production
        onStart(cur_prod)

        if token.nil?
          if  !(first_include?(cur_prod, :_eps) && follow_include?(cur_prod, :_eof))
            # End of file, and production does not contain eps, or it does, but follow does not contain eof 
            raise Error.new("Unexpected end of input", :production => cur_prod)
          else
            debug("parse(production)") {"End of input prod #{cur_prod.inspect}"}
          end
        elsif prod_branch = @branch[cur_prod]
          sequence = prod_branch.fetch(token.representation) do
            raise Error.new("Expected one of #{@first[cur_prod].inspect}",
              :token => token, :production => cur_prod)
          end
          debug("parse(production)") do
            "token #{token.representation.inspect} " +
            "prod #{cur_prod.inspect}, " +
            "prod_branch #{prod_branch.keys.inspect}, " +
            "sequence #{sequence.inspect}"
          end
          todo_stack.last[:terms] += sequence
        else
          raise Error.new("Unexpected",
            :production => cur_prod, :token => token)
        end
      end

      debug("parse(terms)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
      while !todo_stack.last[:terms].to_a.empty?
        # Get the next term in this sequence
        term = todo_stack.last[:terms].shift
        debug("parse(token)") {"accept #{term.inspect}"}

        if token = accept(term)
          debug("parse(token)") {"token #{token.inspect}, term #{term.inspect}"}
          onTerminal(term, token)
        elsif terminals.include?(term) 
          # If term is a terminal, then it is an error if token does not
          # match it
          raise Error.new("Expected #{term.inspect}",
            :token => get_token, :production => cur_prod)
        else
          token = get_token

          # If token is not in firsts of term, but eps is, skip to next
          # term
          if first_include?(term, :_eps) && !first_include?(term, token)
            debug("parse(token)") {"skip optional term #{term.inspect} on #{token.inspect}"}
            break
          else
            # Push term onto stack
            todo_stack << {:prod => term, :terms => nil}
            debug("parse(push)") {"term #{term.inspect}, depth #{depth}"}
            pushed = true
            break
          end
        end
      end
    rescue Lexer::Error, Error => e
      # Lexer encountered an illegal token or the parser encountered
      # a terminal which is inappropriate for the current production.
      # Perform error recovery to find a reasonable terminal based
      # on the follow sets of the relevant productions. This includes
      # remaining terms from the current production and the stacked
      # productions
      @lineno = e.lineno
      if e.is_a?(Lexer::Error)
        # Skip to the next valid terminal
        @lexer.recover
        error("parse(#{e.class})", "With input '#{e.input}': #{e.message}",
              :production => @productions.last)
      else
        # Otherwise, the terminal is fine, just not for this production.
        @lexer.shift
        error("parse(#{e.class})", "#{e.message}",
              :production => @productions.last, :token => e.token)
      end

      # Get the list of follows for this sequence, this production and the stacked productions.
      debug("recovery", "stack follows:", :level => 4)
      todo_stack.reverse.each do |todo|
        debug("recovery", :level => 4) {"  #{todo[:prod]}: #{@follow[todo[:prod]].inspect}"}
      end

      # Find all follows to the top of the stack
      follows = todo_stack.inject([]) do |follow, todo|
        prod = todo[:prod]
        follow += @follow[prod] || []
      end.uniq
      debug("recovery") {"follows: #{follows.inspect}"}

      # Skip tokens until one is found in follows
      while (token = get_token(:recover)) && follows.none? {|t| token === t}
        skipped = @lexer.shift
        progress("recovery") {"skip #{skipped.inspect}"}
      end
      debug("recovery") {"found #{token.inspect} in follows"}

      # Pop stack elements until token is in follows
      while !todo_stack.empty? &&
            !follow_include?(todo_stack.last[:prod], token || :_eof)
        debug("recovery(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
        todo_stack.pop
        onFinish
      end

      # Token is now in the first of the top production
      unless todo_stack.empty?
        todo_stack.pop
        onFinish
      end

      if todo_stack.empty?
        # Recovered to end of last production
        warn("recover", "recovered to end of productions")
      else
        warn("recover", "recovered to #{todo_stack.last[:prod].inspect} with #{token.inspect}")
      end

      @recovering = false
    ensure
      # After completing the last production in a sequence, pop down until we find a production
      #
      # If in recovery mode, continue popping until we find a term with a follow list
      while !pushed &&
            !todo_stack.empty? &&
            todo_stack.last.fetch(:terms, []).empty?
        debug("parse(pop)") {"todo #{todo_stack.last.inspect}, depth #{depth}"}
        todo_stack.pop
        onFinish
      end
    end
  end

  error("parse(eof)", "Finished processing before end of file", :token => @lexer.first) if @lexer.first

  # Continue popping contexts off of the stack
  while !todo_stack.empty?
    debug("parse(eof)") {"stack #{todo_stack.last.inspect}, depth #{depth}"}
    # There can't be anything left to do, or if there is, it must be optional
    last_terms = todo_stack.last[:terms]
    if last_terms.length > 0 && last_terms.none? {|t|first_include?(t, :_eps)}
      error("parse(eof)",
        "End of input before end of production: stack #{todo_stack.last.inspect}, depth #{depth}"
      )
    end
    todo_stack.pop
    onFinish
  end

  # When all is said and done, raise the error log
  unless @error_log.empty?
    raise Error, @error_log.join("\n")
  end
end

#prod_dataObject

Current ProdData element



432
# File 'lib/ebnf/ll1/parser.rb', line 432

def prod_data; @prod_data.last; end