Module: Parse

Defined in:
lib/rpdf2txt-rockit/rockit.rb,
lib/rpdf2txt-rockit/bootstrap.rb,
lib/rpdf2txt-rockit/rockit_grammars_parser.rb,
lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb,
lib/rpdf2txt-rockit/lalr_parsetable_generator.rb

Overview

Facade for rockit

Defined Under Namespace

Classes: LaLr1ParseTableGenerator, RockitProductionsEvaluator, StateGraph

Constant Summary collapse

# Token definitions shared by the bootstrap grammars (they are passed as the
# token list to every Grammar.new call in bootstrap.rb).
# Each entry is created with t(name, regexp[, option]); Blank and Comment carry
# the :Skip option (presumably meaning the lexer discards them -- confirm).
# The local variables assigned inline (string, regexp, arrow, symbol_name,
# production_reference) are referenced by the production lists defined after
# this constant, so the assignments must stay in this scope.
RockitTokens =
[
  blank = t("Blank", /\s+/n, :Skip),
  comment = t("Comment", /#.*$/n, :Skip),
  string = t("String", /('((\\')|[^'])*')|("((\\")|[^"])*")/n),
  regexp = t("Regexp", /\/((\\\/)|[^\/])*\/[iomx]*/n),
  arrow = t("Arrow", /(->)|(::=)|(:)/n),
  symbol_name = t("SymbolName", /[A-Z][A-Za-z]*/n),
  production_reference = t("ProductionReference", /[A-Z][A-Za-z]*\d+/n)
]
# Productions for the "Tokens" section of a Rockit grammar:
#   Tokens    -> 'Tokens' TokenSpec+
#   TokenSpec -> SymbolName '=' (String | Regexp) TokenOpts?
#   TokenOpts -> '[' :Skip ']'   (the option text is matched case-insensitively)
# The stb(...) argument names the AST node and its children for each production
# (presumably a syntax-tree-builder helper defined elsewhere -- confirm).
RockitTokenProds =
[
  prod(:Tokens, ['Tokens', plus(:TokenSpec)], stb(:^, [:_, :tokens])),
  prod(:TokenSpec, 
[symbol_name, '=', ore(string, regexp), maybe(:TokenOpts)],
stb(nil, [:tokenname, :_, :regexp, :options])),
  prod(:TokenOpts, ['[', /:Skip/in, ']'], stb(:^, [:_, :options, :_]))
]
# Productions for the "Priorities" section of a Rockit grammar:
#   Priorities -> 'Priorities' Priority+
#   Priority   -> ('left(' | 'right(') list(ProdRef, ',') ')'   => Associativity node
#   Priority   -> ProdRef (('>' | '=') ProdRef)+                => Precedence node
#   Priority   -> Priority ','                                  (trailing comma allowed)
#   ProdRef    -> SymbolName | ProductionReference
# The Associativity and Precedence node names are the ones dispatched on in
# priorities_as_relations.
RockitPrioritiesProds =
[
  prod(:Priorities, ['Priorities', plus(:Priority)], stb(:^, [:_, :prios])),
  prod(:Priority, [ore('left(', 'right('), liste(:ProdRef, ','), ')'], 
stb(:Associativity, [:relation, :productionrefs, :_])),
  prod(:Priority, [:ProdRef, plus(ore('>', '='), :ProdRef)],
stb(:Precedence, [:first, :rest])),
  prod(:Priority, [:Priority, ','], stb(:^, [:prio, :_])),
  prod(:ProdRef, [ore(symbol_name, production_reference)], stb(:^))
]
# Productions for the "Productions" section of a Rockit grammar:
#   Productions -> 'Productions' Prod+
#   Prod        -> SymbolName Arrow list(Alt, '|')
#   Alt         -> Element+ AstSpec?
#   Element     -> SymbolName
#                | String | Regexp            => ImplicitToken
#                | Element '?'                => Maybe
#                | Element '+'                => Plus
#                | Element '*'                => Mult
#                | '(' list(Element, '|') ')' => Or
#                | '(' Element+ ')'           => Sequence
#                | 'list(' Element ',' Element ')' => List
#   AstSpec     -> '[' ProdSpec? ElemSpecs? ']'
#   ElemSpecs   -> ': ' list(ElemSpec, ',')
#   ElemSpec    -> /[a-z]+/ | '_'
#   ProdSpec    -> SymbolName | '^'
# NOTE(review): the Or and Sequence alternatives both start with '(' and can
# match the same single-element input; presumably the generalized LR parser
# is relied on to cope with that -- confirm.
RockitProductionsProds =
[
  prod(:Productions, ['Productions', plus(:Prod)], 
stb(:Productions, [:_, :productions])),
  prod(:Prod, [symbol_name, arrow, liste(:Alt, '|')],
stb(nil, [:nonterminal, :_, :alts])),
  prod(:Alt, [plus(:Element), maybe(:AstSpec)], 
stb(nil, [:elements, :astspec])),
  prod(:Element, [symbol_name], stb(:^)),
  prod(:Element, [ore(string, regexp)], stb(:ImplicitToken, [:regexp])),
  prod(:Element, [:Element, '?'], stb(:Maybe, [:element, :_])),
  prod(:Element, [:Element, '+'], stb(:Plus, [:element, :_])),
  prod(:Element, [:Element, '*'], stb(:Mult, [:element, :_])),
  prod(:Element, ['(', liste(:Element, '|'), ')'], 
stb(:Or, [:_,:elements, :_])),
  prod(:Element, ['(', plus(:Element), ')'], 
stb(:Sequence, [:_, :elements, :_])),
  prod(:Element, ['list(', :Element, ',', :Element, ')'],
stb(:List, [:_, :element, :_, :delimiter])),
  prod(:AstSpec, ['[', maybe(:ProdSpec), maybe(:ElemSpecs), ']'],
stb(nil, [:_, :prodspec, :elemspecs, :_])),
  prod(:ElemSpecs, [': ', liste(:ElemSpec, ',')], stb(:^, [:_, :specs])),
  prod(:ElemSpec, [ore(/[a-z]+/n, '_')], stb(:^)),
  prod(:ProdSpec, [ore(symbol_name, '^')], stb(:^, [:name])),
]
# Complete production list for a Rockit grammar file: the top-level Grammar
# production followed by the token, production and priority productions.
#   Grammar -> 'Grammar' <language name> Tokens? Productions Priorities?
# The resulting Grammar node exposes :language, :tokens, :productions and
# :priorities, which rockit_grammar_eval reads.
RockitProds =
[
  prod(:Grammar, 
['Grammar', /[A-Za-z]+([-_]*[A-Za-z\d]+)*/n,
  maybe(:Tokens), :Productions, maybe(:Priorities)], 
stb(:Grammar, [:_, :language, :tokens, :productions, :priorities]))
] + RockitTokenProds + RockitProductionsProds + RockitPrioritiesProds
# Parse table that Parse.rockit_grammars_parser hands to GeneralizedLrParser.
# NOTE(review): productions, tokens, priorities, action_table and goto_hash are
# not defined in this excerpt -- presumably they are set up earlier in
# rockit_grammars_parser.rb; confirm before moving this assignment.
@@parse_table538472678 =
ParseTable.new(productions,tokens,priorities,action_table,goto_hash,2,[
  :REDUCE,
  :SHIFT,
  :ACCEPT
])

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.generate_parser(aString, parserType = GeneralizedLrParser, tableGenerator = LaLr1ParseTableGenerator) ⇒ Object



16
17
18
19
20
21
22
23
# File 'lib/rpdf2txt-rockit/rockit.rb', line 16

# Builds a parser from the text of a Rockit grammar.
#
# aString        - grammar source text; parsed with rockit_grammars_parser.
# parserType     - parser class that is instantiated with the parse table.
# tableGenerator - class used to generate the parse table from the grammar.
#
# Returns the object produced by Parse.parser_from_grammar.
def Parse.generate_parser(aString,
                          parserType = GeneralizedLrParser,
                          tableGenerator = LaLr1ParseTableGenerator)
  ast = rockit_grammars_parser.parse(aString)
  ast.compact!
  grammar = rockit_grammar_eval(ast)
  # Forward the caller's choices so that non-default parserType and
  # tableGenerator arguments actually take effect.
  Parse.parser_from_grammar(grammar, parserType, tableGenerator)
end

.generate_parser_from_file_to_file(grammarFile, outputFile, parserName = nil, moduleName = "Parse", grammarParser = rockit_grammars_parser, parserType = GeneralizedLrParser, tableGen = LaLr1ParseTableGenerator) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/rpdf2txt-rockit/rockit.rb', line 25

# Reads a Rockit grammar from grammarFile, generates a parser for it and
# writes the parser's Ruby source to outputFile.
#
# grammarFile   - path of the grammar file to read.
# outputFile    - path of the Ruby file to write.
# parserName    - nil or a String is converted with as_module_method_named
#                 (using "parser" when nil); any other value is used as given.
# moduleName    - module name passed to as_module_method_named and
#                 to_src_in_module.
# grammarParser - parser used to parse the grammar text.
# parserType    - parser class instantiated with the generated parse table.
# tableGen      - class used to generate the parse table.
#
# Progress for each phase is reported through time_and_puts.
def Parse.generate_parser_from_file_to_file(grammarFile,
                                            outputFile,
                                            parserName = nil,
                                            moduleName = "Parse",
                                            grammarParser = rockit_grammars_parser,
                                            parserType = GeneralizedLrParser,
                                            tableGen = LaLr1ParseTableGenerator)
  if parserName.nil? || parserName.kind_of?(String)
    parserName = as_module_method_named(moduleName, parserName || "parser")
  end
  grammar_text = File.read(grammarFile)
  ast, grammar, parser = nil, nil, nil
  time_and_puts("Parsing #{grammarFile}") {
    ast = grammarParser.parse(grammar_text)
    ast.compact!
  }

  # File.open(outputFile + ".graph", "w") {|f| f.write ast.to_graph}

  time_and_puts("Building grammar from abstract syntax tree") {
    grammar = rockit_grammar_eval(ast)
  }
  time_and_puts("Generating parser from grammar") {
    # Forward parserType and tableGen so that non-default arguments are
    # honoured instead of being silently ignored.
    parser = Parse.parser_from_grammar(grammar, parserType, tableGen)
  }
  File.open(outputFile, "w") do |f|
    time_and_puts("Writing parser to file #{outputFile}") {
      f.write "# encoding: ascii-8bit\nrequire 'rpdf2txt-rockit/rockit'\n" +
              parser.to_src_in_module(parserName, moduleName)
    }
  end
end

.map_symbolnames_to_productions(relationsWithStringsAsProductions, productions) ⇒ Object

Maps the symbol names used in the priorities section to the corresponding productions. The reference for a production is determined by these rules:

1. It is the AST node name of the production's tree_builder if that name is
     unique and is not "^", "_" or nil, or
2. It is the nonterminal name followed by an index that starts at 1 and is
     counted separately for each nonterminal.


66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb', line 66

# Replaces the production references held in each relation's +left+ and
# +right+ with the productions they refer to.
#
# The relations are returned untouched when +productions+ is nil/false.
# Otherwise every relation is updated in place and the array of relations is
# returned. Raises a RuntimeError when either side of a relation has no entry
# in the reference map built by production_reference_map.
def map_symbolnames_to_productions(relationsWithStringsAsProductions,
                                   productions)
  return relationsWithStringsAsProductions if !productions

  lookup = production_reference_map(productions)
  relationsWithStringsAsProductions.map do |rel|
    lhs = lookup[rel.left]
    rhs = lookup[rel.right]
    if lhs.nil? || rhs.nil?
      raise "Could not map #{rel.inspect} to the productions involved"
    end
    rel.left = lhs
    rel.right = rhs
    rel
  end
end

.parser_from_grammar(aGrammar, parserType = GeneralizedLrParser, tableGenerator = LaLr1ParseTableGenerator) ⇒ Object



9
10
11
12
13
14
# File 'lib/rpdf2txt-rockit/rockit.rb', line 9

# Creates a parser for +aGrammar+: a +tableGenerator+ instance generates the
# parse table, which is then passed to +parserType.new+.
def Parse.parser_from_grammar(aGrammar,
                              parserType = GeneralizedLrParser,
                              tableGenerator = LaLr1ParseTableGenerator)
  generator = tableGenerator.new(aGrammar)
  parserType.new(generator.generate_parse_table)
end

.priorities_as_relations(prioritiesAst) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb', line 33

# Converts the nodes of a priorities AST into relation objects.
#
# "Associativity" nodes are turned into a call to the function named by the
# node's relation lexeme ('left(' or 'right(' in the bootstrap grammar) with
# the referenced names as Ruby symbols. "Precedence" nodes are walked pairwise:
# each '>' yields decreasing_precedence(a, b) and each '=' yields
# equal_precedence(a, b). Nodes with any other name are ignored.
#
# Returns an Array with one entry per relation, in source order.
def priorities_as_relations(prioritiesAst)
  collected = []
  prioritiesAst.each do |node|
    case node.name
    when "Associativity"
      # Uses the left/right shorthand functions from conflict_resolution.rb.
      # NOTE(review): the call is assembled as a string and eval'ed, so the
      # lexemes must come from a trusted grammar file.
      symbol_literals = node.productionrefs.map { |ref| ":" + ref.lexeme }
      call_source = node.relation.lexeme + symbol_literals.join(',') + ')'
      collected << eval(call_source)
    when "Precedence"
      lhs = node.first
      node.rest.childrens.each do |operator, rhs|
        sign = operator.lexeme
        if sign == '>'
          collected.push decreasing_precedence(lhs.lexeme.intern,
                                               rhs.lexeme.intern)
        elsif sign == '='
          collected.push equal_precedence(lhs.lexeme.intern,
                                          rhs.lexeme.intern)
        end
        # The right-hand side becomes the left-hand side of the next pair.
        lhs = rhs
      end
    end
  end
  collected
end

.production_reference_map(productions) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb', line 81

# Builds a Hash from reference name (Symbol) to production.
#
# A production is keyed by the node name of its tree_builder when that name
# is used by exactly one production and is not "^", "_", :_, :^ or nil.
# Otherwise it is keyed by its nonterminal name followed by the production's
# 1-based position among the productions of that nonterminal.
def production_reference_map(productions)
  name_uses = Hash.new(0)             # AST node name => number of productions using it
  seen_per_nonterminal = Hash.new(0)  # nonterminal name => productions seen so far
  ordinal_of = {}                     # production => its index within its nonterminal

  # First pass: count the AST names and number the productions.
  productions.each do |prod|
    name_uses[prod.tree_builder.node_name] += 1
    ordinal_of[prod] = (seen_per_nonterminal[prod.nonterminal.name] += 1)
  end

  # Second pass: choose the reference name for every production.
  unusable_names = ["^", "_", :_, :^, nil]
  references = {}
  productions.each do |prod|
    node_name = prod.tree_builder.node_name
    key =
      if !unusable_names.include?(node_name) && name_uses[node_name] == 1
        node_name.intern
      else
        (prod.nonterminal.name + ordinal_of[prod].inspect).intern
      end
    references[key] = prod
  end

  references
end

.rockit_grammar_eval(grammarAst) ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb', line 174

# Turns a parsed grammar AST into a Grammar object.
#
# Tokens default to an empty Array when the AST has no tokens section, and
# priorities are nil when it has no priorities section. Productions are
# evaluated against the tokens, priorities against the productions.
def rockit_grammar_eval(grammarAst)
  tokens = grammarAst.tokens ? rockit_tokens_eval(grammarAst.tokens) : []
  productions = rockit_productions_eval(grammarAst.productions, tokens)
  priorities =
    if grammarAst.priorities
      rockit_priorities_eval(grammarAst.priorities, productions)
    else
      nil
    end
  Grammar.new(grammarAst.language.lexeme, productions, tokens, priorities)
end

.rockit_grammars_bootstrap_parser ⇒ Object



99
100
101
102
# File 'lib/rpdf2txt-rockit/bootstrap.rb', line 99

# Generates a parser for Rockit grammar files from the hand-written
# RockitProds and RockitTokens definitions.
def rockit_grammars_bootstrap_parser
  bootstrap_grammar = Grammar.new("RockitGrammar", RockitProds, RockitTokens)
  Parse.parser_from_grammar(bootstrap_grammar)
end

.rockit_grammars_parser ⇒ Object



124
125
126
# File 'lib/rpdf2txt-rockit/rockit_grammars_parser.rb', line 124

# Returns a new GeneralizedLrParser built on the parse table stored in
# @@parse_table538472678. A fresh parser object is created on every call.
def Parse.rockit_grammars_parser
  GeneralizedLrParser.new(@@parse_table538472678)
end

.rockit_priorities_eval(prioritiesAst, productions = nil) ⇒ Object

Evaluate priorities section. If given an array with productions we try to map the given SymbolNames to productions; if not we simply use the symbol names (as Ruby symbols) directly and the mapping must be done later.



26
27
28
29
30
# File 'lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb', line 26

# Evaluates a priorities AST into a ProductionPriorities object.
#
# The relations from priorities_as_relations are flattened and then passed
# through map_symbolnames_to_productions together with +productions+.
def rockit_priorities_eval(prioritiesAst, productions = nil)
  flat_relations = priorities_as_relations(prioritiesAst).flatten
  resolved = map_symbolnames_to_productions(flat_relations, productions)
  ProductionPriorities.new(resolved)
end

.rockit_productions_eval(productionsAst, tokens = []) ⇒ Object

Returns an Array with the productions from a productions AST. If an Array of tokens is given, the token with the matching name is substituted for each name. If no matching token is found, a Ruby Symbol is inserted instead.



113
114
115
116
# File 'lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb', line 113

# Evaluates a productions AST with a RockitProductionsEvaluator that is
# constructed from +tokens+, and returns the evaluator's result.
def rockit_productions_eval(productionsAst, tokens = [])
  RockitProductionsEvaluator.new(tokens).eval_ast(productionsAst)
end

.rockit_productions_parser ⇒ Object



85
86
87
88
89
# File 'lib/rpdf2txt-rockit/bootstrap.rb', line 85

# Generates a parser for the productions section only, from
# RockitProductionsProds and RockitTokens.
def rockit_productions_parser
  productions_grammar = Grammar.new("RockitProductions",
                                    RockitProductionsProds,
                                    RockitTokens)
  Parse.parser_from_grammar(productions_grammar)
end

.rockit_tokens_eval(tokensAst) ⇒ Object

Evaluates a Rockit tokens section and returns an Array with the tokens.



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb', line 4

# Evaluates a tokens AST and returns an Array of tokens built with t(...).
#
# Only nodes named "TokenSpec" produce a token. Option handling is minimal:
# when a node has any options at all the token is created with :Skip, since
# that is the only option the grammar accepts.
#
# NOTE(review): the String/Regexp lexeme is turned into a Ruby object with
# eval, so the grammar text must come from a trusted source.
def rockit_tokens_eval(tokensAst)
  collected = []
  tokensAst.each do |node|
    next unless node.name == "TokenSpec"

    token_args = [node.tokenname.lexeme, eval(node.regexp.lexeme)]
    token_args << :Skip if node.options
    collected.push t(*token_args)
  end
  collected
end

Instance Method Details

#rockit_priorities_parser ⇒ Object



54
55
56
57
58
# File 'lib/rpdf2txt-rockit/bootstrap.rb', line 54

# Generates a parser for the priorities section only, from
# RockitPrioritiesProds and RockitTokens.
def rockit_priorities_parser
  priorities_grammar = Grammar.new("RockitPriorities",
                                   RockitPrioritiesProds,
                                   RockitTokens)
  Parse.parser_from_grammar(priorities_grammar)
end

#rockit_tokens_parser ⇒ Object



39
40
41
42
# File 'lib/rpdf2txt-rockit/bootstrap.rb', line 39

# Generates a parser for the tokens section only, from RockitTokenProds and
# RockitTokens.
def rockit_tokens_parser
  tokens_grammar = Grammar.new("RockitTokens", RockitTokenProds, RockitTokens)
  Parse.parser_from_grammar(tokens_grammar)
end