Class: LL::GrammarCompiler

Inherits:
Object
  • Object
show all
Defined in:
lib/ll/grammar_compiler.rb

Overview

The GrammarCompiler class processes an AST (as parsed from an LL(1) grammar) and returns an CompiledGrammar instance.

Instance Method Summary collapse

Instance Method Details

#compile(ast) ⇒ LL::CompiledGrammar



11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/ll/grammar_compiler.rb', line 11

def compile(ast)
  compiled = CompiledGrammar.new

  process(ast, compiled)

  warn_for_unused_terminals(compiled)
  warn_for_unused_rules(compiled)

  verify_first_first(compiled)
  verify_first_follow(compiled)

  return compiled
end

#lookup_identifiers(node, compiled_grammar) ⇒ Array (private)

See Also:

  • LL::GrammarCompiler.[[#process]


404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
# File 'lib/ll/grammar_compiler.rb', line 404

def lookup_identifiers(node, compiled_grammar)
  idents = []

  node.children.each do |ident_node|
    retval = process(ident_node, compiled_grammar)

    # Literal rule/terminal names.
    if retval.is_a?(String)
      ident = compiled_grammar.lookup_identifier(retval)

      undefined_identifier!(retval, ident_node, compiled_grammar) if !ident
    # Epsilon
    else
      ident = retval
    end

    if ident
      ident.increment_references if ident.respond_to?(:increment_references)

      idents << ident
    end
  end

  return idents
end

#on_branch(node, compiled_grammar) ⇒ LL::Branch

Processes a single rule branch.

See Also:

  • LL::GrammarCompiler.[[#process]


301
302
303
304
305
306
307
308
309
310
311
# File 'lib/ll/grammar_compiler.rb', line 301

def on_branch(node, compiled_grammar)
  steps = process(node.children[0], compiled_grammar)

  if node.children[1]
    code = process(node.children[1], compiled_grammar)
  else
    code = nil
  end

  return Branch.new(steps, node.source_line, code)
end

#on_epsilon(node, compiled_grammar) ⇒ LL::Epsilon

Processes an epsilon.

See Also:

  • LL::GrammarCompiler.[[#process]


243
244
245
# File 'lib/ll/grammar_compiler.rb', line 243

def on_epsilon(node, compiled_grammar)
  return Epsilon.new(node.source_line)
end

#on_grammar(node, compiled_grammar) ⇒ Object

Processes the root node of a grammar.



146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/ll/grammar_compiler.rb', line 146

def on_grammar(node, compiled_grammar)
  # Create the prototypes for all rules since rules can be referenced before
  # they are defined.
  node.children.each do |child|
    if child.type == :rule
      on_rule_prototype(child, compiled_grammar)
    end
  end

  node.children.each do |child|
    process(child, compiled_grammar)
  end
end

#on_header(node, compiled_grammar) ⇒ Object

Processes a %header directive.

See Also:

  • LL::GrammarCompiler.[[#process]


213
214
215
# File 'lib/ll/grammar_compiler.rb', line 213

def on_header(node, compiled_grammar)
  compiled_grammar.header = process(node.children[0], compiled_grammar)
end

#on_ident(node, compiled_grammar) ⇒ String

Extracts the name from an identifier.

See Also:

  • LL::GrammarCompiler.[[#process]


233
234
235
# File 'lib/ll/grammar_compiler.rb', line 233

def on_ident(node, compiled_grammar)
  return node.children[0]
end

#on_inner(node, compiled_grammar) ⇒ Object

Processes an %inner directive.

See Also:

  • LL::GrammarCompiler.[[#process]


204
205
206
# File 'lib/ll/grammar_compiler.rb', line 204

def on_inner(node, compiled_grammar)
  compiled_grammar.inner = process(node.children[0], compiled_grammar)
end

#on_name(node, compiled_grammar) ⇒ Object

Sets the name of the parser.



166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/ll/grammar_compiler.rb', line 166

def on_name(node, compiled_grammar)
  if compiled_grammar.name
    compiled_grammar.add_warning(
      "Overwriting existing parser name #{compiled_grammar.name.inspect}",
      node.source_line
    )
  end

  parts = node.children.map { |child| process(child, compiled_grammar) }

  compiled_grammar.name = parts.join('::')
end

#on_plus(node, compiled_grammar) ⇒ LL::Operator

Processes the "+" operator.



351
352
353
354
355
356
357
358
359
360
361
362
363
# File 'lib/ll/grammar_compiler.rb', line 351

def on_plus(node, compiled_grammar)
  steps = lookup_identifiers(node, compiled_grammar)
  name  = "_ll_plus#{node.source_line.line}#{node.source_line.column}"
  rule  = Rule.new(name, node.source_line)

  rule.add_branch(steps, node.source_line)

  rule.increment_references

  compiled_grammar.add_rule(rule)

  return Operator.new(:plus, rule, node.source_line)
end

#on_question(node, compiled_grammar) ⇒ LL::Operator

Processes the "?" operator.



372
373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/ll/grammar_compiler.rb', line 372

def on_question(node, compiled_grammar)
  steps = lookup_identifiers(node, compiled_grammar)
  name  = "_ll_question#{node.source_line.line}#{node.source_line.column}"
  rule  = Rule.new(name, node.source_line)

  rule.add_branch(steps, node.source_line)

  rule.increment_references

  compiled_grammar.add_rule(rule)

  return Operator.new(:question, rule, node.source_line)
end

#on_ruby(node, compiled_grammar) ⇒ String

Processes a node containing Ruby source code.

See Also:

  • LL::GrammarCompiler.[[#process]


223
224
225
# File 'lib/ll/grammar_compiler.rb', line 223

def on_ruby(node, compiled_grammar)
  return node.children[0]
end

#on_rule(node, compiled_grammar) ⇒ Object

Processes the assignment of a rule.

See Also:

  • LL::GrammarCompiler.[[#process]


252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/ll/grammar_compiler.rb', line 252

def on_rule(node, compiled_grammar)
  name = process(node.children[0], compiled_grammar)

  if compiled_grammar.has_terminal?(name)
    compiled_grammar.add_error(
      "the rule name #{name.inspect} is already used as a terminal name",
      node.source_line
    )
  end

  if compiled_grammar.has_rule_with_branches?(name)
    compiled_grammar.add_error(
      "the rule #{name.inspect} has already been defined",
      node.source_line
    )

    return
  end

  branches = node.children[1..-1].map do |child|
    process(child, compiled_grammar)
  end

  rule = compiled_grammar.lookup_rule(name)

  rule.branches.concat(branches)
end

#on_rule_prototype(node, compiled_grammar) ⇒ Object

Creates a basic prototype for a rule.

See Also:

  • LL::GrammarCompiler.[[#process]


285
286
287
288
289
290
291
292
293
# File 'lib/ll/grammar_compiler.rb', line 285

def on_rule_prototype(node, compiled_grammar)
  name = process(node.children[0], compiled_grammar)

  return if compiled_grammar.has_rule?(name)

  rule = Rule.new(name, node.source_line)

  compiled_grammar.add_rule(rule)
end

#on_star(node, compiled_grammar) ⇒ LL::Operator

Processes the "*" operator.



330
331
332
333
334
335
336
337
338
339
340
341
342
# File 'lib/ll/grammar_compiler.rb', line 330

def on_star(node, compiled_grammar)
  steps = lookup_identifiers(node, compiled_grammar)
  name  = "_ll_star#{node.source_line.line}#{node.source_line.column}"
  rule  = Rule.new(name, node.source_line)

  rule.add_branch(steps, node.source_line)

  rule.increment_references

  compiled_grammar.add_rule(rule)

  return Operator.new(:star, rule, node.source_line)
end

#on_steps(node, compiled_grammar) ⇒ Array

Processes the steps of a branch.

See Also:

  • LL::GrammarCompiler.[[#process]


319
320
321
# File 'lib/ll/grammar_compiler.rb', line 319

def on_steps(node, compiled_grammar)
  return lookup_identifiers(node, compiled_grammar)
end

#on_terminals(node, compiled_grammar) ⇒ Object

Processes the assignment of terminals.

See Also:

  • LL::GrammarCompiler.[[#process]


184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/ll/grammar_compiler.rb', line 184

def on_terminals(node, compiled_grammar)
  node.children.each do |child|
    name = process(child, compiled_grammar)

    if compiled_grammar.has_terminal?(name)
      compiled_grammar.add_error(
        "The terminal #{name.inspect} has already been defined",
        child.source_line
      )
    else
      compiled_grammar.add_terminal(name, child.source_line)
    end
  end
end

#process(node, compiled_grammar) ⇒ LL::CompiledGrammar



30
31
32
33
34
# File 'lib/ll/grammar_compiler.rb', line 30

def process(node, compiled_grammar)
  handler = "on_#{node.type}"

  return send(handler, node, compiled_grammar)
end

#undefined_identifier!(name, node, compiled_grammar) ⇒ Object (private)



393
394
395
396
397
398
# File 'lib/ll/grammar_compiler.rb', line 393

def undefined_identifier!(name, node, compiled_grammar)
  compiled_grammar.add_error(
    "Undefined terminal or rule #{name.inspect}",
    node.source_line
  )
end

#verify_first_first(compiled_grammar) ⇒ Object

Verifies all rules to see if they don't have any first/first conflicts. Errors are added for every rule where this is the case.



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/ll/grammar_compiler.rb', line 75

def verify_first_first(compiled_grammar)
  compiled_grammar.rules.each do |rule|
    conflicting = Set.new

    rule.branches.each do |branch|
      next if conflicting.include?(branch)

      rule.branches.each do |other_branch|
        next if branch == other_branch || conflicting.include?(other_branch)

        overlapping = branch.first_set & other_branch.first_set

        unless overlapping.empty?
          conflicting << branch
          conflicting << other_branch
        end
      end
    end

    unless conflicting.empty?
      compiled_grammar.add_error(
        'first/first conflict, multiple branches start with the same terminals',
        rule.source_line
      )

      conflicting.each do |branch|
        labels = branch.first_set.map do |token|
          token.is_a?(Epsilon) ? 'epsilon' : token.name
        end

        compiled_grammar.add_error(
          "branch starts with: #{labels.join(', ')}",
          branch.source_line
        )
      end
    end
  end
end

#verify_first_follow(compiled_grammar) ⇒ Object

Adds errors for any rules containing first/follow conflicts.



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/ll/grammar_compiler.rb', line 119

def verify_first_follow(compiled_grammar)
  compiled_grammar.rules.each do |rule|
    rule.branches.each do |branch|
      has_epsilon = branch.first_set.find { |step| step.is_a?(Epsilon) }

      if has_epsilon and !branch.follow_set.empty?
        compiled_grammar.add_error(
          'first/follow conflict, branch can start with epsilon and is ' \
            'followed by (non) terminals',
          branch.source_line
        )

        compiled_grammar.add_error(
          'epsilon originates from here',
          has_epsilon.source_line
        )
      end
    end
  end
end

#warn_for_unused_rules(compiled_grammar) ⇒ Object

Adds warnings for any unused rules. The first defined rule is skipped since it's the root rule.



42
43
44
45
46
47
48
49
50
51
# File 'lib/ll/grammar_compiler.rb', line 42

def warn_for_unused_rules(compiled_grammar)
  compiled_grammar.rules.each_with_index do |rule, index|
    next if index == 0 || rule.references > 0

    compiled_grammar.add_warning(
      "Unused rule #{rule.name.inspect}",
      rule.source_line
    )
  end
end

#warn_for_unused_terminals(compiled_grammar) ⇒ Object

Adds warnings for any unused terminals.



58
59
60
61
62
63
64
65
66
67
# File 'lib/ll/grammar_compiler.rb', line 58

def warn_for_unused_terminals(compiled_grammar)
  compiled_grammar.terminals.each do |terminal|
    next if terminal.references > 0

    compiled_grammar.add_warning(
      "Unused terminal #{terminal.name.inspect}",
      terminal.source_line
    )
  end
end