Class: EBNF::Rule

Inherits:
Object
  • Object
show all
Defined in:
lib/ebnf/rule.rb

Overview

Represent individual parsed rules

Constant Summary collapse

BNF_OPS =

Operations which are flattened to seprate rules in to_bnf.

%w{
  alt opt plus seq star
}.map(&:to_sym).freeze
TERM_OPS =
%w{
  diff hex range
}.map(&:to_sym).freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil) ⇒ Rule

Returns a new instance of Rule.

Parameters:

  • sym (Symbol)
  • id (Integer)
  • expr (Array)
  • kind (Symbol) (defaults to: nil)

    (nil)

  • ebnf (String) (defaults to: nil)

    (nil)

  • first (Array) (defaults to: nil)

    (nil)

  • follow (Array) (defaults to: nil)

    (nil)

  • start (Boolean) (defaults to: nil)

    (nil)

  • top_rule (Rule) (defaults to: nil)

    (nil)

  • cleanup (Boolean) (defaults to: nil)

    (nil)



72
73
74
75
76
77
78
79
80
81
82
# File 'lib/ebnf/rule.rb', line 72

def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
  @sym, @id = sym, id
  @expr = expr.is_a?(Array) ? expr : [:seq, expr]
  @ebnf, @kind, @first, @follow, @start, @cleanup, @top_rule = ebnf, kind, first, follow, start, cleanup, top_rule
  @top_rule ||= self
  @kind ||= case
  when sym.to_s == sym.to_s.upcase then :terminal
  when !BNF_OPS.include?(@expr.first) then :terminal
  else :rule
  end
end

Instance Attribute Details

#cleanupObject

Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF



60
61
62
# File 'lib/ebnf/rule.rb', line 60

def cleanup
  @cleanup
end

#compRule

A comprehension is a sequence which contains all elements but the first of the original rule.

Returns:



27
28
29
# File 'lib/ebnf/rule.rb', line 27

def comp
  @comp
end

#exprArray

Rule expression

Returns:

  • (Array)


37
38
39
# File 'lib/ebnf/rule.rb', line 37

def expr
  @expr
end

#firstArray<Rule> (readonly)

Terminals that immediately procede this rule

Returns:



47
48
49
# File 'lib/ebnf/rule.rb', line 47

def first
  @first
end

#followArray<Rule> (readonly)

Terminals that immediately follow this rule

Returns:



52
53
54
# File 'lib/ebnf/rule.rb', line 52

def follow
  @follow
end

#idString

ID of rule

Returns:

  • (String)


22
23
24
# File 'lib/ebnf/rule.rb', line 22

def id
  @id
end

#kind:rule, ...

Kind of rule

Returns:

  • (:rule, :terminal, or :pass)


32
33
34
# File 'lib/ebnf/rule.rb', line 32

def kind
  @kind
end

#origString

Original EBNF

Returns:

  • (String)


42
43
44
# File 'lib/ebnf/rule.rb', line 42

def orig
  @orig
end

#startBoolean

Indicates that this is a starting rule

Returns:

  • (Boolean)


57
58
59
# File 'lib/ebnf/rule.rb', line 57

def start
  @start
end

#symSymbol

Symbol of rule

Returns:

  • (Symbol)


18
19
20
# File 'lib/ebnf/rule.rb', line 18

def sym
  @sym
end

Class Method Details

.from_sxp(sxp) ⇒ Rule

Return a rule from its SXP representation:

Also may have ‘(first …)`, `(follow …)`, or `(start #t)`.

Examples:

inputs

(pass (plus (range "#x20\\t\\r\\n")))
(rule ebnf "1" (star (alt declaration rule)))
(terminal O_ENUM "17" (seq "[^" (plus CHAR) "]"))

Parameters:

  • sxp (Array)

Returns:



96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/ebnf/rule.rb', line 96

def self.from_sxp(sxp)
  expr = sxp.detect {|e| e.is_a?(Array) && ![:first, :follow, :start].include?(e.first.to_sym)}
  first = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :first}
  first = first[1..-1] if first
  follow = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :follow}
  follow = follow[1..-1] if follow
  cleanup = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :cleanup}
  cleanup = cleanup[1..-1] if cleanup
  start = sxp.any? {|e| e.is_a?(Array) && e.first.to_sym == :start}
  sym = sxp[1] if sxp[1].is_a?(Symbol)
  id = sxp[2] if sxp[2].is_a?(String)
  self.new(sym, id, expr, kind: sxp.first, first: first, follow: follow, cleanup: cleanup, start: start)
end

Instance Method Details

#<=>(other) ⇒ Object

Rules compare using their ids



443
444
445
446
447
448
449
# File 'lib/ebnf/rule.rb', line 443

def <=>(other)
  if id.to_i == other.id.to_i
    id.to_s <=> other.id.to_s
  else
    id.to_i <=> other.id.to_i
  end
end

#==(other) ⇒ Boolean

Two rules are equal if they have the same #sym, #kind and #expr.

Parameters:

Returns:

  • (Boolean)


428
429
430
431
432
# File 'lib/ebnf/rule.rb', line 428

def ==(other)
  sym   == other.sym &&
  kind  == other.kind &&
  expr  == other.expr
end

#add_first(terminals) ⇒ Integer

Add terminal as proceding this rule.

Parameters:

  • terminals (Array<Rule, Symbol, String>)

Returns:

  • (Integer)

    if number of terminals added



363
364
365
366
367
368
# File 'lib/ebnf/rule.rb', line 363

def add_first(terminals)
  @first ||= []
  terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - @first
  @first += terminals
  terminals.length
end

#add_follow(terminals) ⇒ Integer

Add terminal as following this rule. Don’t add _eps as a follow

Parameters:

  • terminals (Array<Rule, Symbol, String>)

Returns:

  • (Integer)

    if number of terminals added



374
375
376
377
378
379
380
381
382
# File 'lib/ebnf/rule.rb', line 374

def add_follow(terminals)
  # Remove terminals already in follows, and empty string
  terminals = terminals.map {|t| t.is_a?(Rule) ? t.sym : t} - (@follow || []) - [:_eps]
  unless terminals.empty?
    @follow ||= []
    @follow += terminals
  end
  terminals.length
end

#alt?Boolean

Is this rule of the form (alt …)?

Returns:

  • (Boolean)


404
405
406
# File 'lib/ebnf/rule.rb', line 404

def alt?
  expr.is_a?(Array) && expr.first == :alt
end

#build(expr, kind: nil, cleanup: nil, **options) ⇒ Object

Build a new rule creating a symbol and numbering from the current rule Symbol and number creation is handled by the top-most rule in such a chain.

Parameters:

  • expr (Array)
  • kind (Symbol) (defaults to: nil)

    (nil)

  • cleanup (Hash{Symbol => Symbol}) (defaults to: nil)

    (nil)

  • options (Hash{Symbol => Object})


117
118
119
120
121
122
123
124
125
# File 'lib/ebnf/rule.rb', line 117

def build(expr, kind: nil, cleanup: nil, **options)
  new_sym, new_id = (@top_rule ||self).send(:make_sym_id)
  self.class.new(new_sym, new_id, expr,
                 kind: kind,
                 ebnf: @ebnf,
                 top_rule: (@top_rule || self),
                 cleanup: cleanup,
                 **options)
end

#equivalent?(other) ⇒ Boolean

Two rules are equivalent if they have the same #expr.

Parameters:

Returns:

  • (Boolean)


438
439
440
# File 'lib/ebnf/rule.rb', line 438

def equivalent?(other)
  expr == other.expr
end

#first_includes_eps?Boolean

Do the firsts of this rule include the empty string?

Returns:

  • (Boolean)


355
356
357
# File 'lib/ebnf/rule.rb', line 355

def first_includes_eps?
  @first && @first.include?(:_eps)
end

#for_sxpArray

Return representation for building S-Expressions.

Returns:

  • (Array)


130
131
132
133
134
135
136
137
138
139
# File 'lib/ebnf/rule.rb', line 130

def for_sxp
  elements = [kind, sym]
  elements << id if id
  elements << [:start, true] if start
  elements << first.sort_by(&:to_s).unshift(:first) if first
  elements << follow.sort_by(&:to_s).unshift(:follow) if follow
  elements << [:cleanup, cleanup] if cleanup
  elements << expr
  elements
end

#inspectObject



418
419
420
421
422
# File 'lib/ebnf/rule.rb', line 418

def inspect
  "#<EBNF::Rule:#{object_id} " +
  {sym: sym, id: id, kind: kind, expr: expr}.inspect +
  ">"
end

#non_terminals(ast) ⇒ Array<Rule>

Return the non-terminals for this rule. For seq, this is the first non-terminal in the sequence. For alt, this is every non-terminal in the alt.

Parameters:

  • ast (Array<Rule>)

    The set of rules, used to turn symbols into rules

Returns:



303
304
305
306
307
308
309
310
311
312
313
# File 'lib/ebnf/rule.rb', line 303

def non_terminals(ast)
  @non_terms ||= (alt? ? expr[1..-1] : expr[1,1]).map do |sym|
    case sym
    when Symbol
      r = ast.detect {|r| r.sym == sym}
      r if r && r.rule?
    else
      nil
    end
  end.compact
end

#pass?Boolean

Is this a pass?

Returns:

  • (Boolean)


393
394
395
# File 'lib/ebnf/rule.rb', line 393

def pass?
  kind == :pass
end

#rule?Boolean

Is this a rule?

Returns:

  • (Boolean)


399
400
401
# File 'lib/ebnf/rule.rb', line 399

def rule?
  kind == :rule
end

#seq?Boolean

Is this rule of the form (seq …)?

Returns:

  • (Boolean)


409
410
411
# File 'lib/ebnf/rule.rb', line 409

def seq?
  expr.is_a?(Array) && expr.first == :seq
end

#starts_with?(sym) ⇒ Array<Symbol, String>

Does this rule start with a sym? It does if expr is that sym, expr starts with alt and contains that sym, or expr starts with seq and the next element is that sym.

Parameters:

  • sym (Symbol, class)

    Symbol matching any start element, or if it is String, any start element which is a String

Returns:

  • (Array<Symbol, String>)

    list of symbol (singular), or strings which are start symbol, or nil if there are none



342
343
344
345
346
347
348
349
350
# File 'lib/ebnf/rule.rb', line 342

def starts_with?(sym)
  if seq? && sym === (v = expr.fetch(1, nil))
    [v]
  elsif alt? && expr.any? {|e| sym === e}
    expr.select {|e| sym === e}
  else
    nil
  end
end

#terminal?Boolean

Is this a terminal?

Returns:

  • (Boolean)


387
388
389
# File 'lib/ebnf/rule.rb', line 387

def terminal?
  kind == :terminal
end

#terminals(ast) ⇒ Array<Rule>

Return the terminals for this rule. For seq, this is the first terminals or strings in the seq. For alt, this is every non-terminal ni the alt.

Parameters:

  • ast (Array<Rule>)

    The set of rules, used to turn symbols into rules

Returns:



321
322
323
324
325
326
327
328
329
330
331
332
333
# File 'lib/ebnf/rule.rb', line 321

def terminals(ast)
  @terms ||= (alt? ? expr[1..-1] : expr[1,1]).map do |sym|
    case sym
    when Symbol
      r = ast.detect {|r| r.sym == sym}
      r if r && r.terminal?
    when String
      sym
    else
      nil
    end
  end.compact
end

#to_bnfArray<Rule>

Transform EBNF rule to BNF rules:

* Transform (rule a "n" (op1 (op2))) into two rules:
  (rule a "n" (op1 _a_1))
  (rule _a_1 "n.1" (op2))
* Transform (rule a (opt b)) into (rule a (alt _empty b))
* Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
* Transform (rule a (plus b)) into (rule a (seq b (star b)

Transformation includes information used to re-construct non-transformed.

AST representation

Returns:



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/ebnf/rule.rb', line 189

def to_bnf
  return [self] unless rule?
  new_rules = []

  # Look for rules containing recursive definition and rewrite to multiple rules. If `expr` contains elements which are in array form, where the first element of that array is a symbol, create a new rule for it.
  if expr.any? {|e| e.is_a?(Array) && (BNF_OPS + TERM_OPS).include?(e.first)}
    #   * Transform (a [n] rule (op1 (op2))) into two rules:
    #     (a.1 [n.1] rule (op1 a.2))
    #     (a.2 [n.2] rule (op2))
    # duplicate ourselves for rewriting
    this = dup
    new_rules << this

    expr.each_with_index do |e, index|
      next unless e.is_a?(Array) && e.first.is_a?(Symbol)
      new_rule = build(e)
      this.expr[index] = new_rule.sym
      new_rules << new_rule
    end

    # Return new rules after recursively applying #to_bnf
    new_rules = new_rules.map {|r| r.to_bnf}.flatten
  elsif expr.first == :opt
    this = dup
    #   * Transform (rule a (opt b)) into (rule a (alt _empty b))
    this.expr = [:alt, :_empty, expr.last]
    this.cleanup = :opt
    new_rules = this.to_bnf
  elsif expr.first == :star
    #   * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
    this = dup
    this.cleanup = :star
    new_rule = this.build([:seq, expr.last, this.sym], cleanup: :merge)
    this.expr = [:alt, :_empty, new_rule.sym]
    new_rules = [this] + new_rule.to_bnf
  elsif expr.first == :plus
    #   * Transform (rule a (plus b)) into (rule a (seq b (star b)
    this = dup
    this.cleanup = :plus
    this.expr = [:seq, expr.last, [:star, expr.last]]
    new_rules = this.to_bnf
  elsif [:alt, :seq].include?(expr.first)
    # Otherwise, no further transformation necessary
    new_rules << self
  elsif [:diff, :hex, :range].include?(expr.first)
    # This rules are fine, the just need to be terminals
    raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
    new_rules << self
  else
    # Some case we didn't think of
    raise "Error trying to transform #{expr.inspect} to BNF"
  end
  
  return new_rules
end

#to_pegArray<Rule>

Transform EBNF rule for PEG:

* Transform (rule a "n" (op1 ... (op2 y) ...z)) into two rules:
  (rule a "n" (op1 ... _a_1 ... z))
  (rule _a_1 "n.1" (op2 y))

Returns:



253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/ebnf/rule.rb', line 253

def to_peg
  new_rules = []

  # Look for rules containing sub-sequences
  if expr.any? {|e| e.is_a?(Array) && e.first.is_a?(Symbol)}
    # duplicate ourselves for rewriting
    this = dup
    new_rules << this

    expr.each_with_index do |e, index|
      next unless e.is_a?(Array) && e.first.is_a?(Symbol)
      new_rule = build(e)
      this.expr[index] = new_rule.sym
      new_rules << new_rule
    end

    # Return new rules after recursively applying #to_bnf
    new_rules = new_rules.map {|r| r.to_peg}.flatten
  elsif [:diff, :hex, :range].include?(expr.first)
    # This rules are fine, the just need to be terminals
    raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
    new_rules << self
  else
    new_rules << self
  end
  
  return new_rules.map {|r| r.extend(EBNF::PEG::Rule)}
end

#to_regexpRegexp

For :hex or :range, create a regular expression.

Returns:

  • (Regexp)


286
287
288
289
290
291
292
293
294
295
# File 'lib/ebnf/rule.rb', line 286

def to_regexp
  case expr.first
  when :hex
    Regexp.new(translate_codepoints(expr[1]))
  when :range
    Regexp.new("[#{translate_codepoints(expr[1])}]")
  else
    raise "Can't turn #{expr.inspect} into a regexp"
  end
end

#to_rubyString

Return a Ruby representation of this rule

Returns:

  • (String)


171
172
173
# File 'lib/ebnf/rule.rb', line 171

def to_ruby
  "EBNF::Rule.new(#{sym.inspect}, #{id.inspect}, #{expr.inspect}#{', kind: ' + kind.inspect unless kind == :rule})"
end

#to_sxpString Also known as: to_s

Return SXP representation of this rule

Returns:

  • (String)


143
144
145
146
# File 'lib/ebnf/rule.rb', line 143

def to_sxp
  require 'sxp' unless defined?(SXP)
  for_sxp.to_sxp
end

#to_ttlString

Serializes this rule to an Turtle.

Returns:

  • (String)


153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/ebnf/rule.rb', line 153

def to_ttl
  @ebnf.debug("to_ttl") {inspect} if @ebnf
  comment = orig.to_s.strip.
    gsub(/"""/, '\"\"\"').
    gsub("\\", "\\\\").
    sub(/^\"/, '\"').
    sub(/\"$/m, '\"')
  statements = [
    %{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
    %{  rdfs:comment #{comment.inspect};},
  ]
  
  statements += ttl_expr(expr, terminal? ? "re" : "g", 1, false)
  "\n" + statements.join("\n")
end

#translate_codepoints(str) ⇒ Object

Utility function to translate code points of the form ‘#xN’ into ruby unicode characters



453
454
455
# File 'lib/ebnf/rule.rb', line 453

def translate_codepoints(str)
  str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
end