Class: Kanocc::Kanocc

Inherits:
Object
  • Object
show all
Defined in:
lib/kanocc.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(start_symbol) ⇒ Kanocc

Creates a new instance of Kanocc with the given start symbol. From the start symbol, Kanocc deduces the grammar and the grammar symbols.



89
90
91
92
93
94
95
96
97
# File 'lib/kanocc.rb', line 89

# Builds a Kanocc instance for the grammar rooted at +start_symbol+.
#
# Sets up a logger writing to STDOUT (level WARN, empty datetime format),
# a scanner told to recognize every token that find_tokens reports for the
# start symbol, and an EarleyParser that is given this instance and the
# logger.
def initialize(start_symbol)
  @start_symbol = start_symbol

  @logger = Logger.new(STDOUT).tap do |log|
    log.datetime_format = ""
    log.level = Logger::WARN
  end

  @scanner = Scanner.new
  grammar_tokens = find_tokens(@start_symbol)
  @scanner.set_recognized(*grammar_tokens)

  @parser = EarleyParser.new(self, @logger)
end

Instance Attribute Details

#loggerObject

Returns the value of attribute logger.



83
84
85
# File 'lib/kanocc.rb', line 83

# Reader for the logger held in @logger.
def logger; @logger; end

#parserObject

Returns the value of attribute parser.



83
84
85
# File 'lib/kanocc.rb', line 83

# Reader for the parser held in @parser.
def parser; @parser; end

Instance Method Details

#find_tokens(nonterminal) ⇒ Object



199
200
201
202
203
# File 'lib/kanocc.rb', line 199

# Returns an Array of the distinct grammar symbols that find_tokens_helper
# collects when started from +nonterminal+.
#
# The helper records each collected symbol as a key of the hash passed to
# it; the keys of that hash are the result.
def find_tokens(nonterminal)
  {}.tap { |seen| find_tokens_helper(nonterminal, seen) }.keys
end

#find_tokens_helper(nonterminal, collected_tokens, visited_nonterminals = {}) ⇒ Object



204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/kanocc.rb', line 204

# Walks the rules of +nonterminal+ and records every right-hand-side symbol
# that is not a Nonterminal subclass as a key in +collected_tokens+.
#
# nonterminal::          object responding to +rules+; each rule responds to +rhs+.
# collected_tokens::     Hash that receives <tt>symbol => true</tt> entries.
# visited_nonterminals:: Hash of nonterminals already expanded; it stops the
#                        recursion from looping on recursive grammars.
def find_tokens_helper(nonterminal, collected_tokens, visited_nonterminals = {})
  return if visited_nonterminals[nonterminal]

  visited_nonterminals[nonterminal] = true
  nonterminal.rules.each do |rule|
    rule.rhs.each do |symbol|
      is_nonterminal = symbol.is_a?(Class) && symbol.ancestors.member?(Nonterminal)
      if is_nonterminal
        # Descend into the referenced nonterminal, sharing the visited set.
        find_tokens_helper(symbol, collected_tokens, visited_nonterminals)
      else
        collected_tokens[symbol] = true
      end
    end
  end
end

#parse(input) ⇒ Object

Consumes input. Kanocc parses the input according to the given rules and:

  • if parsing succeeds, returns an instance of the grammar's start symbol.

Input may be a String or an IO object.



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/kanocc.rb', line 108

# Parses +input+ and returns the first element of the first stack entry left
# after the parser has run (on success, an instance of the grammar's start
# symbol).
#
# input:: a String, or an IO whose remaining content is read in full.
#
# Raises RuntimeError when +input+ is neither a String nor an IO, or when no
# start symbol has been set.
def parse(input)
  # Normalize to a String once. Both the scanner and report_token (which
  # slices @input) must see the text, not the IO object, which is already
  # drained after reading.
  @input =
    case input
    when IO then input.read
    when String then input
    else raise "Input must be a string or an IO object"
    end
  raise "Start symbol not defined" unless @start_symbol

  @scanner.input = @input
  @parser.start_symbol = @start_symbol
  @stack = []
  @parser.parse(@scanner)
  @logger.info("Stack: " + @stack.inspect)
  @stack[0][0]
end

#parse_file(file) ⇒ Object



126
127
128
129
130
131
132
133
134
# File 'lib/kanocc.rb', line 126

# Reads +file+ in full and parses its content with #parse.
#
# file:: a String, taken to be a path (expanded with File.expand_path), or
#        any object responding to +read+. An object passed in by the caller
#        is not closed here.
#
# Returns whatever #parse returns for the content.
def parse_file(file)
  input =
    if file.is_a?(String) # Then we assume it's a path
      # Block form closes the file even when reading raises.
      File.open(File.expand_path(file), &:read)
    else
      file.read
    end
  parse(input)
end

#report_reduction(rule) ⇒ Object

The parser must call this method when it has decided upon a reduction. As its argument it should give the rule by which to reduce.



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/kanocc.rb', line 154

# Replaces the top <tt>rule.rhs.length</tt> stack entries with a new instance
# of <tt>rule.lhs</tt>, pushed as <tt>[instance, start_pos, end_pos]</tt>.
#
# When the rule names a method, that method is invoked on the new instance
# with <tt>@rhs</tt> temporarily set to an Rhs built from the popped symbols,
# the positions and the input; <tt>@rhs</tt> is put back afterwards.
#
# Raises RuntimeError if the stack holds fewer entries than the rule's
# right-hand side.
def report_reduction(rule)
  @logger.info "Reducing by #{rule.inspect}"
  arity = rule.rhs.length
  raise "Fatal: stack too short!" if @stack.length < arity

  nonterminal = rule.lhs.new
  popped = @stack.slice!(-arity, arity)

  start_pos, end_pos =
    if arity > 0
      # Span from the start of the first popped entry to the end of the last.
      [popped[0][1], popped[-1][2]]
    elsif !@stack.empty?
      # Empty right-hand side: zero-width span at the end of the stack top.
      boundary = @stack[-1][2]
      [boundary, boundary]
    else
      [0, 0]
    end

  action = rule.method
  if action
    rhs = Rhs.new(popped.map { |entry| entry[0] }, start_pos, end_pos, @input)
    saved_rhs = nonterminal.instance_variable_get('@rhs')
    nonterminal.instance_variable_set('@rhs', rhs)
    nonterminal.send(action)
    nonterminal.instance_variable_set('@rhs', saved_rhs)
  end

  @stack.push([nonterminal, start_pos, end_pos])
  show_stack
end

#report_token(lexical_match, terminal) ⇒ Object

The parser must call this method when it consumes a token. As arguments it should give the LexicalMatch and the matched terminal.



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/kanocc.rb', line 180

# Pushes the consumed terminal onto the stack as
# <tt>[instance, start_pos, start_pos + length]</tt>.
#
# If +terminal+ is a class, a new instance is created, its +m+ is set to the
# match of the regexp (obtained from +lexical_match+) against the matched
# slice of the input, and the method the class associates with that regexp,
# if any, is invoked on it. Otherwise +terminal+ itself (a string literal)
# is pushed.
def report_token(lexical_match, terminal)
  start_pos = lexical_match.start_pos
  length = lexical_match.length
  matched_text = @input.slice(start_pos, length)

  instance =
    if terminal.class == Class # It's a token
      token = terminal.new
      pattern = lexical_match.regexp(terminal)
      token.m = pattern.match(matched_text)
      action = terminal.method(pattern)
      token.send(action) if action
      token
    else # It's a string literal
      terminal
    end

  @stack.push([instance, start_pos, start_pos + length])
  show_stack
end

#set_tokens(*tokens) ⇒ Object

Define which tokens Kanocc should recognize. If this method is not called, Kanocc will scan for those tokens that are mentioned in the grammar. set_tokens takes a variable number of arguments. Each argument must be either a string or a class which is a subclass of Kanocc::Token.



148
149
150
# File 'lib/kanocc.rb', line 148

# Forwards +tokens+ unchanged to the scanner's set_recognized.
def set_tokens(*tokens); @scanner.set_recognized(*tokens); end

#set_whitespace(*ws) ⇒ Object

Define whitespace. By default, Kanocc will recognize anything that matches /\s/ as whitespace. set_whitespace takes a variable number of arguments, each of which must be a regular expression.



140
141
142
# File 'lib/kanocc.rb', line 140

# Forwards the whitespace patterns +ws+ unchanged to the scanner's
# set_whitespace.
def set_whitespace(*ws); @scanner.set_whitespace(*ws); end

#show_grammar_symbol(gs) ⇒ Object



228
229
230
231
232
233
234
235
236
# File 'lib/kanocc.rb', line 228

# Renders one grammar symbol as a String for debugging output.
#
# Token instances show their class, the inspected matched text (+m[0]+) and
# their start/end positions; Nonterminal instances show their class and
# positions; anything else is shown via +inspect+.
def show_grammar_symbol(gs)
  case gs
  when Token
    "#{gs.class}(#{gs.m[0].inspect}, #{gs.start_pos}, #{gs.end_pos})"
  when Nonterminal
    "#{gs.class}(#{gs.start_pos}, #{gs.end_pos})"
  else
    gs.inspect
  end
end

#show_grammar_symbols(tokens) ⇒ Object



224
225
226
# File 'lib/kanocc.rb', line 224

# Renders a list of grammar symbols as a bracketed, comma-separated String,
# using show_grammar_symbol for each element.
def show_grammar_symbols(tokens)
  rendered = tokens.map { |token| show_grammar_symbol(token) }
  "[#{rendered.join(', ')}]"
end

#show_stackObject

For debugging



220
221
222
# File 'lib/kanocc.rb', line 220

# For debugging: logs the current parse stack at INFO level.
#
# Does nothing (and returns nil) when no logger is set. The message is
# passed as a block so the stack is only inspected when the logger will
# actually emit INFO messages; this method runs after every token and every
# reduction.
def show_stack
  return unless @logger

  @logger.info { "Stack: #{@stack.inspect}" }
end