Class: Kanocc::Kanocc

Inherits:

Object

Object
Kanocc::Kanocc

show all

Defined in: lib/kanocc.rb

Instance Attribute Summary collapse

#logger ⇒ Object

Returns the value of attribute logger.
#parser ⇒ Object

Returns the value of attribute parser.

Instance Method Summary collapse

#find_tokens(nonterminal) ⇒ Object
#find_tokens_helper(nonterminal, collected_tokens, visited_nonterminals = {}) ⇒ Object
#initialize(start_symbol) ⇒ Kanocc constructor

Creates a new instance of Kanocc, with the given start symbol.
#parse(input) ⇒ Object

Consume input.
#parse_file(file) ⇒ Object
#report_reduction(rule) ⇒ Object

The parser must call this method when it has decided upon a reduction.
#report_token(lexical_match, terminal) ⇒ Object

The parser must call this method when it consumes a token. As arguments it should give the LexicalMatch and the matched terminal.
#set_tokens(*tokens) ⇒ Object

Define which tokens Kanocc should recognize.
#set_whitespace(*ws) ⇒ Object

Define whitespace.
#show_grammar_symbol(gs) ⇒ Object
#show_grammar_symbols(tokens) ⇒ Object
#show_stack ⇒ Object

For debugging.

Constructor Details

#initialize(start_symbol) ⇒ `Kanocc`

Creates a new instance of Kanocc, with the given start symbol. From the start_symbol, Kanocc will deduce the grammar and the grammar symbols.

# File 'lib/kanocc.rb', line 89

# Sets up a Kanocc instance for the grammar rooted at +start_symbol+.
#
# A logger writing to STDOUT at WARN level is created, the scanner is told to
# recognize every token that find_tokens collects from the start symbol, and
# an EarleyParser is built with this object and the logger.
def initialize(start_symbol)
  @start_symbol = start_symbol

  @logger = Logger.new(STDOUT).tap do |log|
    log.datetime_format = ""
    log.level = Logger::WARN
  end

  @scanner = Scanner.new
  grammar_tokens = find_tokens(@start_symbol)
  @scanner.set_recognized(*grammar_tokens)

  @parser = EarleyParser.new(self, @logger)
end

Instance Attribute Details

#logger ⇒ `Object`

Returns the value of attribute logger.



83
84
85

# File 'lib/kanocc.rb', line 83

# Reader for the logger held in @logger (the object this instance logs
# reductions and stack dumps to).
def logger
  @logger
end

#parser ⇒ `Object`

Returns the value of attribute parser.



83
84
85

# File 'lib/kanocc.rb', line 83

# Reader for the parser held in @parser (the object whose #parse is invoked
# with the scanner when input is parsed).
def parser
  @parser
end

Instance Method Details

#find_tokens(nonterminal) ⇒ `Object`

# File 'lib/kanocc.rb', line 199

# Collects the grammar symbols that find_tokens_helper records while walking
# the rules reachable from +nonterminal+, and returns them as an Array
# (the keys of the hash the helper fills in).
def find_tokens(nonterminal)
  tokens = {}.tap { |seen| find_tokens_helper(nonterminal, seen) }
  tokens.keys
end

#find_tokens_helper(nonterminal, collected_tokens, visited_nonterminals = {}) ⇒ `Object`

# File 'lib/kanocc.rb', line 204

# Recursive worker for find_tokens.
#
# Walks every rule of +nonterminal+. A right-hand-side symbol that is a class
# with Nonterminal among its ancestors is descended into; any other symbol is
# recorded as a key of +collected_tokens+. +visited_nonterminals+ remembers
# which nonterminals were already expanded so recursive grammars terminate.
def find_tokens_helper(nonterminal, collected_tokens, visited_nonterminals = {})
  return if visited_nonterminals[nonterminal]

  visited_nonterminals[nonterminal] = true
  nonterminal.rules.each do |rule|
    rule.rhs.each do |symbol|
      if symbol.is_a?(Class) && symbol.ancestors.include?(Nonterminal)
        find_tokens_helper(symbol, collected_tokens, visited_nonterminals)
      else
        collected_tokens[symbol] = true
      end
    end
  end
end

#parse(input) ⇒ `Object`

Consume input. Kanocc will parse the input according to the given rules and, if parsing succeeds, return an instance of the grammar's start symbol.

Input may be a String or an IO object.

# File 'lib/kanocc.rb', line 108

# Parses +input+ with the scanner and parser configured for this instance.
#
# input - a String, or an IO whose remaining content is read in full.
#
# Returns the first element of the bottom stack entry left after the parser
# has run.
# Raises RuntimeError if input is neither a String nor an IO, or if no start
# symbol is set.
def parse(input)
  @input =
    if input.is_a?(IO)
      input.read
    elsif input.is_a?(String)
      input
    else
      raise "Input must be a string or an IO object"
    end
  raise "Start symbol not defined" unless @start_symbol

  # Give the scanner the text obtained above, never the raw IO: the stream
  # has already been drained, and report_token slices @input, so it must be
  # a String.
  @scanner.input = @input
  @parser.start_symbol = @start_symbol
  @stack = []
  @parser.parse(@scanner)
  @logger.info("Stack: " + @stack.inspect)
  @stack[0][0]
end

#parse_file(file) ⇒ `Object`

# File 'lib/kanocc.rb', line 126

# Parses the contents of a file.
#
# file - a String, which is treated as a path and expanded with
#        File.expand_path, or any other object, on which #read is called.
#        An object passed in by the caller is never closed here.
#
# Returns whatever #parse returns for the contents that were read.
def parse_file(file)
  # File.read opens, reads and closes in one step, so the handle opened for a
  # path is released even when reading raises.
  contents = file.is_a?(String) ? File.read(File.expand_path(file)) : file.read
  parse(contents)
end

#report_reduction(rule) ⇒ `Object`

The parser must call this method when it has decided upon a reduction. As its argument it should give the rule by which to reduce.

# File 'lib/kanocc.rb', line 154

# Called by the parser once it has settled on a reduction by +rule+.
#
# Pops as many entries off @stack as the rule has right-hand-side symbols,
# instantiates the rule's left-hand side, and pushes
# [instance, start_pos, end_pos] in their place. If the rule carries a method,
# it is invoked on the new instance while @rhs temporarily holds an Rhs built
# from the popped symbols.
#
# Raises RuntimeError when the stack holds fewer entries than the rule needs.
def report_reduction(rule)
  @logger.info "Reducing by " + rule.inspect
  arity = rule.rhs.length
  raise "Fatal: stack too short!" if @stack.length < arity

  nonterminal = rule.lhs.new
  children = @stack.slice!(-arity, arity)

  # Span of the reduction: from the first child to the last one; an empty
  # rule sits at the end of whatever is now on top of the stack, or at 0.
  start_pos, end_pos =
    if arity > 0
      [children[0][1], children[-1][2]]
    elsif @stack.length > 0
      [@stack[-1][2], @stack[-1][2]]
    else
      [0, 0]
    end

  if rule.method
    rhs = Rhs.new(children.map { |entry| entry[0] }, start_pos, end_pos, @input)
    # Expose the rhs only for the duration of the semantic action.
    saved_rhs = nonterminal.instance_variable_get('@rhs')
    nonterminal.instance_variable_set('@rhs', rhs)
    nonterminal.send(rule.method)
    nonterminal.instance_variable_set('@rhs', saved_rhs)
  end

  @stack.push([nonterminal, start_pos, end_pos])
  show_stack
end

#report_token(lexical_match, terminal) ⇒ `Object`

The parser must call this method when it consumes a token. As arguments it should give the LexicalMatch and the matched terminal.

# File 'lib/kanocc.rb', line 180

# Called by the parser when it consumes a token.
#
# lexical_match - supplies start_pos, length and the regexp that matched.
# terminal      - either a class (a token type) or a literal value.
#
# For a class, an instance is created, its +m+ is set to the match of the
# regexp against the consumed text, and the method the class associates with
# that regexp (if any) is invoked. Anything else is pushed as is. The entry
# pushed onto @stack is [instance, start_pos, start_pos + length].
def report_token(lexical_match, terminal)
  start_pos = lexical_match.start_pos
  length = lexical_match.length
  matched_text = @input.slice(start_pos, length)

  # A non-class terminal (a string literal) stands for itself.
  instance = terminal
  if terminal.class == Class
    instance = terminal.new
    regexp = lexical_match.regexp(terminal)
    instance.m = regexp.match(matched_text)
    action = terminal.method(regexp)
    instance.send(action) if action
  end

  @stack.push([instance, start_pos, start_pos + length])
  show_stack
end

#set_tokens(*tokens) ⇒ `Object`

Define which tokens Kanocc should recognize. If this method is not called, Kanocc will scan for those tokens that are mentioned in the grammar. set_tokens takes a variable number of arguments. Each argument must be either a string or a class which is a subclass of Kanocc::Token.



148
149
150

# File 'lib/kanocc.rb', line 148

# Tells the scanner which tokens to recognize by passing every argument on to
# @scanner.set_recognized. Each argument is expected to be a String or a
# subclass of Kanocc::Token.
def set_tokens(*tokens)
  @scanner.set_recognized(*tokens)
end

#set_whitespace(*ws) ⇒ `Object`

Define whitespace. By default, Kanocc will recognize anything that matches /\s/ as whitespace. set_whitespace takes a variable number of arguments, each of which must be a regular expression.



140
141
142

# File 'lib/kanocc.rb', line 140

# Defines what the scanner treats as whitespace by passing every argument on
# to @scanner.set_whitespace. Each argument is expected to be a regular
# expression.
def set_whitespace(*ws)
  @scanner.set_whitespace(*ws)
end

#show_grammar_symbol(gs) ⇒ `Object`

# File 'lib/kanocc.rb', line 228

# Renders a single grammar symbol as a short string for debug output.
#
# A Token shows its class, the inspected matched text and its positions; a
# Nonterminal shows its class and positions; anything else is shown via
# #inspect.
def show_grammar_symbol(gs)
  case gs
  when Token
    "#{gs.class}(#{gs.m[0].inspect}, #{gs.start_pos}, #{gs.end_pos})"
  when Nonterminal
    "#{gs.class}(#{gs.start_pos}, #{gs.end_pos})"
  else
    gs.inspect
  end
end

#show_grammar_symbols(tokens) ⇒ `Object`



224
225
226

# File 'lib/kanocc.rb', line 224

# Renders a list of grammar symbols as "[a, b, ...]", formatting each element
# with show_grammar_symbol.
def show_grammar_symbols(tokens)
  rendered = tokens.map { |symbol| show_grammar_symbol(symbol) }
  "[#{rendered.join(", ")}]"
end

#show_stack ⇒ `Object`

For debugging



220
221
222

# File 'lib/kanocc.rb', line 220

# Debugging aid: logs the inspected parse stack at info level.
# Does nothing (and returns nil) when no logger is set.
def show_stack
  return unless @logger

  @logger.info("Stack: #{@stack.inspect}")
end

Class: Kanocc::Kanocc

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(start_symbol) ⇒ Kanocc

Instance Attribute Details

#logger ⇒ Object

#parser ⇒ Object

Instance Method Details

#find_tokens(nonterminal) ⇒ Object

#find_tokens_helper(nonterminal, collected_tokens, visited_nonterminals = {}) ⇒ Object

#parse(input) ⇒ Object

#parse_file(file) ⇒ Object

#report_reduction(rule) ⇒ Object

#report_token(lexical_match, terminal) ⇒ Object

#set_tokens(*tokens) ⇒ Object

#set_whitespace(*ws) ⇒ Object

#show_grammar_symbol(gs) ⇒ Object

#show_grammar_symbols(tokens) ⇒ Object

#show_stack ⇒ Object