Class: OedipusLex

Inherits:
Object
  • Object
show all
Defined in:
lib/oedipus_lex.rex.rb,
lib/oedipus_lex.rb

Overview

Header Part

"class" Foo

["option"
  [options] ]

["inner"
  [methods] ]

["macro"
  [macro-name  /pattern/[flags]] ]

"rule"
  [:state | method_name]  /pattern/[flags]  [{ code } | method_name | :state]

"end"

Footer Part

Defined Under Namespace

Classes: ScanError

Constant Summary collapse

# Version of the oedipus_lex generator. TEMPLATE interpolates it into the
# "Generated by: oedipus_lex version ..." header of every generated lexer.
VERSION =
"2.1.0"
# Default generator options. #initialize merges caller-supplied opts over
# these, and #lex_option switches individual entries to true.
DEFAULTS =
{
  :debug    => false, # true: generated next_token prints [state, token]
  :do_parse => false, # true: a do_parse driver method is generated
  :lineno   => false, # true: generated next_token bumps lineno when the next char is "\n"
  :stub     => false, # true: a command-line stub (if __FILE__ == $0) is appended
}
# ERB source for the generated lexer class; #generate renders it with trim
# mode "%". Lines starting with "%" are Ruby control code evaluated against
# the generator's binding (header, macros, starts, rules, inners, ends,
# option, class_name, filename, all_states); every other line is emitted
# into the generated file.
#
# NOTE(review): the gsub replacement '\1' refers to a capture group the
# pattern does not define, so it substitutes an empty string -- i.e. the call
# simply strips six leading spaces from each line.
# NOTE(review): the emitted parse_file uses Kernel#open, which treats a path
# starting with "|" as a command to run; File.open looks like the safer
# call -- confirm before changing generated output.
TEMPLATE =
<<-'REX'.gsub(/^ {6}/, '\1')
    #--
    # This file is automatically generated. Do not modify it.
    # Generated by: oedipus_lex version <%= VERSION %>.
% if filename then
    # Source: <%= filename %>
% end
    #++

% unless header.empty? then
%   header.each do |s|
    <%= s %>
%   end

% end
    class <%= class_name %>
      require 'strscan'

% unless macros.empty? then
%   max = macros.map { |(k,_)| k.size }.max
%   macros.each do |(k,v)|
      <%= "%-#{max}s = %s" % [k, v] %>
%   end

% end
      class ScanError < StandardError ; end

      attr_accessor :lineno
      attr_accessor :filename
      attr_accessor :ss
      attr_accessor :state

      alias :match :ss

      def matches
        m = (1..9).map { |i| ss[i] }
        m.pop until m[-1] or m.empty?
        m
      end

      def action
        yield
      end

% if option[:do_parse] then
      def do_parse
        while token = next_token do
          type, *vals = token

          send "lex_#{type}", *vals
        end
      end

% end
      def scanner_class
        StringScanner
      end unless instance_methods(false).map(&:to_s).include?("scanner_class")

      def parse str
        self.ss     = scanner_class.new str
        self.lineno = 1
        self.state  ||= nil

        do_parse
      end

      def parse_file path
        self.filename = path
        open path do |f|
          parse f.read
        end
      end

      def next_token
% starts.each do |s|
        <%= s %>
% end
% if option[:lineno] then
        self.lineno += 1 if ss.peek(1) == "\n"
% end

        token = nil

        until ss.eos? or token do
          token =
            case state
% all_states.each do |the_states|
%   exclusive = the_states.first != nil
%   all_states, predicates = the_states.partition { |s| s.nil? or s.start_with? ":" }
%   filtered_states = the_states.select { |s| s.nil? or s.start_with? ":" }
            when <%= all_states.map { |s| s || "nil" }.join ", " %> then
              case
%   all_states.each do |state|
%     rules.each do |rule|
%       start_state, rule_expr, rule_action = *rule
%       if start_state == state or (state.nil? and predicates.include? start_state) then
%         if start_state and not exclusive then
%           if start_state =~ /^:/ then
              when (state == <%= start_state %>) && (text = ss.scan(<%= rule_expr %>)) then
%           else
              when <%= start_state %> && (text = ss.scan(<%= rule_expr %>)) then
%           end
%         else
              when text = ss.scan(<%= rule_expr %>) then
%         end
%         if rule_action then
%           case rule_action
%           when /^\{/ then
                action <%= rule_action %>
%           when /^:/, "nil" then
                [:state, <%= rule_action %>]
%           else
                <%= rule_action %> text
%           end
%         else
                # do nothing
%         end
%       end # start_state == state
%     end # rules.each
%   end # the_states.each
              else
                text = ss.string[ss.pos .. -1]
                raise ScanError, "can not match (#{state.inspect}): '#{text}'"
              end
% end # all_states
            else
              raise ScanError, "undefined state: '#{state}'"
            end # token = case state

          next unless token # allow functions to trigger redo w/ nil
        end # while

        raise "bad lexical result: #{token.inspect}" unless
          token.nil? || (Array === token && token.size >= 2)

        # auto-switch state
        self.state = token.last if token && token.first == :state

% if option[:debug] then
        p [state, token]
% end
        token
      end # def _next_token
% inners.each do |s|
      <%= s %>
% end
    end # class
% unless ends.empty? then

%   ends.each do |s|
      <%= s %>
%   end
% end
% if option[:stub] then

    if __FILE__ == $0
      ARGV.each do |path|
        rex = <%= class_name %>.new

        def rex.do_parse
          while token = self.next_token
            p token
          end
        end

        begin
          rex.parse_file path
        rescue
          $stderr.printf "%s:%d:%s\n", rex.filename, rex.lineno, $!.message
          exit 1
        end
      end
    end
% end
REX
# Start-state part of a rule line: either a colon followed by a run of
# non-whitespace characters (":state") or a bare word with an optional
# trailing "?" (a predicate method name). Interpolated into the :rule
# pattern of #next_token, where it is optional.
ST =
/(?:(:\S+|\w+\??))/
# A regexp literal as written in a lexer definition: "/", one or more
# backslash-escaped characters or non-slash characters, "/", then at most
# one flag character out of i, o and n.
RE =
/(\/(?:\\.|[^\/])+\/[ion]?)/
# Action part of a rule line: either "{" followed by the rest of the line
# (inline code) or a word with an optional leading colon (a method name or a
# :state to switch to).
ACT =
/(\{.*|:?\w+)/

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ OedipusLex

Returns a new instance of OedipusLex.



25
26
27
28
29
30
31
32
33
34
35
# File 'lib/oedipus_lex.rb', line 25

# Builds an empty lexer description.
#
# opts:: option overrides layered on top of DEFAULTS.
def initialize(opts = {})
  self.option     = DEFAULTS.merge(opts)
  self.class_name = nil

  # Each section starts out as its own fresh, empty list.
  %w[header ends inners macros rules starts].each do |section|
    send("#{section}=", [])
  end
end

Instance Attribute Details

#class_name ⇒ Object

Returns the value of attribute class_name.



9
10
11
# File 'lib/oedipus_lex.rb', line 9

# Name of the lexer class to generate. #initialize sets it to nil and
# #lex_class assigns the parsed name; TEMPLATE emits it as "class <name>".
def class_name
  @class_name
end

#ends ⇒ Object

Returns the value of attribute ends.



11
12
13
# File 'lib/oedipus_lex.rb', line 11

# Lines collected by #lex_end; TEMPLATE emits them after the generated class.
def ends
  @ends
end

#filename ⇒ Object

Returns the value of attribute filename.



30
31
32
# File 'lib/oedipus_lex.rex.rb', line 30

# Path recorded by #parse_file before it reads and parses the file.
def filename
  @filename
end

#header ⇒ Object

Returns the value of attribute header.



10
11
12
# File 'lib/oedipus_lex.rb', line 10

# Lines that preceded the class declaration, appended by #lex_class;
# TEMPLATE emits them at the top of the generated file.
def header
  @header
end

#inners ⇒ Object

Returns the value of attribute inners.



12
13
14
# File 'lib/oedipus_lex.rb', line 12

# Lines collected by #lex_inner; TEMPLATE emits them inside the generated
# class, after next_token.
def inners
  @inners
end

#lineno ⇒ Object

Returns the value of attribute lineno.



29
30
31
# File 'lib/oedipus_lex.rex.rb', line 29

# Line counter: reset to 1 by #parse and incremented by #next_token whenever
# the next character to scan is a newline.
def lineno
  @lineno
end

#macros ⇒ Object

Returns the value of attribute macros.



13
14
15
# File 'lib/oedipus_lex.rb', line 13

# [name, value] pairs collected by #lex_macro; TEMPLATE emits each one as a
# "name = value" line inside the generated class.
def macros
  @macros
end

#option ⇒ Object

Returns the value of attribute option.



14
15
16
# File 'lib/oedipus_lex.rb', line 14

# Hash of generator options: DEFAULTS merged with the constructor's opts,
# with individual entries set to true by #lex_option.
def option
  @option
end

#rules ⇒ Object

Returns the value of attribute rules.



15
16
17
# File 'lib/oedipus_lex.rb', line 15

# [start_state, regexp, action] triples, in the order #lex_rule appended them.
def rules
  @rules
end

#ss ⇒ Object Also known as: match

Returns the value of attribute ss.



31
32
33
# File 'lib/oedipus_lex.rex.rb', line 31

# The scanner that #parse builds with scanner_class.new; #matches and
# #next_token read from it.
def ss
  @ss
end

#starts ⇒ Object

Returns the value of attribute starts.



16
17
18
# File 'lib/oedipus_lex.rb', line 16

# Stripped lines collected by #lex_start; TEMPLATE emits them at the
# beginning of the generated next_token.
def starts
  @starts
end

#state ⇒ Object

Returns the value of attribute state.



32
33
34
# File 'lib/oedipus_lex.rex.rb', line 32

# Current lexer state; nil selects the default rule group. #next_token
# assigns it whenever a [:state, x] token is produced.
def state
  @state
end

Instance Method Details

#action ⇒ Object



42
43
44
# File 'lib/oedipus_lex.rex.rb', line 42

# Runs the given block and returns its value. #next_token wraps the inline
# "{ ... }" code of a rule in a call to this method.
def action
  yield
end

#do_parse ⇒ Object



46
47
48
49
50
51
52
# File 'lib/oedipus_lex.rex.rb', line 46

# Drains the lexer: each token [kind, *args] returned by #next_token is
# dispatched to the handler method named "lex_<kind>" with args as its
# arguments. Stops as soon as #next_token returns a falsy value.
def do_parse
  while (token = next_token)
    kind, *args = token
    handler = "lex_#{kind}"

    send handler, *args
  end
end

#generate ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/oedipus_lex.rb', line 78

# Renders TEMPLATE into the Ruby source of the lexer described by the
# collected header, macros, rules and so on.
#
# Start states that begin with a colon and an uppercase letter (":A") are
# exclusive and each get a group of their own; every other start state
# (":a" or a predicate method name) is inclusive and shares one group with
# the nil state.
#
# Returns the generated source as a String.
def generate
  states                 = rules.map(&:first).compact.uniq
  exclusives, inclusives = states.partition { |s| s =~ /^:[A-Z]/ }

  # NOTE: doubling up assignment to remove unused var warnings in
  # ERB binding.

  all_states =
    all_states = [[nil,                        # non-state # eg [[nil,
                   *inclusives],               # incls     #      :a, :b],
                  *exclusives.map { |s| [s] }] # [excls]   #     [:A], [:B]]

  # Passing safe_level/trim_mode positionally to ERB.new is deprecated
  # (Ruby 2.6+) and may be rejected by newer erb releases, so use the
  # trim_mode keyword whenever the installed ERB accepts it and keep the
  # legacy call only for older Rubies.
  keyword_api = ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
  erb = keyword_api ? ERB.new(TEMPLATE, :trim_mode => "%") : ERB.new(TEMPLATE, nil, "%")

  erb.result binding
end

#lex_class(prefix, name) ⇒ Object



37
38
39
40
# File 'lib/oedipus_lex.rb', line 37

# Handler for [:class, prefix, name] tokens.
#
# prefix:: text found before the "class" keyword; each of its lines is
#          appended to #header.
# name::   the class name, stored in #class_name.
def lex_class(prefix, name)
  prefix.split(/\n/).each { |line| header << line }
  self.class_name = name
end

#lex_comment(line) ⇒ Object



42
43
44
# File 'lib/oedipus_lex.rb', line 42

# Handler for [:comment, text] tokens. Comments in the lexer definition are
# discarded, so the argument is ignored.
def lex_comment line
  # do nothing
end

#lex_end(line) ⇒ Object



46
47
48
# File 'lib/oedipus_lex.rb', line 46

# Handler for [:end, line] tokens: records one line of the footer part.
# Returns the #ends list.
def lex_end(line)
  ends.push(line)
end

#lex_inner(line) ⇒ Object



50
51
52
# File 'lib/oedipus_lex.rb', line 50

# Handler for [:inner, line] tokens: records one line of the "inner" section.
# Returns the #inners list.
def lex_inner(line)
  inners.push(line)
end

#lex_macro(name, value) ⇒ Object



58
59
60
# File 'lib/oedipus_lex.rb', line 58

# Handler for [:macro, name, value] tokens: records the macro as a
# [name, value] pair. Returns the #macros list.
def lex_macro(name, value)
  macros.push([name, value])
end

#lex_option(option) ⇒ Object



62
63
64
# File 'lib/oedipus_lex.rb', line 62

# Handler for [:option, text] tokens: turns the named generator option on.
#
# option:: option name as scanned from the definition. The lexer matches
#          option names case-insensitively (/stub/i, /debug/i, ...), while
#          DEFAULTS and TEMPLATE only know the lowercase keys, so the name
#          is downcased before it becomes the hash key. Otherwise "STUB"
#          would set :STUB and silently have no effect.
#
# Returns true.
def lex_option option
  self.option[option.downcase.to_sym] = true
end

#lex_rule(start_state, regexp, action = nil) ⇒ Object



66
67
68
# File 'lib/oedipus_lex.rb', line 66

# Handler for [:rule, ...] tokens: records one rule as a
# [start_state, regexp, action] triple. action is nil when the rule has no
# action part. Returns the #rules list.
def lex_rule(start_state, regexp, action = nil)
  rules.push([start_state, regexp, action])
end

#lex_rule2(*vals) ⇒ Object



70
71
72
# File 'lib/oedipus_lex.rb', line 70

# Always fails: raises a RuntimeError whose message is the inspected
# argument list.
def lex_rule2(*vals)
  raise RuntimeError, vals.inspect
end

#lex_start(line) ⇒ Object



54
55
56
# File 'lib/oedipus_lex.rb', line 54

# Handler for [:start, line] tokens: records the line, with surrounding
# whitespace stripped, in #starts. Returns the #starts list.
def lex_start(line)
  starts.push(line.strip)
end

#lex_state(new_state) ⇒ Object



74
75
76
# File 'lib/oedipus_lex.rb', line 74

# Handler for [:state, new_state] tokens. Nothing to do here: #next_token
# has already assigned the new state before the token is dispatched.
def lex_state new_state
  # do nothing -- lexer switches state for us
end

#matches ⇒ Object



36
37
38
39
40
# File 'lib/oedipus_lex.rex.rb', line 36

# Capture groups 1..9 of the scanner's last match, with trailing unmatched
# (nil) groups dropped. Unmatched groups in the middle are kept as nil.
# Returns an empty Array when no group matched.
def matches
  groups = (1..9).map { |n| ss[n] }
  last   = groups.rindex { |g| g }

  last ? groups[0..last] : []
end

#next_token ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/oedipus_lex.rex.rb', line 73

# Scans the input for the next token of a lexer definition.
#
# Loops until a rule yields a non-nil token or the scanner reaches the end
# of input. Rules are selected by the current state: nil and the lowercase
# section states (:option, :inner, :start, :macro, :rule) share one rule
# list, in which section-specific rules are guarded by "state == ...";
# :END has a rule list of its own.
#
# Returns an Array token such as [:rule, ...] or [:state, new_state], or nil
# once the input is exhausted. A [:state, x] token also switches the
# current state to x.
#
# Raises ScanError when no rule matches or the state is unknown, and a
# RuntimeError when a rule produces anything other than nil or an Array of
# at least two elements.
def next_token
  self.lineno += 1 if ss.peek(1) == "\n"

  token = nil

  until ss.eos? or token do
    token =
      case state
      when nil, :option, :inner, :start, :macro, :rule then
        case
        # Section keywords: each one switches the lexer into that section.
        when text = ss.scan(/options?.*/) then
          [:state, :option]
        when text = ss.scan(/inner.*/) then
          [:state, :inner]
        when text = ss.scan(/macros?.*/) then
          [:state, :macro]
        when text = ss.scan(/rules?.*/) then
          [:state, :rule]
        when text = ss.scan(/start.*/) then
          [:state, :start]
        when text = ss.scan(/end/) then
          [:state, :END]
        when text = ss.scan(/\A((?:.|\n)*)class ([\w:]+.*)/) then
          action { [:class, *matches] }
        when text = ss.scan(/\n+/) then
          # do nothing
        when text = ss.scan(/\s*(\#.*)/) then
          action { [:comment, text] }
        # Rules below only apply inside their own section.
        when (state == :option) && (text = ss.scan(/\s+/)) then
          # do nothing
        when (state == :option) && (text = ss.scan(/stub/i)) then
          action { [:option, text] }
        when (state == :option) && (text = ss.scan(/debug/i)) then
          action { [:option, text] }
        when (state == :option) && (text = ss.scan(/do_parse/i)) then
          action { [:option, text] }
        when (state == :option) && (text = ss.scan(/lineno/i)) then
          action { [:option, text] }
        when (state == :inner) && (text = ss.scan(/.*/)) then
          action { [:inner, text] }
        when (state == :start) && (text = ss.scan(/.*/)) then
          action { [:start, text] }
        when (state == :macro) && (text = ss.scan(/\s+(\w+)\s+#{RE}/o)) then
          action { [:macro, *matches] }
        when (state == :rule) && (text = ss.scan(/\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then
          action { [:rule, *matches] }
        else
          text = ss.string[ss.pos .. -1]
          raise ScanError, "can not match (#{state.inspect}): '#{text}'"
        end
      when :END then
        # After "end": every remaining non-blank line is footer text.
        case
        when text = ss.scan(/\n+/) then
          # do nothing
        when text = ss.scan(/.*/) then
          action { [:end, text] }
        else
          text = ss.string[ss.pos .. -1]
          raise ScanError, "can not match (#{state.inspect}): '#{text}'"
        end
      else
        raise ScanError, "undefined state: '#{state}'"
      end # token = case state

    next unless token # allow functions to trigger redo w/ nil
  end # while

  raise "bad lexical result: #{token.inspect}" unless
    token.nil? || (Array === token && token.size >= 2)

  # auto-switch state
  self.state = token.last if token && token.first == :state

  token
end

#parse(str) ⇒ Object



58
59
60
61
62
63
64
# File 'lib/oedipus_lex.rex.rb', line 58

# Lexes +str+ from the beginning.
#
# Installs a fresh scanner (built by scanner_class) over +str+, resets the
# line counter to 1, keeps any state that is already set, and hands control
# to #do_parse.
#
# Returns whatever #do_parse returns.
def parse(str)
  scanner = scanner_class.new(str)

  self.ss, self.lineno = scanner, 1
  self.state ||= nil

  do_parse
end

#parse_file(path) ⇒ Object



66
67
68
69
70
71
# File 'lib/oedipus_lex.rex.rb', line 66

# Records +path+ as the current filename, reads the whole file and lexes it
# with #parse.
#
# path:: filesystem path of the file to read.
#
# Returns whatever #parse returns. Raises a SystemCallError subclass
# (e.g. Errno::ENOENT) when the file cannot be opened.
def parse_file path
  self.filename = path
  # File.open rather than Kernel#open: the latter treats a leading "|" as a
  # request to spawn a subprocess, which must never happen for a file path.
  File.open path do |f|
    parse f.read
  end
end

#scanner_class ⇒ Object



54
55
56
# File 'lib/oedipus_lex.rex.rb', line 54

# Class that #parse instantiates (via .new with the input string) to build
# the scanner. Returns StringScanner.
def scanner_class
  StringScanner
end