Class: Syntax::Ruby

Inherits:
Tokenizer show all
Defined in:
lib/syntax/lang/ruby.rb

Overview

A tokenizer for the Ruby language. It recognizes all common syntax (and some less common syntax) but because it is not a true lexer, it will make mistakes on some ambiguous cases.

Constant Summary collapse

KEYWORDS =

The list of all identifiers recognized as keywords.

%w{if then elsif else end begin do rescue ensure while for
class module def yield raise until unless and or not when
case super undef break next redo retry in return alias
defined?}

Instance Attribute Summary

Attributes inherited from Tokenizer

#chunk, #group

Instance Method Summary collapse

Methods inherited from Tokenizer

#finish, #option, #set, #start, #teardown, #tokenize

Instance Method Details

#setupObject

Perform Ruby-specific setup.



18
19
20
21
22
# File 'lib/syntax/lang/ruby.rb', line 18

# Initialize the scanner state that #step relies on: both context flags
# start out cleared and the queue of pending heredocs starts out empty.
def setup
  @selector = @allow_operator = false
  @heredocs = Array.new
end

#stepObject

Step through a single iteration of the tokenization process.



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/syntax/lang/ruby.rb', line 25

# Step through a single iteration of the tokenization process.
#
# Looks at the text at the current scan position, consumes one construct and
# emits it through +start_group+ / +append+, or delegates to
# +scan_delimited_region+ / +scan_heredoc+ for delimited literals.
#
# State carried between calls:
#
# * <tt>@selector</tt> is set right after a single "." so that the next word
#   is emitted as an :ident (a method name) instead of a keyword or constant.
#   It is cleared by any word or punctuation token.
# * <tt>@allow_operator</tt> decides how "/" is read: when set, "/" is
#   emitted as punctuation; otherwise it opens a regex literal.
# * <tt>@heredocs</tt> queues <tt>[float_right, type, delim]</tt> entries
#   recorded when a heredoc opener is seen; one entry is handed to
#   +scan_heredoc+ at the next line break.
def step
  case
    when bol? && check( /=begin/ )
      # An unterminated block comment swallows the rest of the input;
      # otherwise scan_until would return nil without advancing and the
      # caller would never reach end-of-string.
      start_group( :comment,
        scan_until( /^=end#{EOL}/ ) || scan_until( /\Z/ ) )
    when bol? && check( /__END__#{EOL}/ )
      start_group( :comment, scan_until( /\Z/ ) )
  else
    case
      # def/class/module only introduce a definition when they are not a
      # method name following "." (e.g. "obj.class == Foo").
      when !@selector && check( /def\s+/ )
        start_group :keyword, scan( /def\s+/ )
        start_group :method,  scan_until( /(?=[;(\s]|#{EOL})/ )
      when !@selector && check( /class\s+/ )
        start_group :keyword, scan( /class\s+/ )
        start_group :class,  scan_until( /(?=[;\s<]|#{EOL})/ )
      when !@selector && check( /module\s+/ )
        start_group :keyword, scan( /module\s+/ )
        start_group :module,  scan_until( /(?=[;\s]|#{EOL})/ )
      when check( /::/ )
        start_group :punct, scan(/::/)
      when check( /:"/ )
        start_group :symbol, scan(/:/)
        scan_delimited_region :symbol, :symbol, "", true
        @allow_operator = true
      when check( /:'/ )
        start_group :symbol, scan(/:/)
        scan_delimited_region :symbol, :symbol, "", false
        @allow_operator = true
      when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
        start_group :symbol, matched
        @allow_operator = true
      when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
        start_group :char, matched
        @allow_operator = true
      # The pseudo-constant must end at a word boundary (or carry a ?/!
      # suffix) so that identifiers such as "nilable" are not split.
      when scan( /(__FILE__|__LINE__|true|false|nil|self)(?:[?!]|(?!\w))/ )
        # After "." or with a ?/! suffix this is a method name.
        if @selector || matched[-1] == ?? || matched[-1] == ?!
          start_group :ident, matched
        else
          start_group :constant, matched
        end
        @selector = false
        @allow_operator = true
      when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
        start_group :number, matched
        @allow_operator = true
      else
        case peek(2)
          when "%r"
            scan_delimited_region :punct, :regex, scan( /../ ), true
            @allow_operator = true
          when "%w", "%q"
            scan_delimited_region :punct, :string, scan( /../ ), false
            @allow_operator = true
          when "%s"
            scan_delimited_region :punct, :symbol, scan( /../ ), false
            @allow_operator = true
          when "%W", "%Q", "%x"
            scan_delimited_region :punct, :string, scan( /../ ), true
            @allow_operator = true
          when /%[^\sa-zA-Z0-9]/
            scan_delimited_region :punct, :string, scan( /./ ), true
            @allow_operator = true
          when "<<"
            # "<<" directly after a word character is taken as the shift /
            # append operator rather than a heredoc opener.
            saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
            start_group :punct, scan( /<</ )
            if saw_word
              @allow_operator = false
              return
            end

            float_right = scan( /-/ )
            append "-" if float_right
            if ( type = scan( /['"]/ ) )
              append type
              delim = scan_until( /(?=#{type})/ )
              if delim.nil?
                # Unterminated quoted delimiter: consume the remainder.
                append scan_until( /\Z/ )
                return
              end
            else
              delim = scan( /\w+/ ) or return
            end
            start_group :constant, delim
            start_group :punct, scan( /#{type}/ ) if type
            # The heredoc body is scanned at the next line break.
            @heredocs << [ float_right, type, delim ]
            @allow_operator = true
          else
            case peek(1)
              when /[\n\r]/
                if @heredocs.empty?
                  start_group :normal, scan( /\s+/ )
                else
                  scan_heredoc(*@heredocs.shift)
                end
                @allow_operator = false
              when /\s/
                start_group :normal, scan( /\s+/ )
              when "#"
                start_group :comment, scan( /#[^\n\r]*/ )
              when /[A-Z]/
                start_group @selector ? :ident : :constant, scan( /\w+/ )
                # The selector context ends with this word.
                @selector = false
                @allow_operator = true
              when /[a-z_]/
                word = scan( /\w+[?!]?/ )
                if !@selector && KEYWORDS.include?( word )
                  start_group :keyword, word
                  @allow_operator = false
                else
                  start_group :ident, word
                  @allow_operator = true
                end
                @selector = false
              when /\d/
                # The exponent may carry a sign, as in 1e-5 or 2.5E+3.
                start_group :number,
                  scan( /[\d_]+(\.[\d_]+)?([eE][-+]?[\d_]+)?/ )
                @allow_operator = true
              when '"'
                scan_delimited_region :punct, :string, "", true
                @allow_operator = true
              when '/'
                if @allow_operator
                  start_group :punct, scan(%r{/})
                  @allow_operator = false
                else
                  scan_delimited_region :punct, :regex, "", true
                  @allow_operator = true
                end
              when "'"
                scan_delimited_region :punct, :string, "", false
                @allow_operator = true
              when "."
                dots = scan( /\.{1,3}/ )
                start_group :punct, dots
                # Only a single dot is a method-call selector; ".." and
                # "..." are range operators.
                @selector = ( dots.length == 1 )
              when /[@]/
                start_group :attribute, scan( /@{1,2}\w*/ )
                @allow_operator = true
              when /[$]/
                start_group :global, scan(/\$/)
                start_group :global, scan( /\w+|./ ) if check(/./)
                @allow_operator = true
              when /[-!?*\/+=<>(\[\{}:;,&|%]/
                start_group :punct, scan(/./)
                # Punctuation after "." (e.g. "f.()") ends the selector
                # context so it cannot leak onto a later word.
                @selector = false
                @allow_operator = false
              when /[)\]]/
                start_group :punct, scan(/./)
                @selector = false
                @allow_operator = true
              else
                # all else just falls through this, to prevent
                # infinite loops...
                append getch
            end
        end
    end
  end
end