Class: Shoes::Highlighter::Syntax::Ruby

Inherits:
Tokenizer
  • Object
show all
Defined in:
lib/shoes/highlighter/lang/ruby.rb

Overview

A tokenizer for the Ruby language. It recognizes all common syntax (and some less common syntax) but because it is not a true lexer, it will make mistakes on some ambiguous cases.

Constant Summary collapse

KEYWORDS =

The list of all identifiers recognized as keywords.

# Words that #step emits as :keyword tokens, provided the word does not
# directly follow a single "." (in which case it is a plain identifier).
%w(if then elsif else end begin do rescue ensure while for
class module def yield raise until unless and or not when
case super undef break next redo retry in return alias
defined?)

Instance Attribute Summary

Attributes inherited from Tokenizer

#chunk, #group

Instance Method Summary collapse

Methods inherited from Tokenizer

#finish, #option, #set, #start, #teardown, #tokenize

Instance Method Details

#setup ⇒ Object

Performs Ruby-specific setup.



16
17
18
19
20
# File 'lib/shoes/highlighter/lang/ruby.rb', line 16

# Perform Ruby-specific setup.
#
# Initializes the three pieces of state that #step reads and updates:
# both boolean flags start out false and the pending-heredoc list starts
# out empty.
def setup
  # Neither "previous token was a '.'" nor "an operator may follow" holds yet.
  @selector = @allow_operator = false
  # Heredoc openers queued by #step; nothing is pending at the start.
  @heredocs = []
end

#step ⇒ Object

Step through a single iteration of the tokenization process.



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/shoes/highlighter/lang/ruby.rb', line 23

# Step through a single iteration of the tokenization process.
#
# Inspects the text at the current scan position, consumes one lexical
# element (for a few constructs, such as "def name" or a heredoc opener,
# a short run of them) and emits it through +start_group+ / +append+.
#
# State carried from one call to the next:
# * +@selector+       - set after a single "."; the next word is then
#                       emitted as :ident even if it looks like a keyword
#                       or a constant.
# * +@allow_operator+ - set after a token that can end an operand; decides
#                       whether "/" is a division operator or opens a
#                       regex literal.
# * +@heredocs+       - heredoc openers already seen; one is handed to
#                       +scan_heredoc+ each time a line break is reached.
#
# The scanning helpers used here (+check+, +scan+, +scan_until+, +peek+,
# +matched+, ...) come from Tokenizer and are assumed to behave like the
# StringScanner methods of the same name.
def step
  # The trailing (?!\w) keeps words such as "selfish" or "nilable" from
  # being split into a pseudo-constant plus a leftover identifier.
  pseudo_constant = /(__FILE__|__LINE__|true|false|nil|self)[?!]?(?!\w)/

  case
  when bol? && check(/=begin/)
    # Block comment. If "=end" never appears, take the rest of the input
    # instead, so that an unterminated comment still advances the scanner.
    start_group(:comment, scan_until(/^=end#{EOL}/) || scan_until(/\Z/))
  when bol? && check(/__END__#{EOL}/)
    # Everything from __END__ onwards is emitted as a single comment.
    start_group(:comment, scan_until(/\Z/))
  when check(/def\s+/)
    start_group :keyword, scan(/def\s+/)
    start_group :method,  scan_until(/(?=[;(\s]|#{EOL})/)
  when check(/class\s+/)
    start_group :keyword, scan(/class\s+/)
    start_group :class,  scan_until(/(?=[;\s<]|#{EOL})/)
  when check(/module\s+/)
    start_group :keyword, scan(/module\s+/)
    start_group :module,  scan_until(/(?=[;\s]|#{EOL})/)
  when check(/::/)
    start_group :punct, scan(/::/)
  when check(/:"/)
    # Quoted symbol: emit the colon, then scan the quoted part.
    start_group :symbol, scan(/:/)
    scan_delimited_region :symbol, :symbol, "", true
    @allow_operator = true
  when check(/:'/)
    start_group :symbol, scan(/:/)
    scan_delimited_region :symbol, :symbol, "", false
    @allow_operator = true
  when scan(/:[_a-zA-Z@$][$@\w]*[=!?]?/)
    start_group :symbol, matched
    @allow_operator = true
  when scan(/\?(\\[^\n\r]|[^\\\n\r\s])/)
    # Character literal such as ?a or ?\n.
    start_group :char, matched
    @allow_operator = true
  when check(pseudo_constant)
    # After a "." or with a ?/! suffix this is a method name, not a
    # pseudo-constant.
    as_ident = @selector || matched.end_with?("?", "!")
    start_group(as_ident ? :ident : :constant, scan(pseudo_constant))
    @selector = false
    @allow_operator = true
  when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
    # Integer literal with an explicit radix prefix.
    start_group :number, matched
    @allow_operator = true
  else
    case peek(2)
    when "%r"
      scan_delimited_region :punct, :regex, scan(/../), true
      @allow_operator = true
    when "%w", "%q"
      scan_delimited_region :punct, :string, scan(/../), false
      @allow_operator = true
    when "%s"
      scan_delimited_region :punct, :symbol, scan(/../), false
      @allow_operator = true
    when "%W", "%Q", "%x"
      scan_delimited_region :punct, :string, scan(/../), true
      @allow_operator = true
    when /%[^\sa-zA-Z0-9]/
      # Bare "%" followed by a delimiter character.
      scan_delimited_region :punct, :string, scan(/./), true
      @allow_operator = true
    when "<<"
      # "<<" right after a word-like character is treated as the
      # shift/append operator rather than a heredoc opener.
      saw_word = (chunk[-1, 1] =~ /[\w!?]/)
      start_group :punct, scan(/<</)
      if saw_word
        @allow_operator = false
        return
      end

      float_right = scan(/-/)
      append "-" if float_right
      if (type = scan(/['"]/))
        append type
        delim = scan_until(/(?=#{type})/)
        if delim.nil?
          # No closing quote: emit whatever is left and give up.
          append scan_until(/\Z/)
          return
        end
      else
        delim = scan(/\w+/)
        return unless delim
      end
      start_group :constant, delim
      start_group :punct, scan(/#{type}/) if type
      # Remember the opener; the body is scanned at a later line break.
      @heredocs << [float_right, type, delim]
      @allow_operator = true
    else
      case peek(1)
      when /[\n\r]/
        if @heredocs.empty?
          start_group :normal, scan(/\s+/)
        else
          scan_heredoc(*@heredocs.shift)
        end
        @allow_operator = false
      when /\s/
        start_group :normal, scan(/\s+/)
      when "#"
        start_group :comment, scan(/#[^\n\r]*/)
      when /[A-Z]/
        start_group @selector ? :ident : :constant, scan(/\w+/)
        @allow_operator = true
      when /[a-z_]/
        word = scan(/\w+[?!]?/)
        if !@selector && KEYWORDS.include?(word)
          start_group :keyword, word
          @allow_operator = false
        else
          start_group :ident, word
          @allow_operator = true
        end
        @selector = false
      when /\d/
        # The exponent may carry a sign, as in 1e-5 or 2.5E+3.
        start_group :number,
                    scan(/[\d_]+(\.[\d_]+)?([eE][-+]?[\d_]+)?/)
        @allow_operator = true
      when '"'
        scan_delimited_region :punct, :string, "", true
        @allow_operator = true
      when '/'
        if @allow_operator
          # Follows an operand, so this is division.
          start_group :punct, scan(%r{/})
          @allow_operator = false
        else
          scan_delimited_region :punct, :regex, "", true
          @allow_operator = true
        end
      when "'"
        scan_delimited_region :punct, :string, "", false
        @allow_operator = true
      when "."
        dots = scan(/\.{1,3}/)
        start_group :punct, dots
        # Only a single dot is a method selector; ".." and "..." are not.
        @selector = (dots.length == 1)
      when /[@]/
        start_group :attribute, scan(/@{1,2}\w*/)
        @allow_operator = true
      when /[$]/
        start_group :global, scan(/\$/)
        start_group :global, scan(/\w+|./) if check(/./)
        @allow_operator = true
      when /[-!?*\/+=<>(\[\{}:;,&|%]/
        start_group :punct, scan(/./)
        @allow_operator = false
      when /[)\]]/
        start_group :punct, scan(/./)
        @allow_operator = true
      else
        # all else just falls through this, to prevent
        # infinite loops...
        append getch
      end
    end
  end
end