Class: Syntax::CSS21

Inherits:
Tokenizer show all
Defined in:
lib/syntax/lang/css21.rb

Constant Summary collapse

# Property names that are highlighted as CSS properties inside a
# declaration block. Besides the CSS 2.1 property index the list keeps a
# few older draft names (cell-spacing, column-span, row-span, ...).
# NOTE(review): `bottom` and `left` are not listed here although `top` and
# `right` are; all four also occur in CSS21_KEYWORDS, so adding them would
# change how they are classified as values -- confirm before changing.
CSS21_PROPERTIES =
  Set.new %w{font-family font-style font-variant font-weight
             font-size font background-color background-image
             background-repeat background-attachment background-position
             color background word-spacing letter-spacing
             border-top-width border-right-width border-left-width
             border-bottom-width border-width list-style-type
             list-style-image list-style-position text-decoration
             vertical-align text-transform text-align text-indent
             line-height margin-top margin-right margin-bottom
             margin-left margin padding-top padding-right padding-bottom
             padding-left padding border-top border-right border-bottom
             border-left border width height float clear display
             list-style white-space border-style border-color
             azimuth border-bottom-color border-bottom-style
             border-collapse border-left-color border-left-style
             border-right-color border-right-style border-top-color
             border-top-style caption-side cell-spacing clip column-span
             content cue cue-after cue-before cursor direction
             elevation font-size-adjust marks max-height max-width
             min-height min-width orphans overflow page-break-after
             page-break-before pause pause-after pause-before pitch
             pitch-range play-during position richness right row-span
             size speak speak-date speak-header speak-punctuation
             speak-time speech-rate stress table-layout text-shadow top
             visibility voice-family volume
             widows z-index quotes
             marker-offset outline outline-color outline-style outline-width
             border-spacing page-break-inside
             counter-increment counter-reset empty-cells
             speak-numeral unicode-bidi}
# Value keywords (colour names, border styles, font sizes, display types,
# media types, generic font families, ...) highlighted as keywords inside a
# declaration block. Words inside %w{} are separated by whitespace only, so
# no commas may appear between entries.
CSS21_KEYWORDS =
  Set.new %w{maroon red orange yellow olive purple
             fuchsia white lime green navy blue aqua teal black silver gray
             scroll fixed transparent none top center bottom
             left right repeat repeat-x repeat-y no-repeat
             thin medium thick dotted dashed solid double groove ridge
             inset outset both block inline list-item
             xx-small x-small small medium large x-large xx-large
             smaller italic oblique small-caps bold bolder lighter auto
             disc circle square decimal lower-roman upper-roman lower-alpha
             upper-alpha inside outside justify underline overline line-through
             blink capitalize uppercase lowercase baseline sub super
             top text-top middle bottom text-bottom pre nowrap
             compact run-in inherit caption icon menu message-box small-caption
             status-bar marker
             table inline-table table-column-group table-column
             table-row-group table-row table-cell table-caption
             table-header-group table-footer-group
             screen print projection braille embossed aural tv
             tty handheld cross hidden open-quote close-quote
             absolute relative normal collapse
             serif sans-serif monospace cursive
             fantasy always}
# HTML element names, including a trailing group of legacy elements
# (acronym ... u), used to recognise type selectors.
HTML_TAGS = %w[
  a abbr address area article aside audio
  b base bdo blockquote body br button
  canvas caption cite code col colgroup command
  datalist dd del details dfn div dl dt
  em embed
  fieldset figure footer form
  h1 h2 h3 h4 h5 h6 head header hgroup hr html
  i iframe img input ins
  kbd keygen
  label legend li link
  map mark menu meta meter
  nav noscript
  object ol optgroup option output
  p param pre progress
  q
  rp rt ruby
  samp script section select small source span strong style sub sup
  table tbody td textarea tfoot th thead time title tr
  ul
  var video
  acronym applet big center frame frameset isindex marquee noframes s tt u
].to_set
# MathML element names used to recognise type selectors.
MATHML_TAGS = %w[
  annotation annotation-xml
  maction malign maligngroup malignmark malignscope math menclose merror
  mfenced mfrac mglyph mi mlabeledtr mlongdiv mmultiscripts mn mo mover
  mpadded mphantom mprescripts mroot mrow ms mscarries mscarry msgroup
  msline mspace msqrt msrow mstack mstyle msub msubsup msup mtable mtd
  mtext mtr munder munderover
  none
  semantics
].to_set
# SVG element names (case-sensitive, e.g. clipPath) used to recognise
# type selectors.
SVG_TAGS = %w[
  a altGlyph altGlyphDef altGlyphItem
  animate animateColor animateMotion animateTransform
  circle clipPath color-profile cursor
  definition-src defs desc
  ellipse
  feBlend feColorMatrix feComponentTransfer feComposite feConvolveMatrix
  feDiffuseLighting feDisplacementMap feDistantLight feFlood
  feFuncA feFuncB feFuncG feFuncR
  feGaussianBlur feImage feMerge feMergeNode feMorphology feOffset
  fePointLight feSpecularLighting feSpotLight feTile feTurbulence
  filter
  font font-face font-face-format font-face-name font-face-src font-face-uri
  foreignObject
  g glyph glyphRef
  hkern
  image
  line linearGradient
  marker mask metadata missing-glyph mpath
  path pattern polygon polyline
  radialGradient rect
  script set stop style svg switch symbol
  text textPath title tref tspan
  use
  view vkern
].to_set
# Every element name treated as a tag in selectors: the union of the HTML,
# MathML and SVG sets.
MY_TAGS = HTML_TAGS | MATHML_TAGS | SVG_TAGS

Instance Attribute Summary

Attributes inherited from Tokenizer

#chunk, #group

Instance Method Summary collapse

Methods inherited from Tokenizer

#finish, #option, #set, #start, #teardown, #tokenize

Instance Method Details

#setup ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/syntax/lang/css21.rb', line 98

# Prepares the tokenizer: starts in selector mode and (re)builds the macro
# and token pattern tables from the CSS 2.1 lexical grammar.
#
# Relies on the helpers +macro+, +m+ and +token+, which are defined outside
# this method; presumably +macro+ stores a named sub-pattern in @macros,
# +m+ retrieves it for interpolation, and +token+ stores the final pattern
# (or the list of patterns collected by its block) in @tokens.
def setup
  @selector = true # true while scanning selectors, false inside { ... }
  @macros = {}
  @tokens = {}

  # http://www.w3.org/TR/CSS21/syndata.html
  macro(:h, /([0-9a-fA-F])/) # uppercase A-Z added?
  macro(:nonascii, /([^\000-\177])/)
  macro(:nl, /(\n|\r\n|\r|\f)/)
  macro(:unicode, /(\\#{m(:h)}{1,6}(\r\n|[ \t\r\n\f])?)/)
  macro(:escape, /(#{m(:unicode)}|\\[^\r\n\f0-9a-f])/)
  macro(:nmstart, /([_a-z]|#{m(:nonascii)}|#{m(:escape)})/)
  macro(:nmchar, /([_a-z0-9-]|#{m(:nonascii)}|#{m(:escape)})/)
  macro(:string1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*\")/)
  macro(:string2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*\')/)
  # invalid1/invalid2 are string1/string2 without the closing quote.
  macro(:invalid1, /(\"([^\n\r\f\\\"]|\\#{m(:nl)}|#{m(:escape)})*)/)
  macro(:invalid2, /(\'([^\n\r\f\\']|\\#{m(:nl)}|#{m(:escape)})*)/)
  macro(:comment, /(\/\*[^*]*\*+([^\/*][^*]*\*+)*\/)/)
  macro(:ident, /(-?#{m(:nmstart)}#{m(:nmchar)}*)/)
  macro(:name, /(#{m(:nmchar)}+)/)
  macro(:num, /([0-9]+|[0-9]*\.[0-9]+)/)
  macro(:string, /(#{m(:string1)}|#{m(:string2)})/)
  macro(:invalid, /(#{m(:invalid1)}|#{m(:invalid2)})/)
  macro(:s, /([ \t\r\n\f]+)/)
  macro(:w, /(#{m(:s)}?)/)

  # Single-letter macros: the plain letter, its hexadecimal escape (upper-
  # or lower-case code point, up to four leading zeros, optional trailing
  # whitespace) and, for letters that are not hex digits, the literal
  # backslash escape. A, C, D and E have no literal escape because a
  # backslash followed by a-f starts a hexadecimal escape.
  macro(:A, /(a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?)/)
  macro(:C, /(c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?)/)
  macro(:D, /(d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?)/)
  macro(:E, /(e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?)/)
  macro(:G, /(g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g)/)
  macro(:H, /(h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h)/)
  macro(:I, /(i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i)/)
  macro(:K, /(k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k)/)
  macro(:M, /(m|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m)/)
  macro(:N, /(n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n)/)
  # U+004F / U+006F are "O" / "o" (51 / 71 would be "Q" / "q").
  macro(:O, /(o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o)/)
  macro(:P, /(p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p)/)
  macro(:R, /(r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r)/)
  macro(:S, /(s|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s)/)
  macro(:T, /(t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t)/)
  macro(:X, /(x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x)/)
  macro(:Z, /(z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z)/)

  token(:COMMENT, /#{m(:comment)}/)

  token(:HASH, /\#/)
  token(:IDENT, /#{m(:ident)}/)
  token(:LBRACE, /#{m(:w)}\{/)
  token(:RBRACE, /#{m(:w)}\}/)

  token(:S, /#{m(:s)}/)

  # The lookahead keeps the opening parenthesis out of the matched text.
  token(:FUNCTION, /#{m(:ident)}(?=\()/)

  token(:PLUS, /#{m(:w)}\+/)
  token(:GREATER, /#{m(:w)}>/)
  token(:COMMA, /#{m(:w)},/)

  token(:CDO, /<!--/)
  token(:CDC, /-->/)
  token(:INCLUDES, /~=/)
  token(:DASHMATCH, /\|=/)
  token(:STRING, /#{m(:string)}/)
  token(:INVALID, /#{m(:invalid)}/)
  token(:IMPORT_SYM, /@#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}/)
  token(:PAGE_SYM, /@#{m(:P)}#{m(:A)}#{m(:G)}#{m(:E)}/)
  token(:MEDIA_SYM, /@#{m(:M)}#{m(:E)}#{m(:D)}#{m(:I)}#{m(:A)}/)
  token(:CHARSET_SYM, /@#{m(:C)}#{m(:H)}#{m(:A)}#{m(:R)}#{m(:S)}#{m(:E)}#{m(:T)}/)
  token(:IMPORTANT_SYM, /!(#{m(:w)}|#{m(:comment)})*#{m(:I)}#{m(:M)}#{m(:P)}#{m(:O)}#{m(:R)}#{m(:T)}#{m(:A)}#{m(:N)}#{m(:T)}/)
  token(:EMS, /#{m(:num)}#{m(:E)}#{m(:M)}/)
  token(:EXS, /#{m(:num)}#{m(:E)}#{m(:X)}/)

  token :LENGTH do |patterns|
    patterns << /#{m(:num)}#{m(:P)}#{m(:X)}/
    patterns << /#{m(:num)}#{m(:C)}#{m(:M)}/
    patterns << /#{m(:num)}#{m(:M)}#{m(:M)}/
    patterns << /#{m(:num)}#{m(:I)}#{m(:N)}/
    patterns << /#{m(:num)}#{m(:P)}#{m(:T)}/
    patterns << /#{m(:num)}#{m(:P)}#{m(:C)}/
  end

  token :ANGLE do |patterns|
    patterns << /#{m(:num)}#{m(:D)}#{m(:E)}#{m(:G)}/
    patterns << /#{m(:num)}#{m(:R)}#{m(:A)}#{m(:D)}/
    patterns << /#{m(:num)}#{m(:G)}#{m(:R)}#{m(:A)}#{m(:D)}/
  end

  token :TIME do |patterns|
    patterns << /#{m(:num)}#{m(:M)}#{m(:S)}/
    patterns << /#{m(:num)}#{m(:S)}/
  end

  token :FREQ do |patterns|
    patterns << /#{m(:num)}#{m(:H)}#{m(:Z)}/
    patterns << /#{m(:num)}#{m(:K)}#{m(:H)}#{m(:Z)}/
  end

  token :URI do |patterns|
    patterns << /url\(#{m(:w)}#{m(:string)}#{m(:w)}\)/
    patterns << /url\(#{m(:w)}([!$%&*-~]|#{m(:nonascii)}|#{m(:escape)})*#{m(:w)}\)/
  end

  token(:DIMENSION, /#{m(:num)}#{m(:ident)}/)
  token(:PERCENTAGE, /#{m(:num)}%/)
  token(:HEXNUM, /##{m(:h)}{2,6}/)
  token(:NUMBER, /#{m(:num)}/)

end

#step ⇒ Object



207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# File 'lib/syntax/lang/css21.rb', line 207

# Consumes one token from the input and opens the matching group.
#
# Scanning is modal: while @selector is true only selector / at-rule tokens
# are tried, otherwise only declaration tokens; a handful of tokens are
# tried in either mode, and a single character is consumed as :normal when
# nothing else matches. Candidates are tried strictly in the order listed.
def step
  if @selector
    # An opening brace ends the selector and starts a declaration block.
    if scan(@tokens[:LBRACE])
      @selector = false
      return start_group(:normal, matched)
    end

    [
      [:IMPORT_SYM,  :import],
      [:PAGE_SYM,    :page],
      [:MEDIA_SYM,   :media],
      [:CHARSET_SYM, :charset],
      [:HASH,        :normal],
      [:URI,         :uri]
    ].each do |token_name, group|
      return start_group(group, matched) if scan(@tokens[token_name])
    end

    # Identifiers naming a known element are reported as tags.
    if scan(@tokens[:IDENT])
      group = MY_TAGS.include?(matched) ? :tag : :ident
      return start_group(group, matched)
    end
  else
    # A closing brace ends the declaration block.
    if scan(@tokens[:RBRACE])
      @selector = true
      return start_group(:normal, matched)
    end

    [
      [:FUNCTION,      :function],
      [:EMS,           :ems],
      [:EXS,           :exs],
      [:LENGTH,        :length],
      [:ANGLE,         :angle],
      [:TIME,          :time],
      [:FREQ,          :freq],
      [:PERCENTAGE,    :percentage],
      [:DIMENSION,     :dimension],
      [:NUMBER,        :number],
      [:HEXNUM,        :number],
      [:IMPORTANT_SYM, :important]
    ].each do |token_name, group|
      return start_group(group, matched) if scan(@tokens[token_name])
    end

    if scan(@tokens[:IDENT])
      # Properties are checked first, so a word present in both sets
      # (e.g. "top", "right") is reported as a property.
      group =
        if CSS21_PROPERTIES.include?(matched)
          :property
        elsif CSS21_KEYWORDS.include?(matched)
          :keyword
        else
          :ident
        end
      return start_group(group, matched)
    end
  end

  # Tokens recognised in both modes.
  [
    [:S,       :normal],
    [:COMMENT, :comment],
    [:STRING,  :string],
    [:CDO,     :cdo],
    [:CDC,     :cdc],
    [:INVALID, :invalid]
  ].each do |token_name, group|
    return start_group(group, matched) if scan(@tokens[token_name])
  end

  # Fallback: consume a single character.
  start_group :normal, scan(/./x)
end