Class: Web::Unit::HTMLParser
Overview
Constant Summary
Constants inherited
from SGMLParser
SGMLParser::Attrfind, SGMLParser::Charref, SGMLParser::Commentclose, SGMLParser::Commentopen, SGMLParser::Endbracket, SGMLParser::Endtagopen, SGMLParser::Entitydefs, SGMLParser::Entityref, SGMLParser::Incomplete, SGMLParser::Interesting, SGMLParser::Special, SGMLParser::Starttagopen, SGMLParser::Tagfind
Instance Method Summary
collapse
Methods inherited from SGMLParser
#close, #feed, #finish_endtag, #finish_starttag, #goahead, #handle_charref, #handle_comment, #handle_endtag, #handle_entityref, #handle_special, #handle_starttag, #has_context, #parse_comment, #parse_endtag, #parse_special, #parse_starttag, #report_unbalanced, #reset, #setliteral, #setnomoretags, #unknown_charref, #unknown_entityref
Constructor Details
#initialize(formatter, verbose = nil) ⇒ HTMLParser
Returns a new instance of HTMLParser.
8
9
10
11
12
13
14
15
16
17
18
19
|
# File 'lib/web/unit/html-parser.rb', line 8
# Creates a parser that reports parsed content to +formatter+.
#
# formatter:: object stored in @formatter; every handler in this class
#             forwards its output to it.
# verbose::   passed straight through to the SGMLParser constructor.
def initialize(formatter, verbose=nil)
  super(verbose)
  @formatter = formatter
  # Nothing has been captured or seen yet.
  @savedata = @title = @base = @anchor = nil
  # Integer flags/counters start at zero.
  @isindex = @nofill = 0
  # Separate array instances: hrefs collected by anchor_bgn, and the
  # stack of open list descriptors.
  @anchorlist = []
  @list_stack = []
end
|
Instance Method Details
#anchor_bgn(href, name, type) ⇒ Object
48
49
50
51
52
53
|
# File 'lib/web/unit/html-parser.rb', line 48
# Records the start of an anchor.
#
# href:: stored in @anchor and, when truthy, appended to @anchorlist.
# name:: unused here.
# type:: unused here.
#
# Returns @anchorlist when href was recorded, nil otherwise.
def anchor_bgn(href, name, type)
  @anchor = href
  @anchorlist << href if @anchor
end
|
#anchor_end ⇒ Object
55
56
57
58
59
60
|
# File 'lib/web/unit/html-parser.rb', line 55
# Marks the current anchor as closed by clearing @anchor.
# Always returns nil.
def anchor_end
  @anchor = nil if @anchor
end
|
#ddpop(bl = 0) ⇒ Object
306
307
308
309
310
311
312
313
314
|
# File 'lib/web/unit/html-parser.rb', line 306
# Ends the current paragraph and, if the innermost open list entry is a
# 'dd', closes it (pops the entry and the formatter margin).
#
# bl:: value forwarded to @formatter.end_paragraph (default 0).
#
# Returns the result of @formatter.pop_margin when a 'dd' was closed,
# nil otherwise.
def ddpop(bl=0)
  @formatter.end_paragraph(bl)
  return if @list_stack.empty?
  return unless @list_stack.last[0] == 'dd'

  @list_stack.pop
  @formatter.pop_margin
end
|
#do_base(attrs) ⇒ Object
83
84
85
86
87
88
89
|
# File 'lib/web/unit/html-parser.rb', line 83
# Remembers the value of the 'href' attribute in @base.
#
# attrs:: enumerable of [name, value] pairs; if 'href' occurs more than
#         once the last occurrence wins.
#
# Returns attrs.
def do_base(attrs)
  attrs.each do |attr_name, attr_value|
    @base = attr_value if attr_name == 'href'
  end
end
|
#do_br(attrs) ⇒ Object
381
382
383
|
# File 'lib/web/unit/html-parser.rb', line 381
# Asks the formatter for a line break.
#
# attrs:: ignored.
#
# Returns whatever @formatter.add_line_break returns.
def do_br(attrs)
  formatter = @formatter
  formatter.add_line_break
end
|
#do_dd(attrs) ⇒ Object
300
301
302
303
304
|
# File 'lib/web/unit/html-parser.rb', line 300
# Opens a 'dd' entry: closes any 'dd' already open via ddpop, pushes a
# 'dd' margin on the formatter and records the entry on @list_stack.
#
# attrs:: ignored.
#
# Returns @list_stack.
def do_dd(attrs)
  ddpop
  @formatter.push_margin('dd')
  @list_stack.push(['dd', '', 0])
end
|
#do_dt(attrs) ⇒ Object
296
297
298
|
# File 'lib/web/unit/html-parser.rb', line 296
# Closes a pending 'dd' entry, if any, by delegating to ddpop with its
# default argument.
#
# attrs:: ignored.
#
# Returns ddpop's result.
def do_dt(attrs)
  ddpop()
end
|
#do_hr(attrs) ⇒ Object
385
386
387
|
# File 'lib/web/unit/html-parser.rb', line 385
# Asks the formatter for a horizontal rule.
#
# attrs:: ignored.
#
# Returns whatever @formatter.add_hor_rule returns.
def do_hr(attrs)
  formatter = @formatter
  formatter.add_hor_rule
end
|
#do_img(attrs) ⇒ Object
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
|
# File 'lib/web/unit/html-parser.rb', line 389
# Collects the attributes of an image tag and hands them to handle_image.
#
# attrs:: enumerable of [name, value] pairs. Recognised names are
#         'align', 'alt', 'ismap', 'src', 'width' and 'height'; all
#         others are ignored.
#
# 'width' and 'height' are parsed as base-10 integers. Values that are
# not plain integers (e.g. "100%", "10px", "" or nil) yield 0 instead of
# raising, so one unusual attribute no longer aborts parsing, and a
# leading zero ("010") is no longer interpreted as octal.
#
# Returns the result of handle_image(src, alt, ismap, align, width, height).
def do_img(attrs)
  align = nil
  alt = '(image)'
  ismap = nil
  src = nil
  width = 0
  height = 0

  # Tolerant dimension parser: base 10 only, 0 when not a plain integer.
  dimension = lambda do |raw|
    begin
      Integer(raw.to_s, 10)
    rescue ArgumentError, TypeError
      0
    end
  end

  attrs.each do |attrname, value|
    case attrname
    when 'align'  then align = value
    when 'alt'    then alt = value
    when 'ismap'  then ismap = value
    when 'src'    then src = value
    when 'width'  then width = dimension.call(value)
    when 'height' then height = dimension.call(value)
    end
  end

  handle_image(src, alt, ismap, align, width, height)
end
|
#do_isindex(attrs) ⇒ Object
91
92
93
|
# File 'lib/web/unit/html-parser.rb', line 91
# Flags that an isindex element was encountered by setting @isindex to 1.
#
# attrs:: ignored.
#
# Returns 1.
def do_isindex(attrs)
  seen = 1
  @isindex = seen
end
|
#do_li(attrs) ⇒ Object
234
235
236
237
238
239
240
241
242
243
|
# File 'lib/web/unit/html-parser.rb', line 234
# Starts a list item: ends the current paragraph and emits the item label.
#
# When a list is open, the innermost @list_stack entry supplies the label
# and its counter (slot 2) is incremented in place. Outside any list the
# label is '*' and the counter is 0.
#
# attrs:: ignored.
#
# Returns whatever @formatter.add_label_data returns.
def do_li(attrs)
  @formatter.end_paragraph(0)
  if @list_stack && !@list_stack.empty?
    entry = @list_stack.last
    label = entry[1]
    counter = entry[2] + 1
    entry[2] = counter
  else
    label = '*'
    counter = 0
  end
  @formatter.add_label_data(label, counter)
end
|
#do_link(attrs) ⇒ Object
95
96
|
# File 'lib/web/unit/html-parser.rb', line 95
# Intentionally does nothing; link elements are ignored by this parser.
#
# attrs:: ignored.
#
# Returns nil.
def do_link(attrs)
  nil
end
|
98
99
|
# File 'lib/web/unit/html-parser.rb', line 98
# Intentionally does nothing; meta elements are ignored by this parser.
#
# attrs:: ignored.
#
# Returns nil.
def do_meta(attrs)
  nil
end
|
#do_nextid(attrs) ⇒ Object
101
102
|
# File 'lib/web/unit/html-parser.rb', line 101
# Intentionally does nothing; nextid elements are ignored by this parser.
#
# attrs:: ignored.
#
# Returns nil.
def do_nextid(attrs)
  nil
end
|
#do_p(attrs) ⇒ Object
165
166
167
|
# File 'lib/web/unit/html-parser.rb', line 165
# Ends the current paragraph, passing 1 to @formatter.end_paragraph.
#
# attrs:: ignored.
#
# Returns whatever @formatter.end_paragraph returns.
def do_p(attrs)
  formatter = @formatter
  formatter.end_paragraph(1)
end
|
#do_plaintext(attrs) ⇒ Object
419
420
421
422
|
# File 'lib/web/unit/html-parser.rb', line 419
# Enters preformatted mode via start_pre and then calls the inherited
# setnomoretags (presumably stops tag recognition for the rest of the
# input -- confirm against SGMLParser).
#
# attrs:: forwarded to start_pre.
#
# Returns setnomoretags' result.
def do_plaintext(attrs)
  start_pre(attrs)
  setnomoretags
end
|
377
378
379
|
# File 'lib/web/unit/html-parser.rb', line 377
# Closes the current anchor by delegating to anchor_end.
# Returns anchor_end's result.
def end_a
  anchor_end()
end
|
#end_address ⇒ Object
205
206
207
208
|
# File 'lib/web/unit/html-parser.rb', line 205
# Closes an address block: ends the paragraph (argument 0) and pops the
# font pushed by start_address.
# Returns whatever @formatter.pop_font returns.
def end_address
  formatter = @formatter
  formatter.end_paragraph(0)
  formatter.pop_font
end
|
347
348
349
|
# File 'lib/web/unit/html-parser.rb', line 347
# Pops the font pushed by start_b.
# Returns whatever @formatter.pop_font returns.
def end_b
  formatter = @formatter
  formatter.pop_font
end
|
#end_blockquote ⇒ Object
215
216
217
218
|
# File 'lib/web/unit/html-parser.rb', line 215
# Closes a blockquote: ends the paragraph (argument 1) and pops the
# margin pushed by start_blockquote.
# Returns whatever @formatter.pop_margin returns.
def end_blockquote
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.pop_margin
end
|
73
|
# File 'lib/web/unit/html-parser.rb', line 73
# Intentionally empty; nothing is done when body ends. Returns nil.
def end_body
  nil
end
|
317
|
# File 'lib/web/unit/html-parser.rb', line 317
# cite is closed exactly like i; delegates to end_i and returns its result.
def end_cite
  end_i
end
|
320
|
# File 'lib/web/unit/html-parser.rb', line 320
# code is closed exactly like tt; delegates to end_tt and returns its result.
def end_code
  end_tt
end
|
280
281
282
|
# File 'lib/web/unit/html-parser.rb', line 280
# dir lists are closed exactly like ul lists; delegates to end_ul and
# returns its result.
def end_dir
  end_ul()
end
|
289
290
291
292
293
294
|
# File 'lib/web/unit/html-parser.rb', line 289
# Closes a definition list: ddpop(1) closes a pending 'dd', then the
# innermost remaining @list_stack entry is removed when one exists.
#
# Returns the popped entry, or nil when the stack was already empty.
def end_dl
  ddpop(1)
  @list_stack.pop unless @list_stack.empty?
end
|
323
|
# File 'lib/web/unit/html-parser.rb', line 323
# em is closed exactly like i; delegates to end_i and returns its result.
def end_em
  end_i
end
|
110
111
112
113
|
# File 'lib/web/unit/html-parser.rb', line 110
# Closes an h1 heading: ends the paragraph (argument 1) and pops the
# heading font. Returns whatever @formatter.pop_font returns.
def end_h1
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.pop_font
end
|
120
121
122
123
|
# File 'lib/web/unit/html-parser.rb', line 120
# Closes an h2 heading: ends the paragraph (argument 1) and pops the
# heading font. Returns whatever @formatter.pop_font returns.
def end_h2
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.pop_font
end
|
130
131
132
133
|
# File 'lib/web/unit/html-parser.rb', line 130
# Closes an h3 heading: ends the paragraph (argument 1) and pops the
# heading font. Returns whatever @formatter.pop_font returns.
def end_h3
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.pop_font
end
|
140
141
142
143
|
# File 'lib/web/unit/html-parser.rb', line 140
# Closes an h4 heading: ends the paragraph (argument 1) and pops the
# heading font. Returns whatever @formatter.pop_font returns.
def end_h4
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.pop_font
end
|
150
151
152
153
|
# File 'lib/web/unit/html-parser.rb', line 150
# Closes an h5 heading: ends the paragraph (argument 1) and pops the
# heading font. Returns whatever @formatter.pop_font returns.
def end_h5
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.pop_font
end
|
160
161
162
163
|
# File 'lib/web/unit/html-parser.rb', line 160
# Closes an h6 heading: ends the paragraph (argument 1) and pops the
# heading font. Returns whatever @formatter.pop_font returns.
def end_h6
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.pop_font
end
|
70
|
# File 'lib/web/unit/html-parser.rb', line 70
# Intentionally empty; nothing is done when head ends. Returns nil.
def end_head
  nil
end
|
67
|
# File 'lib/web/unit/html-parser.rb', line 67
# Intentionally empty; nothing is done when html ends. Returns nil.
def end_html
  nil
end
|
340
341
342
|
# File 'lib/web/unit/html-parser.rb', line 340
# Pops the font pushed by start_i.
# Returns whatever @formatter.pop_font returns.
def end_i
  formatter = @formatter
  formatter.pop_font
end
|
326
|
# File 'lib/web/unit/html-parser.rb', line 326
# kbd is closed exactly like tt; delegates to end_tt and returns its result.
def end_kbd
  end_tt
end
|
#end_listing ⇒ Object
196
197
198
|
# File 'lib/web/unit/html-parser.rb', line 196
# listing is closed exactly like pre; delegates to end_pre and returns
# its result.
def end_listing
  end_pre()
end
|
272
273
274
|
# File 'lib/web/unit/html-parser.rb', line 272
# menu lists are closed exactly like ul lists; delegates to end_ul and
# returns its result.
#
# NOTE(review): the method name was missing from this listing (a bare
# `def` followed by `end_ul`), which would define an empty `end_ul`
# instead. It is restored as end_menu based on its position between
# end_listing and end_ol and on the parallel end_dir -- confirm against
# lib/web/unit/html-parser.rb line 272.
def end_menu
  end_ul
end
|
260
261
262
263
264
265
266
|
# File 'lib/web/unit/html-parser.rb', line 260
# Closes an ordered list: drops the innermost @list_stack entry (a no-op
# on an empty stack), ends the paragraph (argument 0) and pops the
# margin.
# Returns whatever @formatter.pop_margin returns.
def end_ol
  @list_stack.pop if @list_stack
  formatter = @formatter
  formatter.end_paragraph(0)
  formatter.pop_margin
end
|
175
176
177
178
179
180
|
# File 'lib/web/unit/html-parser.rb', line 175
# Leaves preformatted mode: ends the paragraph (argument 1), pops the
# font pushed by start_pre and decrements @nofill, never letting it drop
# below zero.
#
# Returns 0 when the counter had to be clamped, nil otherwise.
def end_pre
  @formatter.end_paragraph(1)
  @formatter.pop_font
  @nofill -= 1
  @nofill = 0 if @nofill < 0
end
|
329
|
# File 'lib/web/unit/html-parser.rb', line 329
# samp is closed exactly like tt; delegates to end_tt and returns its result.
def end_samp
  end_tt
end
|
#end_strong ⇒ Object
332
|
# File 'lib/web/unit/html-parser.rb', line 332
# strong is closed exactly like b; delegates to end_b and returns its result.
def end_strong
  end_b
end
|
#end_title ⇒ Object
79
80
81
|
# File 'lib/web/unit/html-parser.rb', line 79
# Finishes the capture begun by start_title and stores the text returned
# by save_end in @title.
# Returns that text.
def end_title
  captured = save_end
  @title = captured
end
|
354
355
356
|
# File 'lib/web/unit/html-parser.rb', line 354
# Pops the font pushed by start_tt.
# Returns whatever @formatter.pop_font returns.
def end_tt
  formatter = @formatter
  formatter.pop_font
end
|
226
227
228
229
230
231
232
|
# File 'lib/web/unit/html-parser.rb', line 226
# Closes an unordered list: drops the innermost @list_stack entry (a
# no-op on an empty stack), ends the paragraph (argument 0) and pops the
# margin.
# Returns whatever @formatter.pop_margin returns.
def end_ul
  @list_stack.pop if @list_stack
  formatter = @formatter
  formatter.end_paragraph(0)
  formatter.pop_margin
end
|
335
|
# File 'lib/web/unit/html-parser.rb', line 335
# var is closed exactly like i; delegates to end_i and returns its result.
def end_var
  end_i
end
|
187
188
189
|
# File 'lib/web/unit/html-parser.rb', line 187
# xmp is closed exactly like pre; delegates to end_pre and returns its
# result.
def end_xmp
  end_pre()
end
|
#handle_data(data) ⇒ Object
22
23
24
25
26
27
28
29
30
31
32
|
# File 'lib/web/unit/html-parser.rb', line 22
# Routes a chunk of character data.
#
# * While a capture is active (@savedata is set) the data is appended to
#   the capture buffer (a new string is assigned; the old one is not
#   mutated).
# * Otherwise it goes to the formatter: as literal data when @nofill is
#   non-zero, as flowing data when it is zero.
#
# data:: the text chunk.
#
# Returns the new buffer, or the formatter call's result.
def handle_data(data)
  if @savedata
    @savedata += data
  elsif @nofill != 0
    @formatter.add_literal_data(data)
  else
    @formatter.add_flowing_data(data)
  end
end
|
#handle_image(src, alt, *args) ⇒ Object
62
63
64
|
# File 'lib/web/unit/html-parser.rb', line 62
# Default image handling: emits the alternative text as ordinary data.
#
# src::  ignored here.
# alt::  text passed to handle_data.
# args:: any further arguments are accepted and ignored.
#
# Returns handle_data's result.
def handle_image(src, alt, *args)
  replacement_text = alt
  handle_data(replacement_text)
end
|
34
35
36
|
# File 'lib/web/unit/html-parser.rb', line 34
# Starts capturing character data: resets @savedata to an empty string so
# that handle_data appends to it instead of writing to the formatter.
# Returns the new empty buffer.
def save_bgn
  empty_buffer = ''
  @savedata = empty_buffer
end
|
38
39
40
41
42
43
44
45
46
|
# File 'lib/web/unit/html-parser.rb', line 38
# Stops capturing and returns the text collected since save_bgn.
#
# @savedata is reset to nil. If nothing was being captured the result is
# an empty string. Unless preformatted mode is active (@nofill non-zero)
# runs of whitespace are collapsed to single spaces and leading/trailing
# whitespace is dropped.
def save_end
  text = @savedata
  @savedata = nil
  text = '' if text.nil?
  return text unless @nofill == 0

  text.split.join(" ")
end
|
#start_a(attrs) ⇒ Object
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
|
# File 'lib/web/unit/html-parser.rb', line 358
# Opens an anchor: extracts 'href', 'name' and 'type' from the attributes
# and passes them to anchor_bgn.
#
# attrs:: enumerable of [name, value] pairs. Every value is stripped of
#         surrounding whitespace (so values must respond to #strip); the
#         'type' value is additionally lower-cased.
#
# Returns anchor_bgn's result.
def start_a(attrs)
  href = name = type = nil
  attrs.each do |attrname, raw_value|
    value = raw_value.strip
    case attrname
    when 'href' then href = value
    when 'name' then name = value
    when 'type' then type = value.downcase
    end
  end
  anchor_bgn(href, name, type)
end
|
#start_address(attrs) ⇒ Object
200
201
202
203
|
# File 'lib/web/unit/html-parser.rb', line 200
# Opens an address block: ends the paragraph (argument 0) and pushes a
# font with only the second slot set to 1 (presumably italic -- confirm
# against the formatter's push_font signature).
#
# attrs:: ignored.
#
# Returns whatever @formatter.push_font returns.
def start_address(attrs)
  formatter = @formatter
  formatter.end_paragraph(0)
  formatter.push_font(nil, 1, nil, nil)
end
|
#start_b(attrs) ⇒ Object
344
345
346
|
# File 'lib/web/unit/html-parser.rb', line 344
# Pushes a font with only the third slot set to 1 (presumably bold --
# confirm against the formatter's push_font signature).
#
# attrs:: ignored.
#
# Returns whatever @formatter.push_font returns.
def start_b(attrs)
  formatter = @formatter
  formatter.push_font(nil, nil, 1, nil)
end
|
#start_blockquote(attrs) ⇒ Object
210
211
212
213
|
# File 'lib/web/unit/html-parser.rb', line 210
# Opens a blockquote: ends the paragraph (argument 1) and pushes a
# 'blockquote' margin.
#
# attrs:: ignored.
#
# Returns whatever @formatter.push_margin returns.
def start_blockquote(attrs)
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.push_margin('blockquote')
end
|
#start_body(attrs) ⇒ Object
72
|
# File 'lib/web/unit/html-parser.rb', line 72
# Intentionally empty; nothing is done when body starts.
#
# attrs:: ignored.
#
# Returns nil.
def start_body(attrs)
  nil
end
|
#start_cite(attrs) ⇒ Object
316
|
# File 'lib/web/unit/html-parser.rb', line 316
# cite is rendered exactly like i; delegates to start_i.
#
# attrs:: forwarded unchanged.
#
# Returns start_i's result.
def start_cite(attrs)
  start_i(attrs)
end
|
#start_code(attrs) ⇒ Object
319
|
# File 'lib/web/unit/html-parser.rb', line 319
# code is rendered exactly like tt; delegates to start_tt.
#
# attrs:: forwarded unchanged.
#
# Returns start_tt's result.
def start_code(attrs)
  start_tt(attrs)
end
|
#start_dir(attrs) ⇒ Object
276
277
278
|
# File 'lib/web/unit/html-parser.rb', line 276
# dir lists are opened exactly like ul lists; delegates to start_ul.
#
# attrs:: forwarded unchanged.
#
# Returns start_ul's result.
def start_dir(attrs)
  list_attrs = attrs
  start_ul(list_attrs)
end
|
#start_dl(attrs) ⇒ Object
284
285
286
287
|
# File 'lib/web/unit/html-parser.rb', line 284
# Opens a definition list: ends the paragraph (argument 1) and records a
# 'dl' entry on @list_stack. No margin is pushed here.
#
# attrs:: ignored.
#
# Returns @list_stack.
def start_dl(attrs)
  @formatter.end_paragraph(1)
  @list_stack.push(['dl', '', 0])
end
|
#start_em(attrs) ⇒ Object
322
|
# File 'lib/web/unit/html-parser.rb', line 322
# em is rendered exactly like i; delegates to start_i.
#
# attrs:: forwarded unchanged.
#
# Returns start_i's result.
def start_em(attrs)
  start_i(attrs)
end
|
#start_h1(attrs) ⇒ Object
105
106
107
108
|
# File 'lib/web/unit/html-parser.rb', line 105
# Opens an h1 heading: ends the paragraph (argument 1) and pushes the
# font ('h1', 0, 1, 0).
#
# attrs:: ignored.
#
# Returns whatever @formatter.push_font returns.
def start_h1(attrs)
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.push_font('h1', 0, 1, 0)
end
|
#start_h2(attrs) ⇒ Object
115
116
117
118
|
# File 'lib/web/unit/html-parser.rb', line 115
# Opens an h2 heading: ends the paragraph (argument 1) and pushes the
# font ('h2', 0, 1, 0).
#
# attrs:: ignored.
#
# Returns whatever @formatter.push_font returns.
def start_h2(attrs)
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.push_font('h2', 0, 1, 0)
end
|
#start_h3(attrs) ⇒ Object
125
126
127
128
|
# File 'lib/web/unit/html-parser.rb', line 125
# Opens an h3 heading: ends the paragraph (argument 1) and pushes the
# font ('h3', 0, 1, 0).
#
# attrs:: ignored.
#
# Returns whatever @formatter.push_font returns.
def start_h3(attrs)
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.push_font('h3', 0, 1, 0)
end
|
#start_h4(attrs) ⇒ Object
135
136
137
138
|
# File 'lib/web/unit/html-parser.rb', line 135
# Opens an h4 heading: ends the paragraph (argument 1) and pushes the
# font ('h4', 0, 1, 0).
#
# attrs:: ignored.
#
# Returns whatever @formatter.push_font returns.
def start_h4(attrs)
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.push_font('h4', 0, 1, 0)
end
|
#start_h5(attrs) ⇒ Object
145
146
147
148
|
# File 'lib/web/unit/html-parser.rb', line 145
# Opens an h5 heading: ends the paragraph (argument 1) and pushes the
# font ('h5', 0, 1, 0).
#
# attrs:: ignored.
#
# Returns whatever @formatter.push_font returns.
def start_h5(attrs)
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.push_font('h5', 0, 1, 0)
end
|
#start_h6(attrs) ⇒ Object
155
156
157
158
|
# File 'lib/web/unit/html-parser.rb', line 155
# Opens an h6 heading: ends the paragraph (argument 1) and pushes the
# font ('h6', 0, 1, 0).
#
# attrs:: ignored.
#
# Returns whatever @formatter.push_font returns.
def start_h6(attrs)
  formatter = @formatter
  formatter.end_paragraph(1)
  formatter.push_font('h6', 0, 1, 0)
end
|
#start_head(attrs) ⇒ Object
69
|
# File 'lib/web/unit/html-parser.rb', line 69
# Intentionally empty; nothing is done when head starts.
#
# attrs:: ignored.
#
# Returns nil.
def start_head(attrs)
  nil
end
|
#start_html(attrs) ⇒ Object
66
|
# File 'lib/web/unit/html-parser.rb', line 66
# Intentionally empty; nothing is done when html starts.
#
# attrs:: ignored.
#
# Returns nil.
def start_html(attrs)
  nil
end
|
#start_i(attrs) ⇒ Object
337
338
339
|
# File 'lib/web/unit/html-parser.rb', line 337
# Pushes a font with only the second slot set to 1 (presumably italic --
# confirm against the formatter's push_font signature).
#
# attrs:: ignored.
#
# Returns whatever @formatter.push_font returns.
def start_i(attrs)
  formatter = @formatter
  formatter.push_font(nil, 1, nil, nil)
end
|
#start_kbd(attrs) ⇒ Object
325
|
# File 'lib/web/unit/html-parser.rb', line 325
# kbd is rendered exactly like tt; delegates to start_tt.
#
# attrs:: forwarded unchanged.
#
# Returns start_tt's result.
def start_kbd(attrs)
  start_tt(attrs)
end
|
#start_listing(attrs) ⇒ Object
191
192
193
194
|
# File 'lib/web/unit/html-parser.rb', line 191
# Opens a listing block: enters preformatted mode via start_pre, then
# calls the inherited setliteral with 'listing' (presumably switches the
# parser to literal mode until the matching end tag -- confirm against
# SGMLParser).
#
# attrs:: forwarded to start_pre.
#
# Returns setliteral's result.
def start_listing(attrs)
  start_pre(attrs)
  setliteral('listing')
end
|
268
269
270
|
# File 'lib/web/unit/html-parser.rb', line 268
# menu lists are opened exactly like ul lists; delegates to start_ul.
#
# NOTE(review): the method name was missing from this listing
# (`def (attrs)`, which is not valid Ruby). It is restored as start_menu
# based on its position between start_listing and start_ol and on the
# parallel start_dir -- confirm against lib/web/unit/html-parser.rb
# line 268.
#
# attrs:: forwarded unchanged.
#
# Returns start_ul's result.
def start_menu(attrs)
  start_ul(attrs)
end
|
#start_ol(attrs) ⇒ Object
245
246
247
248
249
250
251
252
253
254
255
256
257
258
|
# File 'lib/web/unit/html-parser.rb', line 245
# Opens an ordered list: ends the paragraph (argument 0), pushes an 'ol'
# margin and records the list on @list_stack with a counter of 0.
#
# attrs:: enumerable of [name, value] pairs. A one-character 'type'
#         value becomes the label with a '.' appended; otherwise the
#         label stays '1.'. The last qualifying 'type' wins.
#
# Returns @list_stack.
def start_ol(attrs)
  @formatter.end_paragraph(0)
  @formatter.push_margin('ol')
  label = '1.'
  attrs.each do |attr_name, attr_value|
    next unless attr_name == 'type'

    label = attr_value + '.' if attr_value.length == 1
  end
  @list_stack << ['ol', label, 0]
end
|
#start_pre(attrs) ⇒ Object
169
170
171
172
173
|
# File 'lib/web/unit/html-parser.rb', line 169
# Enters preformatted mode: ends the paragraph (argument 1), pushes a
# font with only the fourth slot set to 1 (presumably teletype -- confirm
# against the formatter's push_font signature) and increments @nofill so
# that handle_data emits literal data.
#
# attrs:: ignored.
#
# Returns the new @nofill value.
def start_pre(attrs)
  @formatter.end_paragraph(1)
  @formatter.push_font(nil, nil, nil, 1)
  @nofill += 1
end
|
#start_samp(attrs) ⇒ Object
328
|
# File 'lib/web/unit/html-parser.rb', line 328
# samp is rendered exactly like tt; delegates to start_tt.
#
# attrs:: forwarded unchanged.
#
# Returns start_tt's result.
def start_samp(attrs)
  start_tt(attrs)
end
|
#start_strong(attrs) ⇒ Object
331
|
# File 'lib/web/unit/html-parser.rb', line 331
# strong is rendered exactly like b; delegates to start_b.
#
# attrs:: forwarded unchanged.
#
# Returns start_b's result.
def start_strong(attrs)
  start_b(attrs)
end
|
#start_title(attrs) ⇒ Object
75
76
77
|
# File 'lib/web/unit/html-parser.rb', line 75
# Begins capturing the title text by delegating to save_bgn; end_title
# collects the result.
#
# attrs:: ignored.
#
# Returns save_bgn's result.
def start_title(attrs)
  save_bgn()
end
|
#start_tt(attrs) ⇒ Object
351
352
353
|
# File 'lib/web/unit/html-parser.rb', line 351
# Pushes a font with only the fourth slot set to 1 (presumably teletype
# -- confirm against the formatter's push_font signature).
#
# attrs:: ignored.
#
# Returns whatever @formatter.push_font returns.
def start_tt(attrs)
  formatter = @formatter
  formatter.push_font(nil, nil, nil, 1)
end
|
#start_ul(attrs) ⇒ Object
220
221
222
223
224
|
# File 'lib/web/unit/html-parser.rb', line 220
# Opens an unordered list: ends the paragraph (argument 0), pushes a 'ul'
# margin and records the list on @list_stack with label '*' and a
# counter of 0.
#
# attrs:: ignored.
#
# Returns @list_stack.
def start_ul(attrs)
  @formatter.end_paragraph(0)
  @formatter.push_margin('ul')
  @list_stack.push(['ul', '*', 0])
end
|
#start_var(attrs) ⇒ Object
334
|
# File 'lib/web/unit/html-parser.rb', line 334
# var is rendered exactly like i; delegates to start_i.
#
# attrs:: forwarded unchanged.
#
# Returns start_i's result.
def start_var(attrs)
  start_i(attrs)
end
|
#start_xmp(attrs) ⇒ Object
182
183
184
185
|
# File 'lib/web/unit/html-parser.rb', line 182
# Opens an xmp block: enters preformatted mode via start_pre, then calls
# the inherited setliteral with 'xmp' (presumably switches the parser to
# literal mode until the matching end tag -- confirm against SGMLParser).
#
# attrs:: forwarded to start_pre.
#
# Returns setliteral's result.
def start_xmp(attrs)
  start_pre(attrs)
  setliteral('xmp')
end
|
#unknown_endtag(tag) ⇒ Object
427
428
|
# File 'lib/web/unit/html-parser.rb', line 427
# Intentionally does nothing, overriding whatever SGMLParser would do
# for end tags without a dedicated handler.
#
# tag:: ignored.
#
# Returns nil.
def unknown_endtag(tag)
  nil
end
|
#unknown_starttag(tag, attrs) ⇒ Object
424
425
|
# File 'lib/web/unit/html-parser.rb', line 424
# Intentionally does nothing, overriding whatever SGMLParser would do
# for start tags without a dedicated handler.
#
# tag::   ignored.
# attrs:: ignored.
#
# Returns nil.
def unknown_starttag(tag, attrs)
  nil
end
|