Class: Web::Unit::HTMLParser

Inherits:
SGMLParser show all
Defined in:
lib/web/unit/html-parser.rb

Overview

:nodoc: all

Constant Summary

Constants inherited from SGMLParser

SGMLParser::Attrfind, SGMLParser::Charref, SGMLParser::Commentclose, SGMLParser::Commentopen, SGMLParser::Endbracket, SGMLParser::Endtagopen, SGMLParser::Entitydefs, SGMLParser::Entityref, SGMLParser::Incomplete, SGMLParser::Interesting, SGMLParser::Special, SGMLParser::Starttagopen, SGMLParser::Tagfind

Instance Method Summary collapse

Methods inherited from SGMLParser

#close, #feed, #finish_endtag, #finish_starttag, #goahead, #handle_charref, #handle_comment, #handle_endtag, #handle_entityref, #handle_special, #handle_starttag, #has_context, #parse_comment, #parse_endtag, #parse_special, #parse_starttag, #report_unbalanced, #reset, #setliteral, #setnomoretags, #unknown_charref, #unknown_entityref

Constructor Details

#initialize(formatter, verbose = nil) ⇒ HTMLParser

Returns a new instance of HTMLParser.



8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/web/unit/html-parser.rb', line 8

# Creates a parser whose tag handlers drive +formatter+.
#
# formatter:: stored in @formatter; the handlers in this class call methods
#             on it.
# verbose::   handed unchanged to the superclass constructor.
def initialize(formatter, verbose = nil)
  super(verbose)
  @formatter = formatter
  # Document-level values set by the title/base/isindex handlers.
  @title = @base = nil
  @isindex = 0
  # Text capture buffer (nil while not capturing) and the nesting depth of
  # literal-text elements.
  @savedata = nil
  @nofill = 0
  # Anchor bookkeeping and the stack of open lists.
  @anchor = nil
  @anchorlist = []
  @list_stack = []
end

Instance Method Details

#anchor_bgn(href, name, type) ⇒ Object



48
49
50
51
52
53
# File 'lib/web/unit/html-parser.rb', line 48

# Marks the start of an anchor. +href+ becomes the current anchor and, when
# it is neither nil nor false, is also appended to @anchorlist. +name+ and
# +type+ are accepted but not used here.
def anchor_bgn(href, name, type)
  @anchor = href
  @anchorlist << href if href
end

#anchor_end ⇒ Object



55
56
57
58
59
60
# File 'lib/web/unit/html-parser.rb', line 55

# Marks the end of the current anchor by clearing @anchor. Does nothing when
# no anchor is open.
def anchor_end
  return unless @anchor

  # Disabled: handle_data(format "[%d]", @anchorlist.length)
  @anchor = nil
end

#ddpop(bl = 0) ⇒ Object



306
307
308
309
310
311
312
313
314
# File 'lib/web/unit/html-parser.rb', line 306

# Ends the current paragraph and, if the innermost open list entry is a 'dd',
# removes it from @list_stack and pops the formatter margin.
#
# bl:: passed straight through to the formatter's end_paragraph.
def ddpop(bl = 0)
  @formatter.end_paragraph(bl)
  return if @list_stack.empty? || @list_stack.last[0] != 'dd'

  @list_stack.pop
  @formatter.pop_margin
end

#do_base(attrs) ⇒ Object



83
84
85
86
87
88
89
# File 'lib/web/unit/html-parser.rb', line 83

# Handles <base>: stores the value of the 'href' attribute in @base. If
# 'href' appears more than once the last value wins.
#
# attrs:: enumerable of [name, value] pairs.
def do_base(attrs)
  attrs.each do |key, val|
    @base = val if key == 'href'
  end
end

#do_br(attrs) ⇒ Object



381
382
383
# File 'lib/web/unit/html-parser.rb', line 381

# Handles <br> by asking the formatter for a line break. +attrs+ is ignored.
def do_br(attrs)
  fmt = @formatter
  fmt.add_line_break
end

#do_dd(attrs) ⇒ Object



300
301
302
303
304
# File 'lib/web/unit/html-parser.rb', line 300

# Handles <dd>: closes any 'dd' that is still open (via ddpop), pushes a 'dd'
# margin on the formatter and records the new entry on @list_stack.
# +attrs+ is ignored.
def do_dd(attrs)
  ddpop
  @formatter.push_margin('dd')
  @list_stack.push(['dd', '', 0])
end

#do_dt(attrs) ⇒ Object



296
297
298
# File 'lib/web/unit/html-parser.rb', line 296

# Handles <dt> by closing any 'dd' that is still open. +attrs+ is ignored.
def do_dt(attrs)
  ddpop()
end

#do_hr(attrs) ⇒ Object



385
386
387
# File 'lib/web/unit/html-parser.rb', line 385

# Handles <hr> by asking the formatter for a horizontal rule. +attrs+ is
# ignored.
def do_hr(attrs)
  fmt = @formatter
  fmt.add_hor_rule
end

#do_img(attrs) ⇒ Object



389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
# File 'lib/web/unit/html-parser.rb', line 389

# Handles <img>: collects the attributes of interest and passes them to
# handle_image(src, alt, ismap, align, width, height).
#
# attrs:: enumerable of [name, value] pairs.
#
# Defaults when an attribute is absent: alt is '(image)', width and height
# are 0, and src, align and ismap are nil.
#
# A width or height that Integer() cannot convert (for example "50%", "" or
# nil) is ignored and the previous value is kept. Without this, a single
# non-numeric dimension raises and aborts parsing of the whole document.
def do_img(attrs)
  align = nil
  alt = '(image)'
  ismap = nil
  src = nil
  width = 0
  height = 0
  attrs.each do |attrname, value|
    case attrname
    when 'align' then align = value
    when 'alt'   then alt = value
    when 'ismap' then ismap = value
    when 'src'   then src = value
    when 'width'
      begin
        width = Integer(value)
      rescue ArgumentError, TypeError
        # Not a plain integer: keep the current width.
      end
    when 'height'
      begin
        height = Integer(value)
      rescue ArgumentError, TypeError
        # Not a plain integer: keep the current height.
      end
    end
  end
  handle_image(src, alt, ismap, align, width, height)
end

#do_isindex(attrs) ⇒ Object



91
92
93
# File 'lib/web/unit/html-parser.rb', line 91

# Handles <isindex> by setting @isindex to 1. +attrs+ is ignored.
def do_isindex(attrs)
  @isindex = 1
end

#do_li(attrs) ⇒ Object



234
235
236
237
238
239
240
241
242
243
# File 'lib/web/unit/html-parser.rb', line 234

# Handles <li>: ends the current paragraph and emits a label for the item.
#
# Inside an open list the label comes from the innermost @list_stack entry
# ([kind, label, counter]) and that entry's counter is incremented first.
# Outside any list the label is '*' with counter 0. +attrs+ is ignored.
def do_li(attrs)
  @formatter.end_paragraph(0)
  if @list_stack && !@list_stack.empty?
    entry = @list_stack.last
    label = entry[1]
    counter = entry[2] + 1
    entry[2] = counter
  else
    label = '*'
    counter = 0
  end
  @formatter.add_label_data(label, counter)
end


95
96
# File 'lib/web/unit/html-parser.rb', line 95

# <link> is accepted and ignored.
def do_link(attrs)
  nil
end

#do_meta(attrs) ⇒ Object



98
99
# File 'lib/web/unit/html-parser.rb', line 98

# <meta> is accepted and ignored.
def do_meta(attrs)
  nil
end

#do_nextid(attrs) ⇒ Object

Deprecated



101
102
# File 'lib/web/unit/html-parser.rb', line 101

# <nextid> (deprecated) is accepted and ignored.
def do_nextid(attrs)
  nil
end

#do_p(attrs) ⇒ Object



165
166
167
# File 'lib/web/unit/html-parser.rb', line 165

# Handles <p> by ending the current paragraph with an argument of 1.
# +attrs+ is ignored.
def do_p(attrs)
  fmt = @formatter
  fmt.end_paragraph(1)
end

#do_plaintext(attrs) ⇒ Object



419
420
421
422
# File 'lib/web/unit/html-parser.rb', line 419

# Handles <plaintext>: starts preformatted mode exactly as <pre> does, then
# calls setnomoretags.
def do_plaintext(attrs)
  start_pre(attrs)
  setnomoretags() # Tell SGML parser
end

#end_a ⇒ Object



377
378
379
# File 'lib/web/unit/html-parser.rb', line 377

# Handles </a> by delegating to anchor_end.
def end_a
  anchor_end()
end

#end_address ⇒ Object



205
206
207
208
# File 'lib/web/unit/html-parser.rb', line 205

# Handles </address>: ends the paragraph (argument 0) and pops the font
# pushed by start_address.
def end_address
  fmt = @formatter
  fmt.end_paragraph(0)
  fmt.pop_font
end

#end_b ⇒ Object



347
348
349
# File 'lib/web/unit/html-parser.rb', line 347

# Handles </b> by popping the font pushed by start_b.
def end_b
  fmt = @formatter
  fmt.pop_font
end

#end_blockquote ⇒ Object



215
216
217
218
# File 'lib/web/unit/html-parser.rb', line 215

# Handles </blockquote>: ends the paragraph (argument 1) and pops the margin
# pushed by start_blockquote.
def end_blockquote
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.pop_margin
end

#end_body ⇒ Object



73
# File 'lib/web/unit/html-parser.rb', line 73

# </body> needs no action.
def end_body
  nil
end

#end_cite ⇒ Object



317
# File 'lib/web/unit/html-parser.rb', line 317

# </cite> is closed the same way as </i>.
def end_cite
  end_i
end

#end_code ⇒ Object



320
# File 'lib/web/unit/html-parser.rb', line 320

# </code> is closed the same way as </tt>.
def end_code
  end_tt
end

#end_dir ⇒ Object



280
281
282
# File 'lib/web/unit/html-parser.rb', line 280

# </dir> is closed the same way as </ul>.
def end_dir
  end_ul()
end

#end_dl ⇒ Object



289
290
291
292
293
294
# File 'lib/web/unit/html-parser.rb', line 289

# Handles </dl>: closes any open 'dd' via ddpop(1), then removes the
# innermost remaining entry from @list_stack if there is one.
def end_dl
  ddpop(1)
  @list_stack.pop unless @list_stack.empty?
end

#end_em ⇒ Object



323
# File 'lib/web/unit/html-parser.rb', line 323

# </em> is closed the same way as </i>.
def end_em
  end_i
end

#end_h1 ⇒ Object



110
111
112
113
# File 'lib/web/unit/html-parser.rb', line 110

# Handles </h1>: ends the paragraph (argument 1) and pops the heading font.
def end_h1
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.pop_font
end

#end_h2 ⇒ Object



120
121
122
123
# File 'lib/web/unit/html-parser.rb', line 120

# Handles </h2>: ends the paragraph (argument 1) and pops the heading font.
def end_h2
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.pop_font
end

#end_h3 ⇒ Object



130
131
132
133
# File 'lib/web/unit/html-parser.rb', line 130

# Handles </h3>: ends the paragraph (argument 1) and pops the heading font.
def end_h3
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.pop_font
end

#end_h4 ⇒ Object



140
141
142
143
# File 'lib/web/unit/html-parser.rb', line 140

# Handles </h4>: ends the paragraph (argument 1) and pops the heading font.
def end_h4
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.pop_font
end

#end_h5 ⇒ Object



150
151
152
153
# File 'lib/web/unit/html-parser.rb', line 150

# Handles </h5>: ends the paragraph (argument 1) and pops the heading font.
def end_h5
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.pop_font
end

#end_h6 ⇒ Object



160
161
162
163
# File 'lib/web/unit/html-parser.rb', line 160

# Handles </h6>: ends the paragraph (argument 1) and pops the heading font.
def end_h6
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.pop_font
end

#end_head ⇒ Object



70
# File 'lib/web/unit/html-parser.rb', line 70

# </head> needs no action.
def end_head
  nil
end

#end_html ⇒ Object



67
# File 'lib/web/unit/html-parser.rb', line 67

# </html> needs no action.
def end_html
  nil
end

#end_i ⇒ Object



340
341
342
# File 'lib/web/unit/html-parser.rb', line 340

# Handles </i> by popping the font pushed by start_i.
def end_i
  fmt = @formatter
  fmt.pop_font
end

#end_kbd ⇒ Object



326
# File 'lib/web/unit/html-parser.rb', line 326

# </kbd> is closed the same way as </tt>.
def end_kbd
  end_tt
end

#end_listing ⇒ Object



196
197
198
# File 'lib/web/unit/html-parser.rb', line 196

# </listing> is closed the same way as </pre>.
def end_listing
  end_pre()
end

#end_menu ⇒ Object



272
273
274
# File 'lib/web/unit/html-parser.rb', line 272

# </menu> is closed the same way as </ul>.
def end_menu
  end_ul()
end

#end_ol ⇒ Object



260
261
262
263
264
265
266
# File 'lib/web/unit/html-parser.rb', line 260

# Handles </ol>: drops the innermost @list_stack entry (a no-op on an empty
# stack), ends the paragraph (argument 0) and pops the formatter margin.
def end_ol
  @list_stack.pop if @list_stack
  @formatter.end_paragraph(0)
  @formatter.pop_margin
end

#end_pre ⇒ Object



175
176
177
178
179
180
# File 'lib/web/unit/html-parser.rb', line 175

# Handles </pre>: ends the paragraph (argument 1), pops the font pushed by
# start_pre and decrements @nofill, never letting it drop below 0.
def end_pre
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.pop_font
  @nofill -= 1
  @nofill = 0 if @nofill < 0
end

#end_samp ⇒ Object



329
# File 'lib/web/unit/html-parser.rb', line 329

# </samp> is closed the same way as </tt>.
def end_samp
  end_tt
end

#end_strong ⇒ Object



332
# File 'lib/web/unit/html-parser.rb', line 332

# </strong> is closed the same way as </b>.
def end_strong
  end_b
end

#end_title ⇒ Object



79
80
81
# File 'lib/web/unit/html-parser.rb', line 79

# Handles </title>: stores the text returned by save_end in @title.
def end_title
  @title = save_end()
end

#end_tt ⇒ Object



354
355
356
# File 'lib/web/unit/html-parser.rb', line 354

# Handles </tt> by popping the font pushed by start_tt.
def end_tt
  fmt = @formatter
  fmt.pop_font
end

#end_ul ⇒ Object



226
227
228
229
230
231
232
# File 'lib/web/unit/html-parser.rb', line 226

# Handles </ul>: drops the innermost @list_stack entry (a no-op on an empty
# stack), ends the paragraph (argument 0) and pops the formatter margin.
def end_ul
  @list_stack.pop if @list_stack
  @formatter.end_paragraph(0)
  @formatter.pop_margin
end

#end_var ⇒ Object



335
# File 'lib/web/unit/html-parser.rb', line 335

# </var> is closed the same way as </i>.
def end_var
  end_i
end

#end_xmp ⇒ Object



187
188
189
# File 'lib/web/unit/html-parser.rb', line 187

# </xmp> is closed the same way as </pre>.
def end_xmp
  end_pre()
end

#handle_data(data) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
# File 'lib/web/unit/html-parser.rb', line 22

# Routes character data.
#
# While a capture is active (@savedata is set) the text is appended to the
# capture buffer and nothing reaches the formatter. Otherwise it goes to the
# formatter as literal data when @nofill is non-zero, and as flowing data
# when @nofill is 0.
def handle_data(data)
  if @savedata
    @savedata += data
  elsif @nofill != 0
    @formatter.add_literal_data(data)
  else
    @formatter.add_flowing_data(data)
  end
end

#handle_image(src, alt, *args) ⇒ Object



62
63
64
# File 'lib/web/unit/html-parser.rb', line 62

# Renders an image as its alternate text: +alt+ is passed to handle_data.
# +src+ and any further arguments are ignored.
def handle_image(src, alt, *args)
  handle_data(alt)
end

#save_bgn ⇒ Object



34
35
36
# File 'lib/web/unit/html-parser.rb', line 34

# Starts capturing character data by resetting @savedata to an empty string.
# handle_data appends to it until save_end is called.
def save_bgn
  @savedata = ''
end

#save_end ⇒ Object



38
39
40
41
42
43
44
45
46
# File 'lib/web/unit/html-parser.rb', line 38

# Stops capturing and returns the captured text.
#
# @savedata is reset to nil. If nothing was captured the result is ''. When
# @nofill is 0, runs of whitespace are collapsed to single spaces and
# leading/trailing whitespace is removed; otherwise the text is returned
# untouched.
def save_end
  captured = @savedata
  @savedata = nil
  captured = '' if captured.nil?
  captured = captured.split.join(" ") if @nofill == 0
  captured
end

#start_a(attrs) ⇒ Object



358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
# File 'lib/web/unit/html-parser.rb', line 358

# Handles <a>: extracts 'href', 'name' and 'type' from the attributes and
# passes them to anchor_bgn (nil for any that are absent).
#
# Every attribute value is stripped of surrounding whitespace before use;
# 'type' is additionally lower-cased.
#
# attrs:: enumerable of [name, value] pairs.
def start_a(attrs)
  href = name = type = nil
  attrs.each do |attrname, value|
    value = value.strip
    case attrname
    when 'href' then href = value
    when 'name' then name = value
    when 'type' then type = value.downcase
    end
  end
  anchor_bgn(href, name, type)
end

#start_address(attrs) ⇒ Object



200
201
202
203
# File 'lib/web/unit/html-parser.rb', line 200

# Handles <address>: ends the paragraph (argument 0) and pushes a font with
# only the second push_font argument set to 1. +attrs+ is ignored.
def start_address(attrs)
  fmt = @formatter
  fmt.end_paragraph(0)
  fmt.push_font(nil, 1, nil, nil)
end

#start_b(attrs) ⇒ Object



344
345
346
# File 'lib/web/unit/html-parser.rb', line 344

# Handles <b>: pushes a font with only the third push_font argument set to 1.
# +attrs+ is ignored.
def start_b(attrs)
  fmt = @formatter
  fmt.push_font(nil, nil, 1, nil)
end

#start_blockquote(attrs) ⇒ Object



210
211
212
213
# File 'lib/web/unit/html-parser.rb', line 210

# Handles <blockquote>: ends the paragraph (argument 1) and pushes a
# 'blockquote' margin. +attrs+ is ignored.
def start_blockquote(attrs)
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.push_margin('blockquote')
end

#start_body(attrs) ⇒ Object



72
# File 'lib/web/unit/html-parser.rb', line 72

# <body> needs no action; +attrs+ is ignored.
def start_body(attrs)
  nil
end

#start_cite(attrs) ⇒ Object



316
# File 'lib/web/unit/html-parser.rb', line 316

# <cite> is opened the same way as <i>.
def start_cite(attrs)
  start_i(attrs)
end

#start_code(attrs) ⇒ Object



319
# File 'lib/web/unit/html-parser.rb', line 319

# <code> is opened the same way as <tt>.
def start_code(attrs)
  start_tt(attrs)
end

#start_dir(attrs) ⇒ Object



276
277
278
# File 'lib/web/unit/html-parser.rb', line 276

# <dir> is opened the same way as <ul>.
def start_dir(attrs)
  opened = start_ul(attrs)
  opened
end

#start_dl(attrs) ⇒ Object



284
285
286
287
# File 'lib/web/unit/html-parser.rb', line 284

# Handles <dl>: ends the paragraph (argument 1) and records a 'dl' entry on
# @list_stack. +attrs+ is ignored.
def start_dl(attrs)
  @formatter.end_paragraph(1)
  @list_stack.push(['dl', '', 0])
end

#start_em(attrs) ⇒ Object



322
# File 'lib/web/unit/html-parser.rb', line 322

# <em> is opened the same way as <i>.
def start_em(attrs)
  start_i(attrs)
end

#start_h1(attrs) ⇒ Object



105
106
107
108
# File 'lib/web/unit/html-parser.rb', line 105

# Handles <h1>: ends the paragraph (argument 1) and pushes the 'h1' font via
# push_font('h1', 0, 1, 0). +attrs+ is ignored.
def start_h1(attrs)
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.push_font('h1', 0, 1, 0)
end

#start_h2(attrs) ⇒ Object



115
116
117
118
# File 'lib/web/unit/html-parser.rb', line 115

# Handles <h2>: ends the paragraph (argument 1) and pushes the 'h2' font via
# push_font('h2', 0, 1, 0). +attrs+ is ignored.
def start_h2(attrs)
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.push_font('h2', 0, 1, 0)
end

#start_h3(attrs) ⇒ Object



125
126
127
128
# File 'lib/web/unit/html-parser.rb', line 125

# Handles <h3>: ends the paragraph (argument 1) and pushes the 'h3' font via
# push_font('h3', 0, 1, 0). +attrs+ is ignored.
def start_h3(attrs)
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.push_font('h3', 0, 1, 0)
end

#start_h4(attrs) ⇒ Object



135
136
137
138
# File 'lib/web/unit/html-parser.rb', line 135

# Handles <h4>: ends the paragraph (argument 1) and pushes the 'h4' font via
# push_font('h4', 0, 1, 0). +attrs+ is ignored.
def start_h4(attrs)
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.push_font('h4', 0, 1, 0)
end

#start_h5(attrs) ⇒ Object



145
146
147
148
# File 'lib/web/unit/html-parser.rb', line 145

# Handles <h5>: ends the paragraph (argument 1) and pushes the 'h5' font via
# push_font('h5', 0, 1, 0). +attrs+ is ignored.
def start_h5(attrs)
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.push_font('h5', 0, 1, 0)
end

#start_h6(attrs) ⇒ Object



155
156
157
158
# File 'lib/web/unit/html-parser.rb', line 155

# Handles <h6>: ends the paragraph (argument 1) and pushes the 'h6' font via
# push_font('h6', 0, 1, 0). +attrs+ is ignored.
def start_h6(attrs)
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.push_font('h6', 0, 1, 0)
end

#start_head(attrs) ⇒ Object



69
# File 'lib/web/unit/html-parser.rb', line 69

# <head> needs no action; +attrs+ is ignored.
def start_head(attrs)
  nil
end

#start_html(attrs) ⇒ Object



66
# File 'lib/web/unit/html-parser.rb', line 66

# <html> needs no action; +attrs+ is ignored.
def start_html(attrs)
  nil
end

#start_i(attrs) ⇒ Object



337
338
339
# File 'lib/web/unit/html-parser.rb', line 337

# Handles <i>: pushes a font with only the second push_font argument set
# to 1. +attrs+ is ignored.
def start_i(attrs)
  fmt = @formatter
  fmt.push_font(nil, 1, nil, nil)
end

#start_kbd(attrs) ⇒ Object



325
# File 'lib/web/unit/html-parser.rb', line 325

# <kbd> is opened the same way as <tt>.
def start_kbd(attrs)
  start_tt(attrs)
end

#start_listing(attrs) ⇒ Object



191
192
193
194
# File 'lib/web/unit/html-parser.rb', line 191

# Handles <listing>: starts preformatted mode exactly as <pre> does, then
# calls setliteral('listing').
def start_listing(attrs)
  start_pre(attrs)
  literal_tag = 'listing'
  setliteral(literal_tag) # Tell SGML parser
end

#start_menu(attrs) ⇒ Object



268
269
270
# File 'lib/web/unit/html-parser.rb', line 268

# <menu> is opened the same way as <ul>.
def start_menu(attrs)
  opened = start_ul(attrs)
  opened
end

#start_ol(attrs) ⇒ Object



245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'lib/web/unit/html-parser.rb', line 245

# Handles <ol>: ends the paragraph (argument 0), pushes an 'ol' margin and
# records ['ol', label, 0] on @list_stack.
#
# The label defaults to '1.'. A 'type' attribute whose value is exactly one
# character long replaces it with that character followed by '.'; 'type'
# values of any other length are ignored.
#
# attrs:: enumerable of [name, value] pairs.
def start_ol(attrs)
  @formatter.end_paragraph(0)
  @formatter.push_margin('ol')
  label = '1.'
  attrs.each do |key, val|
    next unless key == 'type'

    label = val + '.' if val.length == 1
  end
  @list_stack << ['ol', label, 0]
end

#start_pre(attrs) ⇒ Object



169
170
171
172
173
# File 'lib/web/unit/html-parser.rb', line 169

# Handles <pre>: ends the paragraph (argument 1), pushes a font with only the
# fourth push_font argument set to 1, and increments @nofill so that
# handle_data emits literal data. +attrs+ is ignored.
def start_pre(attrs)
  fmt = @formatter
  fmt.end_paragraph(1)
  fmt.push_font(nil, nil, nil, 1)
  @nofill += 1
end

#start_samp(attrs) ⇒ Object



328
# File 'lib/web/unit/html-parser.rb', line 328

# <samp> is opened the same way as <tt>.
def start_samp(attrs)
  start_tt(attrs)
end

#start_strong(attrs) ⇒ Object



331
# File 'lib/web/unit/html-parser.rb', line 331

# <strong> is opened the same way as <b>.
def start_strong(attrs)
  start_b(attrs)
end

#start_title(attrs) ⇒ Object



75
76
77
# File 'lib/web/unit/html-parser.rb', line 75

# Handles <title> by starting a text capture (save_bgn); end_title collects
# the result. +attrs+ is ignored.
def start_title(attrs)
  save_bgn()
end

#start_tt(attrs) ⇒ Object



351
352
353
# File 'lib/web/unit/html-parser.rb', line 351

# Handles <tt>: pushes a font with only the fourth push_font argument set
# to 1. +attrs+ is ignored.
def start_tt(attrs)
  fmt = @formatter
  fmt.push_font(nil, nil, nil, 1)
end

#start_ul(attrs) ⇒ Object



220
221
222
223
224
# File 'lib/web/unit/html-parser.rb', line 220

# Handles <ul>: ends the paragraph (argument 0), pushes a 'ul' margin and
# records ['ul', '*', 0] on @list_stack. +attrs+ is ignored.
def start_ul(attrs)
  fmt = @formatter
  fmt.end_paragraph(0)
  fmt.push_margin('ul')
  @list_stack.push(['ul', '*', 0])
end

#start_var(attrs) ⇒ Object



334
# File 'lib/web/unit/html-parser.rb', line 334

# <var> is opened the same way as <i>.
def start_var(attrs)
  start_i(attrs)
end

#start_xmp(attrs) ⇒ Object



182
183
184
185
# File 'lib/web/unit/html-parser.rb', line 182

# Handles <xmp>: starts preformatted mode exactly as <pre> does, then calls
# setliteral('xmp').
def start_xmp(attrs)
  start_pre(attrs)
  literal_tag = 'xmp'
  setliteral(literal_tag) # Tell SGML parser
end

#unknown_endtag(tag) ⇒ Object



427
428
# File 'lib/web/unit/html-parser.rb', line 427

# End tags without a dedicated handler are ignored.
def unknown_endtag(tag)
  nil
end

#unknown_starttag(tag, attrs) ⇒ Object



424
425
# File 'lib/web/unit/html-parser.rb', line 424

# Start tags without a dedicated handler are ignored, along with their
# attributes.
def unknown_starttag(tag, attrs)
  nil
end