Class: WebUnit::Parser

Inherits:
SGMLParser show all
Includes:
Utils
Defined in:
lib/webunit/parser.rb

Constant Summary collapse

SingleTags =

input, frame, img

%w( meta br hr base link area )
@@ignoretags =
[]

Constants inherited from SGMLParser

SGMLParser::Attrfind, SGMLParser::Charref, SGMLParser::Commentclose, SGMLParser::Commentopen, SGMLParser::Endbracket, SGMLParser::Endtagopen, SGMLParser::Entitydefs, SGMLParser::Entityref, SGMLParser::Incomplete, SGMLParser::Interesting, SGMLParser::Special, SGMLParser::Starttagopen, SGMLParser::Tagfind

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Utils

#complete_url, #orthop_url, #parse_url

Methods inherited from SGMLParser

#close, #finish_endtag, #finish_starttag, #goahead, #handle_charref, #handle_comment, #handle_endtag, #handle_entityref, #handle_special, #handle_starttag, #has_context, #parse_comment, #parse_endtag, #parse_special, #parse_starttag, #report_unbalanced, #reset, #setliteral, #setnomoretags, #unknown_charref, #unknown_entityref

Constructor Details

#initialize ⇒ Parser

Returns a new instance of Parser.



22
23
24
25
26
27
28
# File 'lib/webunit/parser.rb', line 22

# Creates a parser backed by a NullFormatter.
#
# The element stack starts with a single root HtmlElem built from (nil, nil);
# the form and table stacks start empty. Finally, no-op handlers are installed
# on this instance for every tag currently listed in @@ignoretags.
def initialize
  super(NullFormatter.new)
  @form_stack = []
  @table_stack = []
  @elem_stack = [HtmlElem.new(nil, nil)]
  ignore(@@ignoretags)
end

Class Method Details

.ignore(arr) ⇒ Object



212
213
214
# File 'lib/webunit/parser.rb', line 212

# Class-level setter for the tags to ignore.
#
# Stores +arr+ in the @@ignoretags class variable; #initialize hands that
# list to the instance-level #ignore when a parser is created.
# Returns +arr+.
def self.ignore(arr)
  @@ignoretags = arr
end

Instance Method Details

#attrs_to_hash(attrs) ⇒ Object

utils



218
219
220
221
222
223
224
225
# File 'lib/webunit/parser.rb', line 218

# Converts a list of [name, value] attribute pairs into a Hash.
#
# For each value, the text between the first pair of double quotes is kept.
# A value containing no double-quoted text (empty quotes, or text without
# double quotes) maps to ''. A nil value also maps to '' instead of raising
# NoMethodError on nil.split.
#
# NOTE(review): unquoted values such as type=text become ''; presumably the
# input has already been normalised to double-quoted attributes - confirm.
#
# attrs:: enumerable yielding [name, value] pairs
# Returns a new Hash of name => extracted value.
def attrs_to_hash( attrs )
  attrs.each_with_object( {} ) do |(name, raw), hash|
    quoted = raw.to_s.split('"')[1]
    hash[name] = quoted || ''
  end
end

#do_frame(attrs) ⇒ Object



189
190
191
192
193
194
# File 'lib/webunit/parser.rb', line 189

# Handles a <frame> tag.
#
# The 'src' attribute is resolved against the response URL with complete_url,
# the attributes are wrapped in a Frame, and the Frame is registered on the
# response via add_frame. Returns whatever add_frame returns.
def do_frame( attrs )
  frame_attrs = attrs_to_hash(attrs)
  frame_attrs['src'] = complete_url(frame_attrs['src'], @response.url)
  @response.add_frame(Frame.new(frame_attrs))
end

#do_img(attrs) ⇒ Object



196
197
198
199
200
201
# File 'lib/webunit/parser.rb', line 196

# Handles an <img> tag.
#
# Builds an Image from the attributes, registers it on the response via
# add_image, and appends it to the children of the element currently on top
# of the element stack (the image itself is not pushed onto the stack).
def do_img( attrs )
  image = Image.new(attrs_to_hash(attrs))
  @response.add_image(image)
  @elem_stack.last.children.push(image)
end

#do_input(attrs) ⇒ Object

special do ( input, frame, … )



166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/webunit/parser.rb', line 166

# Handles an <input> tag.
#
# Picks the element class from the 'type' attribute: submit, reset, checkbox,
# radio, image and file each have their own class; anything else (including a
# missing type) becomes a plain Input. The type is compared case-insensitively
# so that e.g. type="SUBMIT" is recognised; the attribute hash itself is left
# untouched.
#
# The element is added as a parameter of the innermost open form, if there is
# one, and is always appended to the children of the current element.
def do_input( attrs )
  ah = attrs_to_hash attrs
  elem =
    case ah['type'].to_s.downcase
    when 'submit'   then InputSubmit.new( ah )
    when 'reset'    then InputReset.new( ah )
    when 'checkbox' then InputCheckbox.new( ah )
    when 'radio'    then InputRadio.new( ah )
    when 'image'    then InputImage.new( ah )
    when 'file'     then InputFile.new( ah )
    else                 Input.new( ah ) # text and every other type
    end

  # Inputs outside any <form> are still part of the element tree.
  form = @form_stack.last
  form.add_param( elem ) if form
  @elem_stack.last.children.push( elem )
end

#dotag(tag, attrs) ⇒ Object

do ( start only )



155
156
157
158
# File 'lib/webunit/parser.rb', line 155

# Handles a start-only tag: wraps +tag+ and its attribute hash in an HtmlElem
# and appends it to the children of the element on top of the stack. The new
# element is not pushed onto the stack, so no end tag is expected for it.
def dotag( tag, attrs )
  node = HtmlElem.new(tag, attrs_to_hash(attrs))
  @elem_stack.last.children.push(node)
end

#end_form ⇒ Object



94
95
96
97
# File 'lib/webunit/parser.rb', line 94

# Handles </form>: closes the 'form' element on the element stack, then
# removes the innermost form from the form stack and returns it.
def end_form
  endtag('form')
  @form_stack.pop
end

#end_select ⇒ Object



105
106
107
108
# File 'lib/webunit/parser.rb', line 105

# Handles </select>: closes the 'select' element on the element stack and
# calls end_option on the most recently added parameter of the innermost
# open form.
#
# When there is no open form, or the form has no parameters yet, only the
# element stack is updated. This mirrors do_input, which also tolerates
# controls that appear outside a <form>.
def end_select
  endtag( 'select' )
  form = @form_stack.last
  select = form.parameters.last if form
  select.end_option if select
end

#end_table ⇒ Object



131
132
133
134
# File 'lib/webunit/parser.rb', line 131

# Handles </table>: closes the 'table' element on the element stack, then
# removes the innermost table from the table stack and returns it.
def end_table
  endtag('table')
  @table_stack.pop
end

#endtag(tag) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/webunit/parser.rb', line 50

# Closes +tag+ against the element on top of the element stack.
#
# The top element is popped. If its tag matches, it is returned. If it does
# not match but +tag+ is listed in SingleTags, the popped element is pushed
# back (the stray end tag is ignored) and returned. Otherwise BadHtmlTags is
# raised, with the message also printed when $DEBUG is set; in that case the
# popped element is not restored.
def endtag( tag )
  top = @elem_stack.pop
  return top if tag == top.tag

  unless SingleTags.include?(tag)
    puts "'#{tag}'(wait for '#{top.tag.to_s}')" if $DEBUG
    raise BadHtmlTags, "'#{tag}'(wait for '#{top.tag.to_s}')"
  end

  # End tag of a void element: keep the enclosing element open.
  @elem_stack.push(top)
  top
end

#feed(response) ⇒ Object



30
31
32
33
34
35
36
37
38
39
# File 'lib/webunit/parser.rb', line 30

# Parses the body of +response+ and returns the first child of the root
# element.
#
# The body is run through HTMLSplit#repair before being handed to the
# superclass parser (and echoed to $stderr when $DEBUG is set). Any
# StandardError raised while parsing is re-raised as BadHtmlTags, with the
# inspected element stack appended to the original message.
def feed( response )
  @response = response
  begin
    if $DEBUG
      $stderr.puts HTMLSplit.new(@response.body).repair.to_s
    end
    super(HTMLSplit.new(@response.body).repair.to_s)
  rescue => err
    raise BadHtmlTags, err.message + @elem_stack.inspect
  end
  root = @elem_stack.pop
  root.children[0]
end

#handle_data(data) ⇒ Object



41
42
43
# File 'lib/webunit/parser.rb', line 41

# Receives a run of character data. Repeated spaces and repeated newlines are
# each squeezed to a single character, surrounding whitespace is stripped,
# and the result (possibly an empty string) is appended to the element on
# top of the element stack.
def handle_data(data)
  text = data.squeeze(" \n").strip
  @elem_stack.last.append(text)
end

#ignore(arr) ⇒ Object

ignore tag



204
205
206
207
208
209
210
# File 'lib/webunit/parser.rb', line 204

# Makes this parser instance ignore the given tags.
#
# For every tag name in +arr+, three no-op singleton methods are defined on
# this object: start_<tag>(attrs), end_<tag>() and do_<tag>(attrs).
#
# The methods are created with define_singleton_method rather than by
# evaluating a "def ..." string, so tag names that are not valid Ruby
# identifiers (e.g. "my-tag", "o:p") no longer raise SyntaxError and tag
# names are never executed as code.
#
# arr:: enumerable of tag names
# Returns +arr+.
def ignore( arr )
  arr.each do |t|
    define_singleton_method( "start_#{t}" ) { |_attrs| }
    define_singleton_method( "end_#{t}" ) { }
    define_singleton_method( "do_#{t}" ) { |_attrs| }
  end
end

#start_a(attrs) ⇒ Object

special start and end ( a, form, table, … )



74
75
76
77
78
79
80
81
82
83
# File 'lib/webunit/parser.rb', line 74

# Handles an <a> start tag.
#
# Unless the href is a pure fragment (starts with '#'), it is resolved
# against the response URL with complete_url and any '&amp;' entities in the
# result are decoded to '&'. The previous code replaced '&' with '&', which
# did nothing. A Link is then built from the attributes, registered on the
# response via add_link, and opened on the element stack.
#
# NOTE(review): an <a> without href passes nil to complete_url - confirm
# that helper tolerates it.
def start_a( attrs )
  ah = attrs_to_hash attrs
  unless ah["href"] =~ /^#/
    resolved = complete_url( ah["href"], @response.url )
    ah["href"] = resolved.gsub( "&amp;", "&" )
  end
  elem = Link::new( ah )
  @response.add_link( elem )
  starttag( elem )
end

#start_form(attrs) ⇒ Object



85
86
87
88
89
90
91
92
93
# File 'lib/webunit/parser.rb', line 85

# Handles a <form> start tag.
#
# The 'action' attribute is resolved against the response URL with
# complete_url and any '&amp;' entities in the result are decoded to '&'.
# The previous code replaced '&' with '&', which did nothing. A Form is then
# built from the attributes, registered on the response via add_form, pushed
# onto the form stack, and opened on the element stack.
#
# NOTE(review): a <form> without action passes nil to complete_url - confirm
# that helper tolerates it.
def start_form( attrs )
  ah = attrs_to_hash attrs
  resolved = complete_url( ah["action"], @response.url )
  ah["action"] = resolved.gsub( "&amp;", "&" )
  elem = Form::new( ah )
  @response.add_form elem
  @form_stack.push elem
  starttag( elem )
end

#start_option(attrs) ⇒ Object



110
111
112
113
114
115
# File 'lib/webunit/parser.rb', line 110

# Handles an <option> start tag.
#
# Builds a SelectOption from the attributes and adds it, via add_option, to
# the most recently added parameter of the innermost open form. The option
# is then opened on the element stack.
#
# When there is no open form, or the form has no parameters yet, the option
# is only recorded in the element tree. This mirrors do_input, which also
# tolerates controls that appear outside a <form>.
def start_option( attrs )
  ah = attrs_to_hash attrs
  elem = SelectOption::new( ah )
  form = @form_stack.last
  select = form.parameters.last if form
  select.add_option elem if select
  starttag( elem )
end

#start_select(attrs) ⇒ Object



99
100
101
102
103
104
# File 'lib/webunit/parser.rb', line 99

# Handles a <select> start tag.
#
# Builds a Select from the attributes, adds it as a parameter of the
# innermost open form, and opens it on the element stack.
#
# A <select> outside any <form> is still added to the element tree; the
# form registration is skipped, matching how do_input treats inputs that
# appear outside a form.
def start_select( attrs )
  ah = attrs_to_hash attrs
  elem = Select::new( ah )
  form = @form_stack.last
  form.add_param elem if form
  starttag( elem )
end

#start_table(attrs) ⇒ Object



124
125
126
127
128
129
130
# File 'lib/webunit/parser.rb', line 124

# Handles a <table> start tag: builds a Table from the attributes, registers
# it on the response via add_table, pushes it onto the table stack so that
# rows and cells can find it, and opens it on the element stack.
def start_table( attrs )
  table = Table.new(attrs_to_hash(attrs))
  @response.add_table(table)
  @table_stack.push(table)
  starttag(table)
end

#start_td(attrs) ⇒ Object



148
149
150
151
152
# File 'lib/webunit/parser.rb', line 148

# Handles a <td> start tag: builds a 'td' TableCell from the attributes, adds
# it to the innermost open table via add_cell, and opens it on the element
# stack.
def start_td( attrs )
  cell = TableCell.new('td', attrs_to_hash(attrs))
  @table_stack.last.add_cell(cell)
  starttag(cell)
end

#start_textarea(attrs) ⇒ Object



117
118
119
120
121
122
# File 'lib/webunit/parser.rb', line 117

# Handles a <textarea> start tag.
#
# Builds a Textarea from the attributes, adds it as a parameter of the
# innermost open form, and opens it on the element stack.
#
# A <textarea> outside any <form> is still added to the element tree; the
# form registration is skipped, matching how do_input treats inputs that
# appear outside a form.
def start_textarea( attrs )
  ah = attrs_to_hash attrs
  elem = Textarea::new( ah )
  form = @form_stack.last
  form.add_param elem if form
  starttag( elem )
end

#start_th(attrs) ⇒ Object



142
143
144
145
146
# File 'lib/webunit/parser.rb', line 142

# Handles a <th> start tag: builds a 'th' TableCell from the attributes, adds
# it to the innermost open table via add_cell, and opens it on the element
# stack.
def start_th( attrs )
  cell = TableCell.new('th', attrs_to_hash(attrs))
  @table_stack.last.add_cell(cell)
  starttag(cell)
end

#start_tr(attrs) ⇒ Object



136
137
138
139
140
# File 'lib/webunit/parser.rb', line 136

# Handles a <tr> start tag: builds a TableRow from the attributes and opens
# it on the element stack, then tells the innermost open table about the new
# row via add_row. Note that add_row is given a freshly built attribute hash,
# not the TableRow element itself.
def start_tr( attrs )
  row = TableRow.new(attrs_to_hash(attrs))
  starttag(row)
  row_attrs = attrs_to_hash(attrs)
  @table_stack.last.add_row(row_attrs)
end

#starttag(elem) ⇒ Object



45
46
47
48
# File 'lib/webunit/parser.rb', line 45

# Opens +elem+: appends it to the element currently on top of the element
# stack, then pushes it so that it becomes the new top. Returns the element
# stack.
def starttag( elem )
  parent = @elem_stack.last
  parent.append(elem)
  @elem_stack.push(elem)
end

#unknown_endtag(tag) ⇒ Object



68
69
70
# File 'lib/webunit/parser.rb', line 68

# Fallback for end tags without a dedicated end_<tag> handler: simply closes
# +tag+ on the element stack and returns what endtag returns.
def unknown_endtag(tag)
  endtag(tag)
end

#unknown_starttag(tag, attrs) ⇒ Object



63
64
65
66
# File 'lib/webunit/parser.rb', line 63

# Fallback for start tags without a dedicated start_<tag>/do_<tag> handler:
# wraps +tag+ and its attribute hash in a generic HtmlElem and opens it on
# the element stack.
def unknown_starttag(tag, attrs)
  starttag(HtmlElem.new(tag, attrs_to_hash(attrs)))
end