Class: WebUnit::Parser
Constant Summary
collapse
- SingleTags =
%w( meta br hr base link area )
- @@ignoretags =
[]
Constants inherited
from SGMLParser
SGMLParser::Attrfind, SGMLParser::Charref, SGMLParser::Commentclose, SGMLParser::Commentopen, SGMLParser::Endbracket, SGMLParser::Endtagopen, SGMLParser::Entitydefs, SGMLParser::Entityref, SGMLParser::Incomplete, SGMLParser::Interesting, SGMLParser::Special, SGMLParser::Starttagopen, SGMLParser::Tagfind
Class Method Summary
collapse
Instance Method Summary
collapse
Methods included from Utils
#complete_url, #orthop_url, #parse_url
Methods inherited from SGMLParser
#close, #finish_endtag, #finish_starttag, #goahead, #handle_charref, #handle_comment, #handle_endtag, #handle_entityref, #handle_special, #handle_starttag, #has_context, #parse_comment, #parse_endtag, #parse_special, #parse_starttag, #report_unbalanced, #reset, #setliteral, #setnomoretags, #unknown_charref, #unknown_entityref
Constructor Details
#initialize ⇒ Parser
Returns a new instance of Parser.
22
23
24
25
26
27
28
|
# File 'lib/webunit/parser.rb', line 22
# Creates a parser backed by a NullFormatter.
#
# The element stack is seeded with an anonymous root element (nil tag,
# nil attributes); the form and table stacks start out empty. Finally the
# tags currently listed in @@ignoretags get no-op handlers via #ignore.
def initialize
  super( NullFormatter.new )
  @table_stack = []
  @form_stack  = []
  @elem_stack  = [ HtmlElem.new( nil, nil ) ]
  ignore( @@ignoretags )
end
|
Class Method Details
.ignore(arr) ⇒ Object
212
213
214
|
# File 'lib/webunit/parser.rb', line 212
# Replaces the class-wide list of tag names stored in @@ignoretags.
# The list is read by #initialize, which hands it to the instance-level
# #ignore; the array is stored by reference, not copied.
#
# arr:: collection of tag names (anything that responds to #each).
# Returns the value that was assigned.
def self.ignore( arr )
  @@ignoretags = arr
end
|
Instance Method Details
#attrs_to_hash(attrs) ⇒ Object
218
219
220
221
222
223
224
225
|
# File 'lib/webunit/parser.rb', line 218
# Converts a list of [name, raw_value] attribute pairs into a Hash.
#
# Each raw value is expected to carry its surrounding double quotes
# (e.g. '"index.html"'); the text between the first pair of double quotes
# becomes the hash value. A value without such a quoted part -- including
# an empty '""' or a nil value -- maps to the empty string instead of
# raising.
#
# attrs:: enumerable of [name, raw_value] pairs.
# Returns a new Hash mapping each attribute name to its unquoted value.
def attrs_to_hash( attrs )
  h = {}
  attrs.each do |name, raw|
    # nil.to_s is '', so a value-less attribute no longer raises NoMethodError.
    h[name] = raw.to_s.split( '"' )[1] || ''
  end
  h
end
|
#do_frame(attrs) ⇒ Object
189
190
191
192
193
194
|
# File 'lib/webunit/parser.rb', line 189
# Handler for a <frame> tag: resolves the frame's 'src' against the URL of
# the current response, wraps the attributes in a Frame and registers it
# with the response.
#
# Returns whatever @response.add_frame returns.
def do_frame( attrs )
  frame_attrs = attrs_to_hash( attrs )
  frame_attrs['src'] = complete_url( frame_attrs['src'], @response.url )
  @response.add_frame( Frame.new( frame_attrs ) )
end
|
#do_img(attrs) ⇒ Object
196
197
198
199
200
201
|
# File 'lib/webunit/parser.rb', line 196
# Handler for an <img> tag: builds an Image from the attributes, registers
# it with the response and adds it as a child of the element currently on
# top of the element stack.
#
# Returns the children collection of that element (result of #push).
def do_img( attrs )
  image = Image.new( attrs_to_hash( attrs ) )
  @response.add_image( image )
  @elem_stack.last.children.push( image )
end
|
Special-case `do_*` handlers (input, frame, …).
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
|
# File 'lib/webunit/parser.rb', line 166
# Handler for an <input> tag.
#
# Picks the element class from the 'type' attribute (submit, reset,
# checkbox, radio, image, file; anything else -- including a missing
# type -- becomes a plain Input), registers the element as a parameter of
# the innermost open form when there is one, and adds it as a child of the
# element on top of the element stack.
#
# Returns the children collection of that element (result of #push).
def do_input( attrs )
  ah = attrs_to_hash( attrs )
  klass = case ah['type']
          when 'submit'   then InputSubmit
          when 'reset'    then InputReset
          when 'checkbox' then InputCheckbox
          when 'radio'    then InputRadio
          when 'image'    then InputImage
          when 'file'     then InputFile
          else Input
          end
  elem = klass.new( ah )

  # Inputs outside any <form> are still kept in the element tree.
  form = @form_stack.last
  form.add_param( elem ) if form
  @elem_stack.last.children.push( elem )
end
|
#dotag(tag, attrs) ⇒ Object
155
156
157
158
|
# File 'lib/webunit/parser.rb', line 155
# Generic handler for a tag without a closing counterpart: wraps it in an
# HtmlElem and adds it as a child of the element on top of the element
# stack. The stack itself is left unchanged.
#
# Returns the children collection of that element (result of #push).
def dotag( tag, attrs )
  parent = @elem_stack.last
  parent.children.push( HtmlElem.new( tag, attrs_to_hash( attrs ) ) )
end
|
94
95
96
97
|
# File 'lib/webunit/parser.rb', line 94
# Handler for </form>: closes the element via #endtag, then drops the
# innermost form from the form stack.
#
# Returns the form that was popped (nil if the form stack was empty).
def end_form
  endtag 'form'
  closed_form = @form_stack.pop
  closed_form
end
|
#end_select ⇒ Object
105
106
107
108
|
# File 'lib/webunit/parser.rb', line 105
# Handler for </select>: closes the element via #endtag and tells the most
# recently added parameter of the innermost open form that its option list
# is finished (#end_option).
#
# A <select> that appears outside any <form> is tolerated, mirroring the
# way #do_input treats form-less controls: the element is closed and
# nothing else happens.
#
# Returns the result of #end_option, or nil when no form is open.
def end_select
  endtag( 'select' )
  form = @form_stack.last
  form.parameters.last.end_option if form
end
|
#end_table ⇒ Object
131
132
133
134
|
# File 'lib/webunit/parser.rb', line 131
# Handler for </table>: closes the element via #endtag, then drops the
# innermost table from the table stack.
#
# Returns the table that was popped (nil if the table stack was empty).
def end_table
  endtag 'table'
  closed_table = @table_stack.pop
  closed_table
end
|
#endtag(tag) ⇒ Object
50
51
52
53
54
55
56
57
58
59
60
61
|
# File 'lib/webunit/parser.rb', line 50
# Closes +tag+ against the element on top of the element stack.
#
# * If the top element has the same tag, it is popped and returned.
# * If the tags differ but +tag+ is one of SingleTags, the stray end tag is
#   ignored: the top element stays on the stack and is returned.
# * Otherwise BadHtmlTags is raised (and the message is also printed when
#   $DEBUG is set). The mismatching top element has already been removed
#   from the stack at that point.
def endtag( tag )
  top = @elem_stack.pop
  return top if tag == top.tag

  unless SingleTags.include?( tag )
    puts "'#{tag}'(wait for '#{top.tag.to_s}')" if $DEBUG
    raise BadHtmlTags, "'#{tag}'(wait for '#{top.tag.to_s}')"
  end

  # Stray end tag of a single tag: restore the element we popped.
  @elem_stack.push( top )
  top
end
|
#feed(response) ⇒ Object
30
31
32
33
34
35
36
37
38
39
|
# File 'lib/webunit/parser.rb', line 30
# Parses the body of +response+ and returns the first child of the root
# element.
#
# The body is run through HTMLSplit#repair exactly once; the repaired text
# is dumped to $stderr when $DEBUG is set and then handed to the
# superclass parser. Any StandardError raised along the way is re-raised
# as BadHtmlTags whose message is the original message followed by the
# inspected element stack.
#
# Note that the root element is popped off the element stack before
# returning.
def feed( response )
  @response = response
  begin
    repaired = HTMLSplit.new( @response.body ).repair.to_s
    $stderr.puts repaired if $DEBUG
    super( repaired )
  rescue => err
    raise BadHtmlTags, err.message + @elem_stack.inspect
  end
  @elem_stack.pop.children[0]
end
|
#handle_data(data) ⇒ Object
41
42
43
|
# File 'lib/webunit/parser.rb', line 41
# Receives a run of character data: collapses each run of repeated spaces
# and each run of repeated newlines to a single character, strips leading
# and trailing whitespace, and appends the result to the element on top of
# the element stack.
def handle_data(data)
  text = data.squeeze( " \n" ).strip
  @elem_stack.last.append( text )
end
|
#ignore(arr) ⇒ Object
204
205
206
207
208
209
210
|
# File 'lib/webunit/parser.rb', line 204
# Makes this parser instance ignore the given tags by defining empty
# start_<tag>, end_<tag> and do_<tag> singleton methods on it.
#
# The method sources are built by string interpolation and evaluated, so
# the tag names must be trusted, valid identifier fragments.
#
# arr:: collection of tag names.
# Returns +arr+.
def ignore( arr )
  arr.each do |tag_name|
    [ "def start_#{tag_name}( a ); end",
      "def end_#{tag_name}; end",
      "def do_#{tag_name}( a ); end" ].each do |method_source|
      instance_eval( method_source )
    end
  end
end
|
#start_a(attrs) ⇒ Object
Special-case `start_*` and `end_*` handlers (a, form, table, …).
74
75
76
77
78
79
80
81
82
83
|
# File 'lib/webunit/parser.rb', line 74
# Handler for <a>: builds a Link, registers it with the response and opens
# it as the current element.
#
# Unless the href is a pure fragment reference (starts with '#'), it is
# resolved against the URL of the current response and the HTML entity
# "&amp;" in it is decoded to a literal "&", so the stored href is a
# usable URL.
#
# Returns the result of #starttag.
def start_a( attrs )
  ah = attrs_to_hash( attrs )
  unless ah["href"] =~ /^#/
    url = complete_url( ah["href"], @response.url )
    # Decode the entity only when a URL actually came back.
    ah["href"] = url ? url.gsub( "&amp;", "&" ) : url
  end
  elem = Link::new( ah )
  @response.add_link( elem )
  starttag( elem )
end
|
85
86
87
88
89
90
91
92
93
|
# File 'lib/webunit/parser.rb', line 85
# Handler for <form>: builds a Form, registers it with the response, makes
# it the innermost open form and opens it as the current element.
#
# The action is resolved against the URL of the current response and the
# HTML entity "&amp;" in it is decoded to a literal "&", so the stored
# action is a usable URL.
#
# Returns the result of #starttag.
def start_form( attrs )
  ah = attrs_to_hash( attrs )
  action = complete_url( ah["action"], @response.url )
  # Decode the entity only when a URL actually came back.
  ah["action"] = action ? action.gsub( "&amp;", "&" ) : action
  elem = Form::new( ah )
  @response.add_form elem
  @form_stack.push elem
  starttag( elem )
end
|
#start_option(attrs) ⇒ Object
110
111
112
113
114
115
|
# File 'lib/webunit/parser.rb', line 110
# Handler for <option>: builds a SelectOption, adds it to the most recently
# added parameter of the innermost open form (expected to be the enclosing
# select) and opens it as the current element.
#
# An option met while no form is open is tolerated, mirroring the way
# #do_input treats form-less controls: it is kept in the element tree but
# not attached to any form parameter.
#
# Returns the result of #starttag.
def start_option( attrs )
  ah = attrs_to_hash attrs
  elem = SelectOption::new( ah )
  form = @form_stack.last
  form.parameters.last.add_option( elem ) if form
  starttag( elem )
end
|
#start_select(attrs) ⇒ Object
99
100
101
102
103
104
|
# File 'lib/webunit/parser.rb', line 99
# Handler for <select>: builds a Select, registers it as a parameter of the
# innermost open form and opens it as the current element.
#
# A select outside any <form> is tolerated, mirroring the way #do_input
# treats form-less controls: it is kept in the element tree but not
# registered with a form.
#
# Returns the result of #starttag.
def start_select( attrs )
  ah = attrs_to_hash attrs
  elem = Select::new( ah )
  form = @form_stack.last
  form.add_param( elem ) if form
  starttag( elem )
end
|
#start_table(attrs) ⇒ Object
124
125
126
127
128
129
130
|
# File 'lib/webunit/parser.rb', line 124
# Handler for <table>: builds a Table, registers it with the response,
# makes it the innermost open table and opens it as the current element.
#
# Returns the result of #starttag.
def start_table( attrs )
  table = Table.new( attrs_to_hash( attrs ) )
  @response.add_table( table )
  @table_stack.push( table )
  starttag table
end
|
#start_td(attrs) ⇒ Object
148
149
150
151
152
|
# File 'lib/webunit/parser.rb', line 148
# Handler for <td>: builds a 'td' TableCell, adds it to the innermost open
# table and opens it as the current element.
#
# Returns the result of #starttag.
def start_td( attrs )
  cell = TableCell.new( 'td', attrs_to_hash( attrs ) )
  @table_stack.last.add_cell cell
  starttag cell
end
|
#start_textarea(attrs) ⇒ Object
117
118
119
120
121
122
|
# File 'lib/webunit/parser.rb', line 117
# Handler for <textarea>: builds a Textarea, registers it as a parameter of
# the innermost open form and opens it as the current element.
#
# A textarea outside any <form> is tolerated, mirroring the way #do_input
# treats form-less controls: it is kept in the element tree but not
# registered with a form.
#
# Returns the result of #starttag.
def start_textarea( attrs )
  ah = attrs_to_hash attrs
  elem = Textarea::new( ah )
  form = @form_stack.last
  form.add_param( elem ) if form
  starttag( elem )
end
|
#start_th(attrs) ⇒ Object
142
143
144
145
146
|
# File 'lib/webunit/parser.rb', line 142
# Handler for <th>: builds a 'th' TableCell, adds it to the innermost open
# table and opens it as the current element.
#
# Returns the result of #starttag.
def start_th( attrs )
  header_cell = TableCell.new( 'th', attrs_to_hash( attrs ) )
  @table_stack.last.add_cell header_cell
  starttag header_cell
end
|
#start_tr(attrs) ⇒ Object
136
137
138
139
140
|
# File 'lib/webunit/parser.rb', line 136
# Handler for <tr>: opens a new TableRow as the current element, then tells
# the innermost open table to add a row.
#
# NOTE(review): the table receives a freshly built attribute hash, not the
# TableRow element pushed on the element stack -- presumably
# Table#add_row builds its own row from the attributes; confirm.
#
# Returns the result of add_row.
def start_tr( attrs )
  starttag( TableRow.new( attrs_to_hash( attrs ) ) )
  @table_stack.last.add_row attrs_to_hash( attrs )
end
|
#starttag(elem) ⇒ Object
45
46
47
48
|
# File 'lib/webunit/parser.rb', line 45
# Opens +elem+: appends it to the element currently on top of the element
# stack, then pushes it so that it becomes the new current element.
#
# Returns the element stack.
def starttag( elem )
  parent = @elem_stack.last
  parent.append( elem )
  @elem_stack << elem
end
|
#unknown_endtag(tag) ⇒ Object
68
69
70
|
# File 'lib/webunit/parser.rb', line 68
# Fallback for end tags without a dedicated end_<tag> handler: simply
# delegates to #endtag and returns its result.
def unknown_endtag( tag )
  endtag tag
end
|
#unknown_starttag(tag, attrs) ⇒ Object
63
64
65
66
|
# File 'lib/webunit/parser.rb', line 63
# Fallback for start tags without a dedicated start_<tag> handler: wraps
# the tag in a generic HtmlElem and opens it as the current element.
#
# Returns the result of #starttag.
def unknown_starttag( tag, attrs )
  starttag( HtmlElem.new( tag, attrs_to_hash( attrs ) ) )
end
|