Class: TagIterator
Overview
:title: TagIterator
Simple but very useful HTML/XHTML cascading parser.
Usage
# sample html
stext = <<-EOF
<body> This is a test...
<sub> S1 </sub> <sub> S2 </sub>
<DL>
<DT> A1
<DT> A2
<DT> A3
</DL>
<DL>
<DT> B1
<DT> B2
<DT> B3
</DL>
<NEST>
<P ALIGN="R">TOP</P>
<NEST>
<P>SECOND</P>
<OL>
<LI>C1
<LI>C2
<LI>C3
<LI>C4
</OL>
</NEST>
<OL>
<LI>D1
<LI>D2
<LI>D3
<LI>D4
</OL>
</NEST>
</body>
EOF
# Wrap the sample markup in an iterator.
a = TagIterator.new(stext)
# Everything below works inside the first <body> block.
a.first("body") do |y|
# Select the second <DL> block and walk its <DT> entries (prints B1, B2, B3).
y.nth("dl",2) do |dl|
dl.enumtag("dt") do |t|
puts t.text.strip
end
end
# Descend into the outer <NEST> block.
y.first("nest") do |n|
# First <P>: print its text followed by its attributes (prints "TOP align=R").
n.first("p") do |c|
print c.text, ' '
puts c.attributes.collect{ |k,v| "#{k}=#{v}" }
# `next` is documented as an alias of `first`; it is chained on the value
# returned by the preceding call, so the search continues after that block.
end.next("nest") do |m|
# Inner <NEST>: prints SECOND, then the list items C1..C4.
m.first("p") do |c|
puts c.text
end.next("ol") do |o|
o.enumtag("li") do |i| puts i.text.strip end
end
# The <OL> that follows the inner <NEST>: prints D1..D4.
end.next("ol") do |o|
o.enumtag("li") do |i| puts i.text.strip end
end
end
end
# Visit every <sub> block of the whole document (prints S1, S2).
a.each_block("sub") do |y|
puts y.text.strip
end
produces
B1
B2
B3
TOP align=R
SECOND
C1
C2
C3
C4
D1
D2
D3
D4
S1
S2
Author(s)
*ɂႷ <[email protected]>
Instance Attribute Summary collapse
-
#attributes ⇒ Object
readonly
Returns the value of attribute attributes.
-
#option ⇒ Object
Returns the value of attribute option.
-
#tag ⇒ Object
readonly
Returns the value of attribute tag.
-
#text ⇒ Object
readonly
Returns the value of attribute text.
Class Method Summary collapse
Instance Method Summary collapse
- #collect(*arg) ⇒ Object
- #each_block(tag, closetag = nil) ⇒ Object
- #enumcollect(tag) ⇒ Object
- #enumtag(tag) ⇒ Object
- #first(tag, *arg) ⇒ Object (also: #next)
- #for_this {|_self| ... } ⇒ Object
- #get_first(*arg) ⇒ Object
- #get_nth(*arg) ⇒ Object
- #nth(tag, n, closetag = nil) {|self.class.new(text[s..e],tag,parse_attribute(d))| ... } ⇒ Object
- #nth_tailer(tag, n) ⇒ Object
- #tagexist?(tag, st = 0) ⇒ Boolean
- #tagnext ⇒ Object
Instance Attribute Details
#attributes ⇒ Object (readonly)
Returns the value of attribute attributes.
138 139 140 |
# File 'lib/mega/tagiter.rb', line 138
#
# Reader for the +@attributes+ instance variable (documented as read-only).
#
# @return [Object] the current value of +@attributes+
def attributes
  @attributes
end
#option ⇒ Object
Returns the value of attribute option.
136 137 138 |
# File 'lib/mega/tagiter.rb', line 136
#
# Reader for the +@option+ instance variable.
#
# @return [Object] the current value of +@option+
def option
  @option
end
#tag ⇒ Object (readonly)
Returns the value of attribute tag.
137 138 139 |
# File 'lib/mega/tagiter.rb', line 137
#
# Reader for the +@tag+ instance variable (documented as read-only).
#
# @return [Object] the current value of +@tag+
def tag
  @tag
end
#text ⇒ Object (readonly)
Returns the value of attribute text.
135 136 137 |
# File 'lib/mega/tagiter.rb', line 135
#
# Reader for the +@text+ instance variable (documented as read-only).
#
# @return [Object] the current value of +@text+
def text
  @text
end
Class Method Details
.[](aname) ⇒ Object
148 149 150 |
# File 'lib/mega/tagiter.rb', line 148 def @attributes.[](aname) super aname.downcase end |
Instance Method Details
#collect(*arg) ⇒ Object
252 253 254 255 256 |
# File 'lib/mega/tagiter.rb', line 252
#
# Gathers every object yielded by #each_block into an array.
#
# @param arg [Array] arguments forwarded unchanged to #each_block
# @return [Array] the yielded blocks, in the order they were yielded
def collect(*arg)
  blocks = []
  each_block(*arg) { |blk| blocks << blk }
  blocks
end
#each_block(tag, closetag = nil) ⇒ Object
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 |
# File 'lib/mega/tagiter.rb', line 228
#
# Yields a new iterator for every +tag+ block found in the text, scanning
# from the beginning, and returns an iterator over the text that follows the
# last closing tag that was located.
#
# @param tag [String] name of the tag to look for
# @param closetag [String, nil] alternative closing tag; when given it is
#   passed to +find_closetag+ together with the start position and +tag+
# @yield [block] a new instance of this class built from the text between
#   the positions reported by +find_opentag+ and +find_closetag+, the tag
#   name, and the result of +parse_attribute+
# @return [Object] a new instance of this class for the remaining text
# @raise [RuntimeError] when no opening +tag+ is found at all
def each_block(tag, closetag = nil)
  tail = 0
  start, raw_attrs = find_opentag(tag)
  raise RuntimeError, "tag(#{tag}) not found" unless start

  while start
    stop = closetag ? find_closetag(closetag, start, tag) : find_closetag(tag, start)
    # No closing tag: the block runs to the end of the text (index -1).
    stop ||= -1
    yield self.class.new(@text[start..stop], tag, parse_attribute(raw_attrs))
    # An unterminated block is necessarily the last one.
    break if stop < 0

    # Resume the search at the '>' that ends the closing tag.
    tail = @text.index('>', stop + 1) || @text.length
    start, raw_attrs = find_opentag(tag, tail)
  end
  self.class.new(@text[(tail + 1)..-1])
end
#enumcollect(tag) ⇒ Object
268 269 270 271 272 |
# File 'lib/mega/tagiter.rb', line 268
#
# Gathers every object yielded by #enumtag into an array.
#
# @param tag [String] tag name forwarded to #enumtag
# @return [Array] the yielded entries, in the order they were yielded
def enumcollect(tag)
  entries = []
  enumtag(tag) { |entry| entries << entry }
  entries
end
#enumtag(tag) ⇒ Object
258 259 260 261 262 263 264 265 266 |
# File 'lib/mega/tagiter.rb', line 258
#
# Yields a new iterator for each +tag+ entry located by +find_openenumtag+.
# The usage example applies it to list-style tags (<DT>, <LI>).
#
# @param tag [String] name of the enumerated tag
# @yield [entry] a new instance of this class built from the text between
#   the positions reported by +find_openenumtag+ and +find_closeenumtag+,
#   the tag name, and the result of +parse_attribute+
# @return [nil]
def enumtag(tag)
  start, raw_attrs = find_openenumtag(tag)
  while start
    # No end position: the entry runs to the end of the text (index -1).
    stop = find_closeenumtag(tag, start + 1) || -1
    yield self.class.new(@text[start..stop], tag, parse_attribute(raw_attrs))
    start, raw_attrs = find_openenumtag(tag, start)
  end
end
#first(tag, *arg) ⇒ Object Also known as: next
225 |
# File 'lib/mega/tagiter.rb', line 225
#
# Shorthand for #nth with n = 1. The generated documentation also lists this
# method under the name +next+.
#
# @param tag [String] name of the tag to look for
# @param arg [Array] extra arguments forwarded to #nth after the index
# @yield [block] whatever #nth yields
# @return [Object] whatever #nth returns
def first(tag, *arg)
  nth(tag, 1, *arg) { |blk| yield blk }
end
#for_this {|_self| ... } ⇒ Object
274 275 276 |
# File 'lib/mega/tagiter.rb', line 274
#
# Yields the receiver itself to the block.
#
# @yield [_self] the receiver
# @return [Object] the value of the block
def for_this
  yield self
end
#get_first(*arg) ⇒ Object
280 |
# File 'lib/mega/tagiter.rb', line 280
#
# Non-block variant of #first: returns the object #first yields.
#
# @param arg [Array] arguments forwarded unchanged to #first
# @return [Object, nil] the yielded block, or nil if #first never yields
def get_first(*arg)
  found = nil
  first(*arg) { |blk| found = blk }
  found
end
#get_nth(*arg) ⇒ Object
278 |
# File 'lib/mega/tagiter.rb', line 278
#
# Non-block variant of #nth: returns the object #nth yields.
#
# @param arg [Array] arguments forwarded unchanged to #nth
# @return [Object, nil] the yielded block, or nil if #nth never yields
def get_nth(*arg)
  found = nil
  nth(*arg) { |blk| found = blk }
  found
end
#nth(tag, n, closetag = nil) {|self.class.new(text[s..e],tag,parse_attribute(d))| ... } ⇒ Object
202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 |
# File 'lib/mega/tagiter.rb', line 202
#
# Locates the +n+-th +tag+ block (counting from 1), yields a new iterator for
# it, and returns an iterator over the text that follows its closing tag.
#
# @param tag [String] name of the tag to look for
# @param n [Integer] 1-based index of the wanted block
# @param closetag [String, nil] alternative closing tag; when given it is
#   passed to +find_closetag+ together with the start position and +tag+
# @yield [block] a new instance of this class built from the text between
#   the positions reported by +find_opentag+ and +find_closetag+, the tag
#   name, and the result of +parse_attribute+
# @return [Object] a new instance of this class for the remaining text
# @raise [RuntimeError] when +n+ is nil or less than 1, or when fewer than
#   +n+ opening tags are found
def nth(tag, n, closetag = nil)
  raise RuntimeError, "nth: number not specified" unless n
  # With n < 1 the search loop would never run and the first character of
  # the text would be yielded as if it were a matched block.
  raise RuntimeError, "nth: number must be 1 or greater" if n < 1

  tail = 0
  start = stop = 0 # declared here so they outlive the loop block
  raw_attrs = nil
  1.upto(n) do |i|
    start, raw_attrs = find_opentag(tag, tail)
    raise RuntimeError, "tag(#{tag}) not found at(#{i})" unless start

    stop = closetag ? find_closetag(closetag, start, tag) : find_closetag(tag, start)
    # No closing tag: the block runs to the end of the text (index -1).
    stop ||= -1
    # Resume the next search at the '>' that ends the closing tag.
    tail = @text.index('>', stop + 1) || @text.length
  end
  yield self.class.new(@text[start..stop], tag, parse_attribute(raw_attrs))
  self.class.new(@text[(tail + 1)..-1])
end
#nth_tailer(tag, n) ⇒ Object
295 296 297 |
# File 'lib/mega/tagiter.rb', line 295
#
# Calls #nth with an empty block and returns its result, i.e. only the value
# #nth returns is of interest, not the block it yields.
#
# @param tag [String] name of the tag to look for
# @param n [Integer] index passed to #nth
# @return [Object] whatever #nth returns
def nth_tailer(tag, n)
  nth(tag, n) {}
end
#tagexist?(tag, st = 0) ⇒ Boolean
282 283 284 285 |
# File 'lib/mega/tagiter.rb', line 282
#
# Reports whether +find_element+ locates +tag+ starting at +st+.
#
# @param tag [String] name of the tag to look for
# @param st [Integer] start position passed to +find_element+
# @return [Boolean] true when +find_element+ returns a truthy value
def tagexist?(tag, st = 0)
  find_element(tag, st) ? true : false
end
#tagnext ⇒ Object
287 288 289 290 291 292 293 |
# File 'lib/mega/tagiter.rb', line 287
#
# Returns the name of the first tag that appears in the text.
#
# @return [String, nil] the first run of characters other than '<', '>' and
#   whitespace inside the first "<...>" span; nil when the text has no '<',
#   no '>' after it, or nothing between them
def tagnext
  open_at = @text.index("<")
  return nil unless open_at

  close_at = @text.index(">", open_at)
  # Guard on the closing bracket: an unterminated "<tag" must not be sliced
  # with a nil end index.
  return nil unless close_at

  @text[open_at..close_at].scan(/[^<>\s]+/)[0]
end