Class: TagIterator

Inherits:
Object show all
Defined in:
lib/mega/tagiter.rb

Overview

:title: TagIterator

Simple but very useful HTML/XHTML cascading parser.

Usage

# sample html
stext = <<-EOF
<body> This is a test...
  <sub> S1 </sub> <sub> S2 </sub>
  <DL>
    <DT> A1
    <DT> A2
    <DT> A3
  </DL>
  <DL>
    <DT> B1
    <DT> B2
    <DT> B3
  </DL>
  <NEST>
    <P ALIGN="R">TOP</P>
    <NEST>
      <P>SECOND</P>
      <OL>
        <LI>C1
        <LI>C2
        <LI>C3
        <LI>C4
      </OL>
    </NEST>
    <OL>
      <LI>D1
      <LI>D2
      <LI>D3
      <LI>D4
    </OL>
  </NEST>
</body>
EOF

# Wrap the sample markup in an iterator.
a = TagIterator.new(stext)
# Descend into the first <body> block; y wraps just that block.
a.first("body") do |y|
  # Second <DL> block: print each <DT> entry (B1, B2, B3 in the output).
  y.nth("dl",2) do |dl|
    dl.enumtag("dt") do |t|
      puts t.text.strip
    end
  end
  # Outer <NEST> block.
  y.first("nest") do |n|
    # first/nth return a new iterator over the text that follows the matched
    # block, so chaining .next (an alias of first) continues from there.
    n.first("p") do |c| 
      print c.text, ' '
      puts c.attributes.collect{ |k,v| "#{k}=#{v}" }
    end.next("nest") do |m|
      # Inner <NEST>: its <P>, then the <LI> entries of its <OL> (C1..C4).
      m.first("p") do |c| 
        puts c.text
      end.next("ol") do |o|
        o.enumtag("li") do |i| puts i.text.strip end
      end
    end.next("ol") do |o|
      # The <OL> that follows the inner <NEST> (D1..D4).
      o.enumtag("li") do |i| puts i.text.strip end
    end
  end
end
# Visit every <sub> block in the whole document (S1, S2).
a.each_block("sub") do |y|
  puts y.text.strip
end

produces

B1
B2
B3
TOP align=R
SECOND
C1
C2
C3
C4
D1
D2
D3
D4
S1
S2

Author(s)

*ɂႷ <[email protected]>

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#attributesObject (readonly)

Returns the value of attribute attributes.



138
139
140
# File 'lib/mega/tagiter.rb', line 138

# Reader for the @attributes instance variable.
#
# @return [Object] whatever is currently stored in @attributes
def attributes
  @attributes
end

#optionObject

Returns the value of attribute option.



136
137
138
# File 'lib/mega/tagiter.rb', line 136

# Reader for the @option instance variable.
#
# @return [Object] whatever is currently stored in @option
def option
  @option
end

#tagObject (readonly)

Returns the value of attribute tag.



137
138
139
# File 'lib/mega/tagiter.rb', line 137

# Reader for the @tag instance variable.
#
# @return [Object] whatever is currently stored in @tag
def tag
  @tag
end

#textObject (readonly)

Returns the value of attribute text.



135
136
137
# File 'lib/mega/tagiter.rb', line 135

# Reader for the @text instance variable.
#
# @return [Object] whatever is currently stored in @text
def text
  @text
end

Class Method Details

.[](aname) ⇒ Object



148
149
150
# File 'lib/mega/tagiter.rb', line 148

# Singleton method defined directly on the object held in @attributes:
# lowercases the requested name, then delegates to that object's own []
# through super. aname must respond to #downcase.
def @attributes.[](aname)
  super aname.downcase
end

Instance Method Details

#collect(*arg) ⇒ Object



252
253
254
255
256
# File 'lib/mega/tagiter.rb', line 252

# Gathers every object yielded by #each_block into an array.
#
# @param arg [Array] arguments passed straight through to #each_block
# @return [Array] the yielded objects, in the order they were yielded
def collect(*arg)
  gathered = []
  each_block(*arg) { |blk| gathered << blk }
  gathered
end

#each_block(tag, closetag = nil) ⇒ Object

Raises:

  • (RuntimeError)


228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/mega/tagiter.rb', line 228

# Yields a new instance of the receiver's class for each block opened by +tag+.
#
# Each yielded instance is built from the slice of @text between the opening
# position reported by find_opentag and the closing position reported by
# find_closetag, together with the tag name and the parsed attributes.
#
# @param tag [String] tag name handed to find_opentag
# @param closetag [String, nil] when given, find_closetag is asked for this
#   tag instead (with +tag+ passed as its third argument)
# @yield [block] a new instance wrapping one matched block
# @return [Object] a new instance built from the text after the last
#   recorded resume position
# @raise [RuntimeError] if find_opentag reports no opening tag at all
def each_block(tag, closetag = nil)
  resume_at = 0
  open_at, attr_src = find_opentag(tag)
  raise RuntimeError, "tag(#{tag}) not found" unless open_at

  while open_at
    close_at = closetag ? find_closetag(closetag, open_at, tag) : find_closetag(tag, open_at)
    close_at ||= -1   # no close position reported: slice runs to the end of @text
    yield self.class.new(@text[open_at..close_at], tag, parse_attribute(attr_src))

    # Without a real close position there is nowhere to resume from.
    break unless close_at >= 0

    resume_at = @text.index('>', close_at + 1) || @text.length
    open_at, attr_src = find_opentag(tag, resume_at)
  end
  self.class.new(text[(resume_at + 1)..-1])
end

#enumcollect(tag) ⇒ Object



268
269
270
271
272
# File 'lib/mega/tagiter.rb', line 268

# Gathers every object yielded by #enumtag into an array.
#
# @param tag [String] tag name passed through to #enumtag
# @return [Array] the yielded objects, in the order they were yielded
def enumcollect(tag)
  gathered = []
  enumtag(tag) { |item| gathered << item }
  gathered
end

#enumtag(tag) ⇒ Object



258
259
260
261
262
263
264
265
266
# File 'lib/mega/tagiter.rb', line 258

# Yields a new instance of the receiver's class for each item that
# find_openenumtag locates for +tag+.
#
# Each item spans from its opening position to the position reported by
# find_closeenumtag (searched from one character past the opening), or to
# the end of @text when no closing position is reported.
#
# @param tag [String] tag name handed to the find_*enumtag helpers
# @yield [item] a new instance wrapping one item
# @return [nil]
def enumtag(tag)
  open_at, attr_src = find_openenumtag(tag)
  while open_at
    close_at = find_closeenumtag(tag, open_at + 1) || -1
    yield self.class.new(@text[open_at..close_at], tag, parse_attribute(attr_src))
    # The next search is seeded with the current opening position.
    open_at, attr_src = find_openenumtag(tag, open_at)
  end
end

#first(tag, *arg) ⇒ Object Also known as: next



225
# File 'lib/mega/tagiter.rb', line 225

# Shorthand for #nth with n fixed to 1: yields the first block matching +tag+.
#
# @param tag [String] tag name passed to #nth
# @param arg [Array] extra arguments passed to #nth after the count
# @yield [found] whatever #nth yields
# @return [Object] whatever #nth returns
def first(tag, *arg)
  nth(tag, 1, *arg) { |found| yield found }
end

#for_this {|_self| ... } ⇒ Object

Yields:

  • (_self)

Yield Parameters:

  • _self (TagIterator)

    the object that the method was called on



274
275
276
# File 'lib/mega/tagiter.rb', line 274

# Yields the receiver itself to the given block.
#
# @yield [_self] the object the method was called on
# @return [Object] the value of the block
def for_this
  yield self
end

#get_first(*arg) ⇒ Object



280
# File 'lib/mega/tagiter.rb', line 280

# Non-block form of #first: captures the yielded value and returns it.
#
# @param arg [Array] arguments passed straight through to #first
# @return [Object, nil] the last value #first yielded, or nil if it
#   yielded nothing
def get_first(*arg)
  found = nil
  first(*arg) { |blk| found = blk }
  found
end

#get_nth(*arg) ⇒ Object



278
# File 'lib/mega/tagiter.rb', line 278

# Non-block form of #nth: captures the yielded value and returns it.
#
# @param arg [Array] arguments passed straight through to #nth
# @return [Object, nil] the last value #nth yielded, or nil if it
#   yielded nothing
def get_nth(*arg)
  found = nil
  nth(*arg) { |blk| found = blk }
  found
end

#nth(tag, n, closetag = nil) {|self.class.new(text[s..e],tag,parse_attribute(d))| ... } ⇒ Object

Yields:

  • (self.class.new(text[s..e],tag,parse_attribute(d)))

Raises:

  • (RuntimeError)


202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/mega/tagiter.rb', line 202

# Locates the n-th block opened by +tag+ and yields it.
#
# Walks forward n times: each step asks find_opentag for the next opening
# position at or after the current resume point, asks find_closetag for the
# matching close position, and moves the resume point to the next '>' after
# that close position (or to the end of @text when there is none).
#
# @param tag [String] tag name handed to find_opentag
# @param n [Integer] 1-based index of the block wanted
# @param closetag [String, nil] when given, find_closetag is asked for this
#   tag instead (with +tag+ passed as its third argument)
# @yield [block] a new instance of the receiver's class wrapping the n-th
#   block, with the tag name and parsed attributes
# @return [Object] a new instance built from the text after the resume point
# @raise [RuntimeError] if n is nil/false, or fewer than n opening tags exist
def nth(tag, n, closetag = nil)
  raise RuntimeError, "nth: number not specified" unless n

  resume_at = 0
  # Declared outside the block so the values from the final pass survive it.
  open_at = close_at = 0
  attr_src = nil

  1.upto(n) do |count|
    open_at, attr_src = find_opentag(tag, resume_at)
    raise RuntimeError, "tag(#{tag}) not found at(#{count})" unless open_at

    close_at = closetag ? find_closetag(closetag, open_at, tag) : find_closetag(tag, open_at)
    close_at ||= -1
    resume_at = @text.index('>', close_at + 1) || @text.length
  end

  yield self.class.new(text[open_at..close_at], tag, parse_attribute(attr_src))
  self.class.new(text[(resume_at + 1)..-1])
end

#nth_tailer(tag, n) ⇒ Object



295
296
297
# File 'lib/mega/tagiter.rb', line 295

# Runs #nth with an empty block, discarding the yielded block and keeping
# only #nth's return value.
#
# @param tag [String] tag name passed to #nth
# @param n [Integer] block index passed to #nth
# @return [Object] whatever #nth returns
def nth_tailer(tag, n)
  nth(tag, n) { }
end

#tagexist?(tag, st = 0) ⇒ Boolean

Returns:

  • (Boolean)


282
283
284
285
# File 'lib/mega/tagiter.rb', line 282

# Reports whether find_element locates +tag+ when started from +st+.
#
# @param tag [String] tag name handed to find_element
# @param st [Integer] start argument handed to find_element
# @return [Boolean] true when find_element returns a truthy value,
#   false otherwise
def tagexist?(tag, st = 0)
  find_element(tag, st) ? true : false
end

#tagnextObject



287
288
289
290
291
292
293
# File 'lib/mega/tagiter.rb', line 287

# Returns the name token of the first tag in @text.
#
# Finds the first '<' and the first '>' at or after it, then returns the
# first run of characters inside that span that is not '<', '>' or
# whitespace (for a closing tag this includes the leading '/').
#
# @return [String, nil] the tag token, or nil when @text has no '<', no
#   closing '>' after it, or nothing between them
def tagnext
  open_at = @text.index("<")
  return nil unless open_at

  close_at = @text.index(">", open_at)
  # The original re-tested the opening index here, so an unterminated tag
  # fell through to @text[s..nil] instead of returning nil.
  return nil unless close_at

  @text[open_at..close_at].scan(/[^<>\s]+/)[0]
end