Module: Hpricot::Traverse

Included in:
Container::Trav, Leaf::Trav
Defined in:
lib/hpricot/traverse.rb,
lib/hpricot/modules.rb,
lib/hpricot/elements.rb,
lib/hpricot/traverse.rb

Overview

:startdoc:

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.filter(tok, &blk) ⇒ Object



371
372
373
# File 'lib/hpricot/elements.rb', line 371

# Registers +blk+ as a method named "filter[<tok>]". String tokens are used
# verbatim; any other token is rendered with #inspect (e.g. :first -> ":first").
def self.filter(tok, &blk)
  label = tok.is_a?(String) ? tok : tok.inspect
  define_method("filter[#{label}]", &blk)
end

Instance Method Details

#after(html = nil, &blk) ⇒ Object

Adds elements immediately after this element, contained in the html string.



121
122
123
# File 'lib/hpricot/traverse.rb', line 121

# Builds nodes from +html+ (or from the block) via #make and passes them to the
# parent's insert_after, using this node as the reference point.
def after(html = nil, &blk)
  container = parent
  fragment = make(html, &blk)
  container.insert_after(fragment, self)
end

#at(expr) ⇒ Object Also known as: %

Find the first matching node for the CSS or XPath expr string.



334
335
336
# File 'lib/hpricot/traverse.rb', line 334

# Runs #search with +expr+ and returns the first element of the result
# (nil when the result is empty).
def at(expr)
  matches = search(expr)
  matches.first
end

#before(html = nil, &blk) ⇒ Object

Adds elements immediately before this element, contained in the html string.



126
127
128
# File 'lib/hpricot/traverse.rb', line 126

# Builds nodes from +html+ (or from the block) via #make and passes them to the
# parent's insert_before, using this node as the reference point.
def before(html = nil, &blk)
  container = parent
  fragment = make(html, &blk)
  container.insert_before(fragment, self)
end

#bogusetag? ⇒ Boolean

Is this object a stranded end tag?

Returns:

  • (Boolean)


21
# File 'lib/hpricot/traverse.rb', line 21

# True when BogusETag::Trav === self, i.e. this object is a kind of BogusETag::Trav.
def bogusetag?
  BogusETag::Trav === self
end

#children_of_type(tag_name) ⇒ Object

Find children of a given tag_name.

ele.children_of_type('p')
  #=> [...array of paragraphs...]


383
384
385
386
387
388
389
# File 'lib/hpricot/traverse.rb', line 383

# Returns the children whose #pathname equals +tag_name+. Children that do not
# respond to #pathname are skipped. Returns nil when this object has no
# #children method at all.
def children_of_type(tag_name)
  return unless respond_to? :children

  children.find_all { |kid| kid.respond_to?(:pathname) && kid.pathname == tag_name }
end

#clean_path(path) ⇒ Object



196
197
198
# File 'lib/hpricot/traverse.rb', line 196

# Returns a copy of +path+ with whitespace removed at the start and end of each
# line (the pattern uses the line anchors ^ and $, not \A and \z).
def clean_path(path)
  edge_whitespace = /^\s+|\s+$/
  path.gsub(edge_whitespace, '')
end

#comment? ⇒ Boolean

Is this object a comment?

Returns:

  • (Boolean)


19
# File 'lib/hpricot/traverse.rb', line 19

# True when Comment::Trav === self, i.e. this object is a kind of Comment::Trav.
def comment?
  Comment::Trav === self
end

#css_path ⇒ Object

Builds a unique CSS string for this node, from the root of the document containing it.



219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/hpricot/traverse.rb', line 219

# Builds a CSS selector string for this node.
#
# An element carrying an 'id' attribute is addressed directly as "#<id>".
# Otherwise the parent's css_path is extended with " > <pathname>" (or the bare
# pathname when the parent's css_path is nil/false). When two or more siblings
# share this node's pathname, ":nth(k)" is appended, where k is the zero-based
# rank of this node among those same-named siblings.
def css_path
  return "##{get_attribute('id')}" if elem? && has_attribute?('id')

  own_rank = 0
  same_named = 0
  parent.children.each do |sibling|
    # Record how many same-named siblings were seen before reaching this node.
    own_rank = same_named if sibling == self
    same_named += 1 if sibling.pathname == self.pathname
  end
  prefix = parent.css_path
  selector = prefix ? "#{prefix} > #{self.pathname}" : self.pathname
  selector += ":nth(#{own_rank})" if same_named >= 2
  selector
end

#doc? ⇒ Boolean

Is this object the enclosing HTML or XML document?

Returns:

  • (Boolean)


7
# File 'lib/hpricot/traverse.rb', line 7

# True when Doc::Trav === self, i.e. this object is a kind of Doc::Trav.
def doc?
  Doc::Trav === self
end

#doctype? ⇒ Boolean

Is this object a doctype tag?

Returns:

  • (Boolean)


15
# File 'lib/hpricot/traverse.rb', line 15

# True when DocType::Trav === self, i.e. this object is a kind of DocType::Trav.
def doctype?
  DocType::Trav === self
end

#elem? ⇒ Boolean

Is this object an HTML or XML element?

Returns:

  • (Boolean)


9
# File 'lib/hpricot/traverse.rb', line 9

# True when Elem::Trav === self, i.e. this object is a kind of Elem::Trav.
def elem?
  Elem::Trav === self
end

#following ⇒ Object

Find all nodes which follow the current one.



114
115
116
117
118
# File 'lib/hpricot/traverse.rb', line 114

# Collects every sibling that comes after this node in the parent's children
# and wraps them in an Elements collection.
def following
  siblings = parent.children
  later = siblings.drop(siblings.index(self) + 1)
  Elements[*later]
end

#get_subnode(*indexes) ⇒ Object



138
139
140
141
142
143
144
# File 'lib/hpricot/traverse.rb', line 138

# Walks down from this node, calling get_subnode_internal once per entry in
# +indexes+ on the result of the previous step. With no indexes it returns self.
def get_subnode(*indexes)
  indexes.inject(self) { |node, index| node.get_subnode_internal(index) }
end

#html(inner = nil, &blk) ⇒ Object Also known as: inner_html

Builds an HTML string from the contents of this node.



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/hpricot/traverse.rb', line 164

# Reader/writer for this node's inner markup.
#
# With neither +inner+ nor a block: returns the concatenation of each child's
# output("") result, or nil when this object has no #children method.
#
# With +inner+ or a block: marks the node as altered, replaces the children
# (an Array is assigned as-is; anything else goes through #make), then calls
# reparent on the new children and returns its result.
def html(inner = nil, &blk)
  unless inner || blk
    return nil unless respond_to? :children
    return children.map { |kid| kid.output("") }.join
  end

  altered!
  self.children = Array === inner ? inner : make(inner, &blk)
  reparent self.children
end

#index(name) ⇒ Object



47
48
49
50
51
52
53
54
55
56
# File 'lib/hpricot/traverse.rb', line 47

# Returns the position of the first child matching +name+, or -1 if none does.
#
# "*" short-circuits to 0 without looking at the children. A child matches when
# it responds to #name and its name equals +name+, or when it is a text node
# and +name+ is "text()".
def index(name)
  return 0 if name == "*"

  children.each_with_index do |child, offset|
    named = child.respond_to?(:name) && name == child.name
    return offset if named || (child.text? && name == "text()")
  end
  -1
end

#inner_html=(inner) ⇒ Object Also known as: innerHTML=

Inserts new contents into the current node, based on the HTML contained in string inner.



185
186
187
# File 'lib/hpricot/traverse.rb', line 185

# Writer form of #html. A nil/false +inner+ is replaced by an empty Array
# before being passed on.
def inner_html=(inner)
  replacement = inner || []
  html(replacement)
end

#inner_text ⇒ Object Also known as: innerText

Builds a string from the text contained in this node. All HTML elements are removed.



156
157
158
159
160
# File 'lib/hpricot/traverse.rb', line 156

# Joins the inner_text of every child into one string. Returns nil when this
# object has no #children method.
def inner_text
  return unless respond_to? :children

  pieces = children.map { |kid| kid.inner_text }
  pieces.join
end

#make(input = nil, &blk) ⇒ Object

Parses an HTML string, making an HTML fragment based on the options used to create the container document.



25
26
27
28
29
30
31
# File 'lib/hpricot/traverse.rb', line 25

# Builds a fragment from +input+ (or the block). The call is forwarded to the
# parent when there is one and it responds to #make; otherwise it goes to
# Hpricot.make.
def make(input = nil, &blk)
  owner = parent
  builder = (owner && owner.respond_to?(:make)) ? owner : Hpricot
  builder.make(input, &blk)
end

#next ⇒ Object Also known as: next_node

Returns the node neighboring this node to the south: just below it. This method includes text nodes and comments and such.



91
92
93
94
# File 'lib/hpricot/traverse.rb', line 91

# Returns the sibling immediately after this node in the parent's children
# (text nodes, comments, etc. included).
#
# Returns nil when this node has no parent or is the last child. The parent
# check must come before touching parent.children; otherwise a detached node
# raises NoMethodError on nil instead of returning nil.
def next
  return unless parent

  siblings = parent.children
  siblings[siblings.index(self) + 1]
end

#node_position ⇒ Object



235
236
237
# File 'lib/hpricot/traverse.rb', line 235

# Index of this node within all of its parent's children.
def node_position
  siblings = parent.children
  siblings.index(self)
end

#nodes_at(*pos) ⇒ Object

Puts together an array of neighboring nodes based on their proximity to this node. So, for example, to get the next node, you could use nodes_at(1). Or, to get the previous node, use nodes_at(-1).

This method also accepts ranges and sets of numbers.

ele.nodes_at(-3..-1, 1..3) # gets three nodes before and three after
ele.nodes_at(1, 5, 7) # gets three nodes at offsets below the current node
ele.nodes_at(0, 5..6) # the current node and two others


67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/hpricot/traverse.rb', line 67

# Selects siblings (this node included) by their offset relative to this node
# and returns them as an Elements collection, in document order.
#
# Each entry of +pos+ is matched with === against the offset, so Integers and
# Integer Ranges both work: 1 is the next node, -1 the previous one, 0 this
# node itself. A Range whose begin is a String is translated first: both ends
# are looked up with parent.index and converted to offsets relative to this
# node, keeping the range's exclude_end? setting.
#
# The stray debugging call `p pos` that used to print the arguments to stdout
# on every invocation has been removed.
def nodes_at(*pos)
  sib = parent.children
  si = sib.index(self)
  offsets = pos.map do |r|
    if r.is_a?(Range) && r.begin.is_a?(String)
      Range.new(parent.index(r.begin) - si, parent.index(r.end) - si, r.exclude_end?)
    else
      r
    end
  end
  picked = sib.select.with_index do |_node, i|
    offsets.any? { |wanted| wanted === (i - si) }
  end
  Elements[*picked]
end

#position ⇒ Object



239
240
241
# File 'lib/hpricot/traverse.rb', line 239

# Index of this node among the parent's children that share its pathname
# (as returned by the parent's children_of_type).
def position
  same_kind = parent.children_of_type(self.pathname)
  same_kind.index(self)
end

#preceding ⇒ Object

Find all preceding nodes.



107
108
109
110
111
# File 'lib/hpricot/traverse.rb', line 107

# Collects every sibling that comes before this node in the parent's children
# and wraps them in an Elements collection.
def preceding
  siblings = parent.children
  own_index = siblings.index(self)
  Elements[*siblings[0...own_index]]
end

#previous ⇒ Object Also known as: previous_node

Returns the node neighboring this node to the north: just above it. This method includes text nodes and comments and such.



99
100
101
102
103
# File 'lib/hpricot/traverse.rb', line 99

# Returns the sibling immediately before this node in the parent's children
# (text nodes, comments, etc. included).
#
# Returns nil when this node has no parent or is the first child. The old
# `sib and` guard ran only after parent.children had been dereferenced, so a
# detached node raised NoMethodError; the parent is now checked up front,
# matching #next.
def previous
  return unless parent

  siblings = parent.children
  offset = siblings.index(self) - 1
  # A negative offset would wrap around to the end of the Array.
  siblings[offset] if offset >= 0
end

#procins? ⇒ Boolean

Is this object an XML processing instruction?

Returns:

  • (Boolean)


17
# File 'lib/hpricot/traverse.rb', line 17

# True when ProcIns::Trav === self, i.e. this object is a kind of ProcIns::Trav.
def procins?
  ProcIns::Trav === self
end

#search(expr, &blk) ⇒ Object Also known as: /

Searches this node for all elements matching the CSS or XPath expr. Returns an Elements array containing the matching nodes. If blk is given, it is used to iterate through the matching set.



247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# File 'lib/hpricot/traverse.rb', line 247

# Evaluates a CSS/XPath-like selector +expr+ against this node.
#
# The expression is consumed from the left, one step per loop iteration:
# a combinator or a name/id step is peeled off, the working set +nodes+ is
# updated, and the remainder is handed to Elements.filter.
#
# Returns an Elements collection, or, when a block is given, yields each
# matched node and returns self. A Range +expr+ is delegated to
# Elements.expand using the nodes found by #at for each endpoint.
def search(expr, &blk)
  if Range === expr
    return Elements.expand(at(expr.begin), at(expr.end), expr.exclude_end?)
  end
  # Non-nil only right after a combinator step; consulted by the tag-name
  # branch below to decide whether the context node itself is excluded.
  last = nil
  # Current working set; every step narrows or moves it.
  nodes = [self]
  # Results of alternatives already completed (left of a '|' or ',').
  done = []
  expr = expr.to_s
  # Remaining-expression history, used to detect a step that made no progress.
  hist = []
  until expr.empty?
      expr = clean_path(expr)
      # Drop a leading '//'.
      expr.gsub!(%r!^//!, '')

      case expr
      when %r!^/?\.\.!
          # '..' (optionally preceded by '/'): move each node to its parent.
          last = expr = $'
          nodes.map! { |node| node.parent }
      when %r!^[>/]\s*!
          # '>' or '/': replace the set with the children of every node that has any.
          last = expr = $'
          nodes = Elements[*nodes.map { |node| node.children if node.respond_to? :children }.flatten.compact]
      when %r!^\+!
          # '+': the sibling immediately after each node; nodes without one are dropped.
          last = expr = $'
          nodes.map! do |node|
              siblings = node.parent.children
              siblings[siblings.index(node)+1]
          end
          nodes.compact!
      when %r!^~!
          # '~': all siblings after each node.
          last = expr = $'
          nodes.map! do |node|
              siblings = node.parent.children
              siblings[(siblings.index(node)+1)..-1]
          end
          nodes.flatten!
      when %r!^[|,]!
          # '|' or ',': bank the current alternative in +done+ and restart from self.
          # The remainder gets a leading space prepended.
          last = expr = " #$'"
          nodes.shift if nodes.first == self
          done += nodes
          nodes = [self]
      else
          # Name step: m[1] is an optional '#' or '.' prefix, m[2] the name (may be empty).
          m = expr.match(%r!^([#.]?)([a-z0-9\\*_-]*)!i).to_a
          after = $'
          # First ':token' found in the remainder. NOTE(review): the pattern is
          # not anchored, so this can match later in the string — confirm intent.
          mt = after[%r!:[a-z0-9\\*_-]+!i, 0]
          # Assigned but never read anywhere in this method.
          oop = false
          # A ':token' that is neither ':not' nor a registered filter method is
          # folded into the name itself (presumably for namespaced tag names —
          # TODO confirm).
          if mt and not (mt == ":not" or Traverse.method_defined? "filter[#{mt}]")
            after = $' 
            m[2] += mt
            expr = after
          end
          if m[1] == '#'
              # '#id': the set becomes the single element with that id, or empty.
              oid = get_element_by_id(m[2])
              nodes = oid ? [oid] : []
              expr = after
          else
              # Match any element when the remainder starts with '()', the name
              # is empty, or the step is a '.class' selector.
              m[2] = "*" if after =~ /^\(\)/ || m[2] == "" || m[1] == "."
              ret = []
              nodes.each do |node|
                  case m[2]
                  when '*'
                      node.traverse_element { |n| ret << n }
                  else
                      if node.respond_to? :get_elements_by_tag_name
                        # The context node is removed from its own results unless a
                        # combinator step immediately preceded this one (+last+ set).
                        ret += [*node.get_elements_by_tag_name(m[2])] - [*(node unless last)]
                      end
                  end
              end
              nodes = ret
          end
          last = nil
      end

      hist << expr
      # Stop when the remaining expression is identical to the previous
      # iteration's, which would otherwise loop forever.
      break if hist[-1] == hist[-2]
      # Elements.filter is defined elsewhere; it returns the updated node set and
      # remaining expression (presumably applying attribute/pseudo filters —
      # verify against its implementation).
      nodes, expr = Elements.filter(nodes, expr)
  end
  nodes = done + nodes.flatten.uniq
  if blk
      nodes.each(&blk)
      self
  else
      Elements[*nodes]
  end
end

#swap(html = nil, &blk) ⇒ Object

Replace this element and its contents with the nodes contained in the html string.



133
134
135
136
# File 'lib/hpricot/traverse.rb', line 133

# Marks the parent as altered, then asks it to replace this node with the
# nodes built from +html+ (or the block) via #make.
def swap(html = nil, &blk)
  host = parent
  host.altered!
  host.replace_child(self, make(html, &blk))
end

#text? ⇒ Boolean

Is this object an HTML text node?

Returns:

  • (Boolean)


11
# File 'lib/hpricot/traverse.rb', line 11

# True when Text::Trav === self, i.e. this object is a kind of Text::Trav.
def text?
  Text::Trav === self
end

#to_html ⇒ Object Also known as: to_s

Builds an HTML string from this node and its contents. If you need to write to a stream, try calling output(io) as a method on this object.



36
37
38
# File 'lib/hpricot/traverse.rb', line 36

# Serializes this node by calling #output with a fresh empty string as the
# target and returning whatever #output returns.
def to_html
  buffer = ""
  output(buffer)
end

#to_original_html ⇒ Object

Attempts to preserve the original HTML of the document, only outputting new tags for elements which have changed.



43
44
45
# File 'lib/hpricot/traverse.rb', line 43

# Same as calling #output with a fresh empty string, but with the
# :preserve option switched on.
def to_original_html
  buffer = ""
  output(buffer, :preserve => true)
end

#to_plain_text ⇒ Object

Builds a string from the text contained in this node. All HTML elements are removed.



148
149
150
151
152
# File 'lib/hpricot/traverse.rb', line 148

# Joins the to_plain_text of every child, strips surrounding whitespace and
# collapses any run of two or more newlines down to exactly two. Returns nil
# when this object has no #children method.
def to_plain_text
  return unless respond_to? :children

  joined = children.map { |kid| kid.to_plain_text }.join
  joined.strip.gsub(/\n{2,}/, "\n\n")
end

#traverse_element(*names, &block) ⇒ Object

traverse_element traverses elements in the tree. It yields elements in depth first order.

If names are empty, it yields all elements. If non-empty names are given, they should be a list of universal names.

A nested element is yielded in depth first order as follows.

t = Hpricot('<a id=0><b><a id=1 /></b><c id=2 /></a>') 
t.traverse_element("a", "c") {|e| p e}
# =>
{elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>}
{emptyelem <a id="1">}
{emptyelem <c id="2">}

Universal names are specified as follows.

t = Hpricot(<<'End')
<html>
<meta name="robots" content="index,nofollow">
<meta name="author" content="Who am I?">    
</html>
End
t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e}
# =>
{emptyelem <{http://www.w3.org/1999/xhtml}meta name="robots" content="index,nofollow">}
{emptyelem <{http://www.w3.org/1999/xhtml}meta name="author" content="Who am I?">}


367
368
369
370
371
372
373
374
375
376
# File 'lib/hpricot/traverse.rb', line 367

# Walks the elements below this node, passing the block to the underlying
# traversal helper. With no +names+ every element is visited
# (traverse_all_element); otherwise the names are turned into a
# name => true lookup Hash and passed to traverse_some_element.
# Always returns nil.
def traverse_element(*names, &block) # :yields: element
  if names.empty?
    traverse_all_element(&block)
  else
    wanted = names.each_with_object({}) { |name, set| set[name] = true }
    traverse_some_element(wanted, &block)
  end
  nil
end

#traverse_text(&block) ⇒ Object

traverse_text traverses texts in the tree



673
674
675
676
# File 'lib/hpricot/traverse.rb', line 673

# Passes the block to traverse_text_internal and discards that helper's own
# result; this method always returns nil.
def traverse_text(&block) # :yields: text
  traverse_text_internal(&block)
  return nil
end

#xmldecl? ⇒ Boolean

Is this object an XML declaration?

Returns:

  • (Boolean)


13
# File 'lib/hpricot/traverse.rb', line 13

# True when XMLDecl::Trav === self, i.e. this object is a kind of XMLDecl::Trav.
def xmldecl?
  XMLDecl::Trav === self
end

#xpath ⇒ Object

Builds a unique XPath string for this node, from the root of the document containing it.



202
203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/hpricot/traverse.rb', line 202

# Builds an XPath string for this node.
#
# An element carrying an 'id' attribute is addressed directly as
# "//<name>[@id='<id>']". Otherwise the parent's xpath and this node's
# pathname are combined with File.join (used here purely as a '/'-joiner).
# When two or more siblings share this node's pathname, a one-based
# "[k]" index among those same-named siblings is appended.
def xpath
  return "//#{self.name}[@id='#{get_attribute('id')}']" if elem? && has_attribute?('id')

  own_rank = 0
  same_named = 0
  parent.children.each do |sibling|
    # Record how many same-named siblings were seen before reaching this node.
    own_rank = same_named if sibling == self
    same_named += 1 if sibling.pathname == self.pathname
  end
  path = File.join(parent.xpath, self.pathname)
  path += "[#{own_rank + 1}]" if same_named >= 2
  path
end