Class: HTML::Node

Inherits:
Object
  • Object
show all
Defined in:
lib/html/node.rb,
lib/html/node_ext.rb

Overview

The base class of all nodes, textual and otherwise, in an HTML document.

Direct Known Subclasses

Tag, Text

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(parent, line = 0, pos = 0) ⇒ Node

Create a new node as a child of the given parent.



72
73
74
75
76
# File 'lib/html/node.rb', line 72

# Set up a node attached to +parent+.
#
# parent - the node this one belongs to
# line   - stored as the node's line (defaults to 0)
# pos    - stored as the node's position (defaults to 0)
#
# The node starts out with an empty list of children.
def initialize(parent, line=0, pos=0)
  @parent   = parent
  @line     = line
  @position = pos
  @children = []
end

Instance Attribute Details

#children ⇒ Object (readonly)

The array of children of this node. Not all nodes have children.



59
60
61
# File 'lib/html/node.rb', line 59

# Reader for the @children instance variable, which #initialize sets to an
# empty array.
def children
  @children
end

#line ⇒ Object (readonly)

The line number of the input where this node was begun.



66
67
68
# File 'lib/html/node.rb', line 66

# Reader for the @line instance variable, which #initialize sets from its
# +line+ argument (0 when omitted).
def line
  @line
end

#parent ⇒ Object (readonly)

The parent node of this node. All nodes have a parent, except for the root node.



63
64
65
# File 'lib/html/node.rb', line 63

# Reader for the @parent instance variable, which #initialize sets from its
# +parent+ argument. May be nil (for example after #detach).
def parent
  @parent
end

#position ⇒ Object (readonly)

The byte position in the input where this node was begun.



69
70
71
# File 'lib/html/node.rb', line 69

# Reader for the @position instance variable, which #initialize sets from its
# +pos+ argument (0 when omitted).
def position
  @position
end

Class Method Details

.parse(parent, line, pos, content, strict = true) ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/html/node.rb', line 139

# Build the node described by +content+, a single token of HTML source.
#
# parent  - passed through to the constructor of the node that is built
# line    - passed through to the constructor of the node that is built
# pos     - passed through to the constructor of the node that is built
# content - the raw token text
# strict  - when true (the default) malformed markup raises; when false the
#           parser recovers as best it can
#
# Returns a Text node when +content+ is not markup, a CDATA node for a
# <![CDATA[...]]> section (holding only the text between the opener and the
# terminator), and a Tag node otherwise. A Tag's closing flag is :close for
# "</x>", :self for "<x/>" and nil for an opening tag; closing tags carry
# nil attributes.
#
# Raises RuntimeError in strict mode when the leading "<", the CDATA
# terminator "]]>" or the final ">" is missing.
def parse(parent, line, pos, content, strict=true)
  # No line of the content starts with "<" followed by a non-space: plain text.
  return Text.new(parent, line, pos, content) if content !~ /^<\S/

  scanner = StringScanner.new(content)

  # The test above is line-anchored, so the "<" may not be the first character.
  unless scanner.skip(/</)
    raise "expected <" if strict
    return Text.new(parent, line, pos, content)
  end

  if scanner.skip(/!\[CDATA\[/)
    if section = scanner.scan_until(/\]\]>/)
      # scan_until returns the text from the current position up to and
      # including the terminator; drop the three terminator characters.
      # (pre_match would also include the "<![CDATA[" opener.)
      cdata = section[0...-3]
    elsif strict
      raise "expected ]]> (got #{scanner.rest.inspect} for #{content})"
    else
      # Unterminated section: treat everything that is left as the body.
      cdata = scanner.rest
    end
    return CDATA.new(parent, line, pos, cdata)
  end

  closing = ( scanner.scan(/\//) ? :close : nil )
  return Text.new(parent, line, pos, content) unless name = scanner.scan(/[\w:]+/)
  name.downcase!

  unless closing
    scanner.skip(/\s*/)
    attributes = {}
    while attr = scanner.scan(/[-\w:]+/)
      # An attribute without "=value" is recorded as true.
      value = true
      if scanner.scan(/\s*=\s*/)
        if delim = scanner.scan(/['"]/)
          value = ""
          while text = scanner.scan(/[^#{delim}\\]+|./)
            case text
            when "\\"
              # Keep the backslash and the character it escapes verbatim.
              value << text
              escaped = scanner.getch
              # getch is nil when the backslash is the last character.
              value << escaped if escaped
            when delim
              break
            else
              value << text
            end
          end
        else
          value = scanner.scan(/[^\s>\/]+/)
        end
      end
      attributes[attr.downcase] = value
      scanner.skip(/\s*/)
    end

    closing = ( scanner.scan(/\//) ? :self : nil )
  end

  unless scanner.scan(/\s*>/)
    if strict
      raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
    else
      # throw away all text until we find what we're looking for
      scanner.skip_until(/>/) or scanner.terminate
    end
  end

  Tag.new(parent, line, pos, name, attributes, closing)
end

Instance Method Details

#==(node) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
# File 'lib/html/node.rb', line 126

# Structural equality: two nodes are equal when they are of the same class,
# have the same number of children, and each pair of children at the same
# index compares equal with ==.
def ==(node)
  return false unless self.class == node.class
  return false unless children.size == node.children.size

  # Fold over the paired children; once a pair differs, the remaining
  # comparisons are skipped by the short-circuiting &&.
  children.zip(node.children).inject(true) do |same, (mine, theirs)|
    same && mine == theirs
  end
end

#detach ⇒ Object

Detach this node from its parent.



64
65
66
67
68
69
70
# File 'lib/html/node_ext.rb', line 64

# Remove this node from its parent's list of children and clear the parent
# reference. Does nothing when there is no parent.
#
# Returns self.
def detach()
  return self unless @parent

  # Compare by identity so that nodes which are merely == stay in place.
  @parent.children.reject! { |sibling| sibling.equal?(self) }
  @parent = nil
  self
end

#each(value = nil) {|_self, value| ... } ⇒ Object

Process each node beginning with the current node.

Yields:

  • (_self, value)

Yield Parameters:

  • _self (HTML::Node)

    the object that the method was called on



74
75
76
77
78
79
80
81
82
# File 'lib/html/node_ext.rb', line 74

# Yield this node and then, recursively, every node below it, each time
# together with +value+.
#
# value - passed unchanged as the second block argument on every yield
#
# Returns +value+.
def each(value = nil, &block)
  yield self, value
  # A missing children list is treated like an empty one.
  (@children || []).each { |child| child.each(value, &block) }
  value
end

#find(conditions) ⇒ Object

Search the children of this node for the first node for which #find returns non-nil. Returns the result of the #find call that succeeded.



93
94
95
96
97
98
99
100
101
# File 'lib/html/node.rb', line 93

# Ask each child in turn to #find the given conditions and return the first
# truthy answer.
#
# conditions - normalized through validate_conditions before being handed to
#              the children
#
# Returns the first truthy child result, or nil when no child produces one.
def find(conditions)
  criteria = validate_conditions(conditions)

  @children.each do |child|
    hit = child.find(criteria)
    return hit if hit
  end

  nil
end

#find_all(conditions) ⇒ Object

Search for all nodes that match the given conditions, and return them as an array.



105
106
107
108
109
110
111
112
113
114
# File 'lib/html/node.rb', line 105

# Collect this node (when it matches) followed by everything its children
# report from their own #find_all, in child order.
#
# conditions - normalized through validate_conditions before use
#
# Returns an Array, empty when nothing matched.
def find_all(conditions)
  criteria = validate_conditions(conditions)

  found = match(criteria) ? [self] : []
  @children.inject(found) { |acc, child| acc.concat(child.find_all(criteria)) }
end

#match(conditions) ⇒ Object

Returns false. Subclasses must override this to provide specific matching behavior; conditions may be of any type.



87
88
89
# File 'lib/html/node.rb', line 87

# Always returns false here; the +conditions+ argument is not inspected.
def match(conditions)
  false
end

#next_element(name = nil) ⇒ Object

Return the next element after this one. Skips sibling text nodes.

With the name argument, returns the next element with that name, skipping other sibling elements.



31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/html/node_ext.rb', line 31

# Find the first tag node that follows this node among its parent's children.
#
# name - when given, only a following tag whose name equals it is returned;
#        other tags are skipped
#
# Returns the matching node, or nil when there is none or when this node has
# no parent (a root or detached node).
def next_element(name = nil)
  siblings = parent && parent.children
  return nil unless siblings

  passed_self = false
  siblings.each do |node|
    if node.equal?(self)
      passed_self = true
    elsif passed_self && node.tag? && (name.nil? || node.name == name)
      return node
    end
  end

  nil
end

#next_sibling ⇒ Object

Returns the next sibling node.



6
7
8
9
10
11
12
13
# File 'lib/html/node_ext.rb', line 6

# Return the node that directly follows this one in its parent's children.
#
# Returns nil when this node is the last child, is not found among the
# parent's children, or has no parent (a root or detached node).
def next_sibling()
  siblings = parent && parent.children
  return nil unless siblings

  # Locate this node by identity rather than by ==.
  own_index = siblings.index { |node| node.equal?(self) }
  own_index && siblings[own_index + 1]
end

#previous_element(name = nil) ⇒ Object

Return the previous element before this one. Skips sibling text nodes.

Using the name argument, returns the previous element with that name, skipping other sibling elements.



51
52
53
54
55
56
57
58
59
60
# File 'lib/html/node_ext.rb', line 51

# Find the closest tag node that precedes this node among its parent's
# children.
#
# name - when given, only a preceding tag whose name equals it is returned;
#        other tags are skipped
#
# Returns the matching node, or nil when there is none or when this node has
# no parent (a root or detached node).
def previous_element(name = nil)
  siblings = parent && parent.children
  return nil unless siblings

  candidate = nil
  siblings.each do |node|
    # Everything remembered so far precedes this node.
    return candidate if node.equal?(self)
    candidate = node if node.tag? && (name.nil? || node.name == name)
  end

  nil
end

#previous_sibling ⇒ Object

Returns the previous sibling node.



17
18
19
20
21
22
23
24
# File 'lib/html/node_ext.rb', line 17

# Return the node that directly precedes this one in its parent's children.
#
# Returns nil when this node is the first child, is not found among the
# parent's children, or has no parent (a root or detached node).
def previous_sibling()
  siblings = parent && parent.children
  return nil unless siblings

  # Locate this node by identity rather than by ==.
  own_index = siblings.index { |node| node.equal?(self) }
  # Index 0 has no predecessor; siblings[-1] would wrap to the last child.
  return nil if own_index.nil? || own_index.zero?

  siblings[own_index - 1]
end

#tag? ⇒ Boolean

Returns false. Subclasses may override this if they define a kind of tag.

Returns:

  • (Boolean)


118
119
120
# File 'lib/html/node.rb', line 118

# Always returns false here.
def tag?
  false
end

#to_s ⇒ Object

Return a textual representation of the node.



79
80
81
82
83
# File 'lib/html/node.rb', line 79

# Concatenate the string forms of all children, in order.
#
# Returns a new String (empty when there are no children).
def to_s
  @children.inject("") { |text, child| text << child.to_s }
end

#validate_conditions(conditions) ⇒ Object



122
123
124
# File 'lib/html/node.rb', line 122

# Normalize +conditions+ into a Conditions object.
#
# Returns the argument itself when it already is a Conditions, otherwise a
# new Conditions built from it.
def validate_conditions(conditions)
  case conditions
  when Conditions then conditions
  else Conditions.new(conditions)
  end
end