Class: Nokolexbor::NodeSet

Inherits:
Node
  • Object
show all
Includes:
Enumerable
Defined in:
lib/nokolexbor/node_set.rb,
ext/nokolexbor/nl_node_set.c

Constant Summary

Constants inherited from Node

Nokolexbor::Node::ATTRIBUTE_NODE, Nokolexbor::Node::CDATA_SECTION_NODE, Nokolexbor::Node::COMMENT_NODE, Nokolexbor::Node::DOCUMENT_FRAG_NODE, Nokolexbor::Node::DOCUMENT_NODE, Nokolexbor::Node::DOCUMENT_TYPE_NODE, Nokolexbor::Node::ELEMENT_NODE, Nokolexbor::Node::ENTITY_NODE, Nokolexbor::Node::ENTITY_REF_NODE, Nokolexbor::Node::LOOKS_LIKE_XPATH, Nokolexbor::Node::NOTATION_NODE, Nokolexbor::Node::PI_NODE, Nokolexbor::Node::TEXT_NODE

Instance Attribute Summary

Attributes inherited from Node

#document

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Node

#[]=, #add_child, #add_class, #add_next_sibling, #add_previous_sibling, #add_sibling, #after, #ancestors, #append_class, #at, #at_css_impl, #at_xpath, #attribute, #attribute_nodes, #attributes, #attrs, #before, #cdata?, #child, #children=, #classes, #clone, #comment?, #content=, #css_impl, #document?, #element?, #first_element_child, #fragment, #fragment?, #key?, #keys, #kwattr_add, #kwattr_append, #kwattr_remove, #kwattr_values, #last_element_child, #matches?, #name, #next, #next_element, #node_type, #nokogiri_at_css, #parent, #parent=, #parse, #prepend_child, #previous, #previous_element, #processing_instruction?, #remove_attr, #remove_class, #replace, #search, #swap, #text?, #traverse, #values, #write_to

Class Method Details

.new(document, list = []) {|NodeSet| ... } ⇒ NodeSet

Create a NodeSet that belongs to document and initially contains the nodes in list (empty by default).

Yields:

Returns:



12
13
14
15
16
17
18
# File 'lib/nokolexbor/node_set.rb', line 12

# Build a NodeSet bound to +document+ and pre-filled with the nodes in +list+.
#
# The instance comes from +allocate+, so +initialize+ is not invoked; the
# owning document is stored directly in +@document+.
#
# @param document the document the new set belongs to
# @param list [#each] nodes appended, in order, through +<<+
# @yield [node_set] the populated set, when a block is given
# @return the newly created set
def self.new(document, list = [])
  allocate.tap do |node_set|
    node_set.instance_variable_set(:@document, document)
    list.each { |node| node_set << node }
    yield node_set if block_given?
  end
end

Instance Method Details

#==(other) ⇒ Boolean

Returns true if two NodeSets contain the same number of elements and each element is equal to the corresponding element in the other NodeSet.

Returns:

  • (Boolean)

    true if two NodeSets contain the same number of elements and each element is equal to the corresponding element in the other NodeSet.



132
133
134
135
136
137
138
139
140
# File 'lib/nokolexbor/node_set.rb', line 132

# Element-wise equality between two NodeSets.
#
# @param other the object to compare against
# @return [Boolean] true only when +other+ is a NodeSet of the same length
#   whose node at every position equals the node at that position here
def ==(other)
  return false unless other.is_a?(NodeSet) && length == other.length

  each_with_index.all? { |node, position| node == other[position] }
end

#[](index) ⇒ Node? #[](start, length) ⇒ NodeSet? #[](range) ⇒ NodeSet? Also known as: slice

The Nokolexbor::Node at index, or returns a Nokolexbor::NodeSet containing nodes starting at start and continuing for length elements, or returns a Nokolexbor::NodeSet containing nodes specified by range. Negative indices count backward from the end of the node_set (-1 is the last node). Returns nil if the index (or start) are out of range.

Overloads:

Returns:

  • (Node, NodeSet, nil)

    the Nokolexbor::Node at index, or returns a Nokolexbor::NodeSet containing nodes starting at start and continuing for length elements, or returns a Nokolexbor::NodeSet containing nodes specified by range. Negative indices count backward from the end of the node_set (-1 is the last node). Returns nil if the index (or start) are out of range.



212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# File 'ext/nokolexbor/nl_node_set.c', line 212

/*
 * Implements NodeSet#[] / #slice with three calling forms:
 *   set[index]          -> delegated to nl_node_set_index_at
 *   set[start, length]  -> delegated to nl_node_set_subseq
 *   set[range]          -> delegated to nl_node_set_subseq
 *
 * argc/argv are the raw Ruby arguments; self is the receiving NodeSet.
 */
static VALUE
nl_node_set_slice(int argc, VALUE *argv, VALUE self)
{
  VALUE arg;
  long beg, len;

  lexbor_array_t *array = nl_rb_node_set_unwrap(self);

  /* Two-argument form: (start, length). */
  if (argc == 2) {
    beg = NUM2LONG(argv[0]);
    len = NUM2LONG(argv[1]);
    /* A negative start counts back from the end of the set. */
    if (beg < 0) {
      beg += array->length;
    }
    return nl_node_set_subseq(self, beg, len);
  }

  /*
   * Any count other than 1 is invalid at this point; rb_scan_args is called
   * with the "11" spec (one required, one optional argument) so that it
   * reports the wrong argument count.
   */
  if (argc != 1) {
    rb_scan_args(argc, argv, "11", NULL, NULL);
  }
  arg = argv[0];

  /* Fast path: a Fixnum is used directly as an index. */
  if (FIXNUM_P(arg)) {
    return nl_node_set_index_at(self, FIX2LONG(arg));
  }

  /*
   * Try to interpret arg as a Range over array->length elements.
   * Per the Ruby C API, rb_range_beg_len yields Qfalse when arg is not a
   * Range and, with the final argument 0, Qnil when the range is out of
   * bounds; otherwise beg/len are filled in.
   */
  switch (rb_range_beg_len(arg, &beg, &len, array->length, 0)) {
  case Qfalse:
    break;
  case Qnil:
    return Qnil;
  default:
    return nl_node_set_subseq(self, beg, len);
  }

  /* Not a Fixnum and not a Range: convert whatever numeric it is to a long index. */
  return nl_node_set_index_at(self, NUM2LONG(arg));
}

#at_css(selector) ⇒ Node?

Like #css, but returns the first match.

This method uses Lexbor as the selector engine. Its performance is much higher than Nokolexbor::Node#at_xpath or Nokolexbor::Node#nokogiri_at_css.

Returns:

  • (Node, nil)

    The first matched Node.

See Also:



356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# File 'ext/nokolexbor/nl_node_set.c', line 356

/*
 * NodeSet#at_css: run `selector` through nl_node_set_find with
 * nl_node_at_css_callback and return a Ruby node for the first entry of the
 * (possibly re-sorted) match list, or nil when nothing matched.
 *
 * The temporary match array is destroyed on every path, including before a
 * lexbor error is raised.
 */
static VALUE
nl_node_set_at_css(VALUE self, VALUE selector)
{
  lexbor_array_t *matches = lexbor_array_create();
  lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));
  VALUE result = Qnil;

  lxb_status_t status = nl_node_set_find(self, selector, nl_node_at_css_callback, matches);
  if (status != LXB_STATUS_OK) {
    lexbor_array_destroy(matches, true);
    nl_raise_lexbor_error(status);
  }

  if (matches->length != 0) {
    nl_sort_nodes_if_necessary(selector, doc, matches);
    result = nl_rb_node_create(matches->list[0], nl_rb_document_get(self));
  }

  lexbor_array_destroy(matches, true);

  return result;
}

#children ⇒ NodeSet

Returns A new NodeSet containing all the children of all the nodes in the NodeSet.

Returns:

  • (NodeSet)

    A new NodeSet containing all the children of all the nodes in the NodeSet.



144
145
146
147
148
149
150
# File 'lib/nokolexbor/node_set.rb', line 144

# Gather the children of every node in this set into a new NodeSet.
#
# Children are pushed in iteration order: all children of the first node,
# then all children of the second, and so on.
#
# @return [NodeSet] a new set created for the same +@document+
def children
  NodeSet.new(@document) do |collected|
    each do |parent|
      parent.children.each { |child| collected.push(child) }
    end
  end
end

#content ⇒ String Also known as: text, inner_text, to_str

Get the content of all contained Nodes.

Returns:

  • (String)


70
71
72
# File 'lib/nokolexbor/node_set.rb', line 70

# Concatenate the content of every node in the set.
#
# @return [String] each node's +content+, joined with no separator
def content
  pieces = map { |node| node.content }
  pieces.join
end

#css(selector) ⇒ NodeSet

Search this object for CSS rules. rules must be one or more CSS selectors.

This method uses Lexbor as the selector engine. Its performance is much higher than #xpath or #nokogiri_css.

Examples:

node.css('title')
node.css('body h1.bold')
node.css('div + p.green', 'div#one')

Returns:

  • (NodeSet)

    The matched set of Nodes.

See Also:



386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
# File 'ext/nokolexbor/nl_node_set.c', line 386

/*
 * NodeSet#css: collect every node matched by `selector` (via
 * nl_node_set_find with nl_node_css_callback) and wrap the resulting array
 * in a new Ruby NodeSet attached to the receiver's document.
 *
 * On a lexbor error the temporary array is destroyed before raising;
 * on success the array is handed to nl_rb_node_set_create_with_data.
 */
static VALUE
nl_node_set_css(VALUE self, VALUE selector)
{
  lexbor_array_t *matches = lexbor_array_create();
  lxb_dom_document_t *doc = nl_rb_document_unwrap(nl_rb_document_get(self));

  lxb_status_t find_status = nl_node_set_find(self, selector, nl_node_css_callback, matches);
  if (find_status != LXB_STATUS_OK) {
    lexbor_array_destroy(matches, true);
    nl_raise_lexbor_error(find_status);
  }

  nl_sort_nodes_if_necessary(selector, doc, matches);

  return nl_rb_node_set_create_with_data(matches, nl_rb_document_get(self));
}

#delete(node) ⇒ Node?

Delete node from the NodeSet.

Parameters:

Returns:

  • (Node, nil)

    The deleted node if found, otherwise returns nil.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'ext/nokolexbor/nl_node_set.c', line 111

/*
 * NodeSet#delete: remove the first array slot whose pointer is identical to
 * the unwrapped `rb_node`.
 *
 * Returns rb_node when a slot was removed, Qnil when the node is not present.
 */
static VALUE
nl_node_set_delete(VALUE self, VALUE rb_node)
{
  lexbor_array_t *nodes = nl_rb_node_set_unwrap(self);
  lxb_dom_node_t *target = nl_rb_node_unwrap(rb_node);

  /* Linear scan by pointer identity; stops on the first hit. */
  size_t pos = 0;
  while (pos < nodes->length && nodes->list[pos] != target) {
    pos++;
  }

  if (pos == nodes->length) {
    /* Ran off the end: the node is not in this set. */
    return Qnil;
  }

  lexbor_array_delete(nodes, pos, 1);
  return rb_node;
}

#destroy ⇒ Object

Destroy all nodes in the NodeSet.



109
110
111
# File 'lib/nokolexbor/node_set.rb', line 109

# Call +destroy+ on every node in the set.
#
# @return whatever +each+ returns
def destroy
  each { |node| node.destroy }
end

#each {|Node| ... } ⇒ Object

Iterate over each node.

Yields:



23
24
25
26
27
28
29
30
# File 'lib/nokolexbor/node_set.rb', line 23

# Yield every node of the set in index order.
#
# The length is read once before iterating, and nodes are fetched one at a
# time through +self[index]+.
#
# @yield [node] each node, from index 0 up to +length - 1+
# @return [self] when a block is given; otherwise the result of +to_enum+
def each
  return to_enum unless block_given?

  length.times { |position| yield self[position] }
  self
end

#empty? ⇒ Boolean

Returns true if this NodeSet is empty.

Returns:

  • (Boolean)

    true if this NodeSet is empty.



53
54
55
# File 'lib/nokolexbor/node_set.rb', line 53

# Whether the set holds no nodes.
#
# @return [Boolean] true when +length+ is zero
def empty?
  length.zero?
end

#first(n = nil) ⇒ Node+

Get the first n elements of the NodeSet.

Parameters:

  • n (Numeric, nil) (defaults to: nil)

Returns:



37
38
39
40
41
42
43
# File 'lib/nokolexbor/node_set.rb', line 37

# Fetch the first node, or the first +n+ nodes.
#
# @param n [Numeric, nil] how many leading nodes to return
# @return the node at index 0 when +n+ is not given; otherwise an Array
#   holding at most +n+ nodes (never more than +length+)
def first(n = nil)
  if n
    [n, length].min.times.map { |position| self[position] }
  else
    self[0]
  end
end

#include?(node) ⇒ Boolean

Returns true if any member of this NodeSet equals node.

Returns:

  • (Boolean)

    true if any member of this NodeSet equals node.



137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'ext/nokolexbor/nl_node_set.c', line 137

/*
 * NodeSet#include?: Qtrue when some array slot holds the same pointer as the
 * unwrapped `rb_node`, Qfalse otherwise.
 */
static VALUE
nl_node_set_is_include(VALUE self, VALUE rb_node)
{
  lexbor_array_t *nodes = nl_rb_node_set_unwrap(self);
  lxb_dom_node_t *target = nl_rb_node_unwrap(rb_node);

  /* Membership is decided by pointer identity, not by node contents. */
  size_t pos = 0;
  while (pos < nodes->length) {
    if (nodes->list[pos] == target) {
      return Qtrue;
    }
    pos++;
  }

  return Qfalse;
}

#index(node = nil) ⇒ Integer?

Returns The index of the first node in this NodeSet that is equal to node or meets the given block. Returns nil if no match is found.

Returns:

  • (Integer, nil)

    The index of the first node in this NodeSet that is equal to node or meets the given block. Returns nil if no match is found.



58
59
60
61
62
63
64
65
# File 'lib/nokolexbor/node_set.rb', line 58

# Locate the first matching node.
#
# When +node+ is given, members are compared to it with +==+ and any block is
# ignored. Without +node+, the block decides which member matches.
#
# @param node the node to look for (optional)
# @yield [member] used only when +node+ is not given
# @return [Integer, nil] index of the first match; nil when nothing matches
#   or when neither +node+ nor a block is supplied
def index(node = nil)
  return nil unless node || block_given?

  each_with_index do |candidate, position|
    matched = node ? candidate == node : yield(candidate)
    return position if matched
  end
  nil
end

#inner_html(*args) ⇒ String

Get the inner html of all contained Nodes.

Returns:

  • (String)


81
82
83
# File 'lib/nokolexbor/node_set.rb', line 81

# Concatenate the inner HTML of every node in the set.
#
# @param args forwarded unchanged to each node's +inner_html+
# @return [String] the per-node results joined with no separator
def inner_html(*args)
  fragments = map { |node| node.inner_html(*args) }
  fragments.join
end

#last ⇒ Node?

Get the last element of the NodeSet.

Returns:



48
49
50
# File 'lib/nokolexbor/node_set.rb', line 48

# Fetch the final node of the set.
#
# Looks up index -1 through +slice+, the documented alias of +[]+.
#
# @return the node at index -1, as reported by the index lookup
def last
  slice(-1)
end

#length ⇒ Integer Also known as: size

Get the length of this NodeSet.

Returns:

  • (Integer)


71
72
73
74
75
# File 'ext/nokolexbor/nl_node_set.c', line 71

/*
 * NodeSet#length / #size: number of nodes currently held by the set.
 *
 * The array length is compared against size_t indices elsewhere in this
 * file, so it is converted with SIZET2NUM. INT2NUM takes an int and would
 * narrow the count on platforms where size_t is wider than int.
 */
static VALUE
nl_node_set_length(VALUE self)
{
  return SIZET2NUM(nl_rb_node_set_unwrap(self)->length);
}

#nokogiri_css(*args) ⇒ NodeSet

Search this object for CSS rules. rules must be one or more CSS selectors. It supports a mixed syntax of CSS selectors and XPath.

This method uses libxml2 as the selector engine. It works the same way as Nokogiri::Node#css.

Returns:

  • (NodeSet)

    The matched set of Nodes.

See Also:



186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/nokolexbor/node_set.rb', line 186

# Run CSS rules against every node of the set by way of XPath.
#
# The arguments are split by +extract_params+, the rules are translated with
# +css_rules_to_xpath+, and each node's private +xpath_internal+ is invoked
# with the node itself as context and no variable bindings (nil).
#
# @param args CSS rules plus whatever extra arguments +extract_params+ accepts
# @return [NodeSet] a new set for +@document+ with every match appended in
#   the order produced
def nokogiri_css(*args)
  rules, handler, ns, _ = extract_params(args)
  paths = css_rules_to_xpath(rules, ns)

  result = NodeSet.new(@document)
  each do |context_node|
    matches = context_node.send(:xpath_internal, context_node, paths, handler, ns, nil)
    matches.each { |match| result << match }
  end
  result
end

#outer_html(*args) ⇒ String Also known as: to_s, to_html, serialize

Convert this NodeSet to HTML.

Returns:

  • (String)


88
89
90
# File 'lib/nokolexbor/node_set.rb', line 88

# Serialize the whole set by concatenating each node's outer HTML.
#
# @param args forwarded unchanged to each node's +outer_html+
# @return [String] the per-node results joined with no separator
def outer_html(*args)
  fragments = map { |node| node.outer_html(*args) }
  fragments.join
end

#pop ⇒ Node?

Returns The last element of this NodeSet and removes it. Returns nil if the set is empty.

Returns:

  • (Node, nil)

    The last element of this NodeSet and removes it. Returns nil if the set is empty.



115
116
117
118
119
# File 'lib/nokolexbor/node_set.rb', line 115

# Remove the last node from the set and hand it back.
#
# @return the result of deleting +last+, or nil when the set is empty
def pop
  delete(last) unless length.zero?
end

#push(node) ⇒ NodeSet Also known as: <<

Append node to the NodeSet.

Parameters:

Returns:

  • (NodeSet)

    self, to support chaining of calls.



87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'ext/nokolexbor/nl_node_set.c', line 87

/*
 * NodeSet#push / #<<: append the unwrapped `rb_node` through
 * lexbor_array_push_unique and return the receiver so calls can be chained.
 *
 * Both LXB_STATUS_OK and LXB_STATUS_STOPPED count as success; any other
 * status is raised as a lexbor error.
 * NOTE(review): LXB_STATUS_STOPPED presumably means the node was already in
 * the array. Confirm against lexbor_array_push_unique.
 */
static VALUE
nl_node_set_push(VALUE self, VALUE rb_node)
{
  lexbor_array_t *nodes = nl_rb_node_set_unwrap(self);
  lxb_dom_node_t *node = nl_rb_node_unwrap(rb_node);

  lxb_status_t push_status = lexbor_array_push_unique(nodes, node);
  if (push_status == LXB_STATUS_OK || push_status == LXB_STATUS_STOPPED) {
    return self;
  }

  nl_raise_lexbor_error(push_status);
  return self;
}

#remove ⇒ Object Also known as: unlink

Remove all nodes in this NodeSet.



99
100
101
# File 'lib/nokolexbor/node_set.rb', line 99

# Call +remove+ on every node in the set.
#
# @return whatever +each+ returns
def remove
  each { |node| node.remove }
end

#reverse ⇒ NodeSet

Returns A new NodeSet containing all the nodes in the NodeSet in reverse order.

Returns:

  • (NodeSet)

    A new NodeSet containing all the nodes in the NodeSet in reverse order.



154
155
156
157
158
159
160
# File 'lib/nokolexbor/node_set.rb', line 154

# Produce a new NodeSet holding the same nodes in the opposite order.
#
# Nodes are read from the highest index down to 0 and pushed onto a fresh
# set created for the same +@document+; the receiver is not modified.
#
# @return [NodeSet] the reversed copy
def reverse
  NodeSet.new(@document) do |reversed|
    length.pred.downto(0) { |position| reversed.push(self[position]) }
  end
end

#shift ⇒ Node?

Returns The first element of this NodeSet and removes it. Returns nil if the set is empty.

Returns:

  • (Node, nil)

    The first element of this NodeSet and removes it. Returns nil if the set is empty.



123
124
125
126
127
# File 'lib/nokolexbor/node_set.rb', line 123

# Remove the first node from the set and hand it back.
#
# @return the result of deleting +first+, or nil when the set is empty
def shift
  delete(first) unless length.zero?
end

#to_a ⇒ Array<Node> Also known as: to_ary

Returns This list as an Array.

Returns:

  • (Array<Node>)

    This list as an Array



254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'ext/nokolexbor/nl_node_set.c', line 254

/*
 * NodeSet#to_a / #to_ary: build a Ruby Array with one wrapped node per array
 * slot, in slot order. Every wrapper is created against the receiver's
 * document.
 */
static VALUE
nl_node_set_to_array(VALUE self)
{
  lexbor_array_t *nodes = nl_rb_node_set_unwrap(self);

  /* Pre-size the Ruby array to the number of nodes being copied. */
  VALUE result = rb_ary_new2(nodes->length);
  VALUE rb_doc = nl_rb_document_get(self);

  for (size_t pos = 0; pos < nodes->length; pos++) {
    rb_ary_push(result, nl_rb_node_create((lxb_dom_node_t *)nodes->list[pos], rb_doc));
  }

  return result;
}

#wrap(node_or_tags) ⇒ NodeSet

Wrap all nodes of this NodeSet with node_or_tags.

Returns:

  • (NodeSet)

    self, to support chaining.

See Also:



167
168
169
170
# File 'lib/nokolexbor/node_set.rb', line 167

# Wrap every node of the set with +node_or_tags+.
#
# Iterates with +each+ because only the side effect of +node.wrap+ matters;
# the per-node results are not collected.
#
# @param node_or_tags passed unchanged to each node's +wrap+
# @return [self] to support chaining
def wrap(node_or_tags)
  each { |node| node.wrap(node_or_tags) }
  self
end

#xpath(*args) ⇒ NodeSet

Search this node for XPath paths. paths must be one or more XPath queries.

It works the same way as Nokogiri::Node#xpath.

Examples:

node.xpath('.//title')

Returns:

  • (NodeSet)

    The matched set of Nodes.



173
174
175
176
177
178
179
180
181
182
183
# File 'lib/nokolexbor/node_set.rb', line 173

# Evaluate XPath queries against every node of the set.
#
# The arguments are split by +extract_params+, then each node's private
# +xpath_internal+ is invoked with the node itself as context.
#
# @param args XPath paths plus whatever extra arguments +extract_params+ accepts
# @return [NodeSet] a new set for +@document+ with every match appended in
#   the order produced
def xpath(*args)
  paths, handler, ns, binds = extract_params(args)

  result = NodeSet.new(@document)
  each do |context_node|
    matches = context_node.send(:xpath_internal, context_node, paths, handler, ns, binds)
    matches.each { |match| result << match }
  end
  result
end

#|(other) ⇒ NodeSet Also known as: +

Returns A new set built by merging the other set, excluding duplicates.

Returns:

  • (NodeSet)

    A new set built by merging the other set, excluding duplicates.



273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'ext/nokolexbor/nl_node_set.c', line 273

/*
 * NodeSet#| / #+: build a new NodeSet holding the receiver's nodes followed
 * by those nodes of `other` that lexbor_array_push_unique accepts.
 *
 * Raises ArgumentError when `other` is not a Nokolexbor::NodeSet, and a
 * lexbor error when the result array cannot be initialised or extended.
 * The partially built array is destroyed before any lexbor error is raised,
 * matching the cleanup done in nl_node_set_css / nl_node_set_at_css.
 */
static VALUE
nl_node_set_union(VALUE self, VALUE other)
{
  if (!rb_obj_is_kind_of(other, cNokolexborNodeSet)) {
    rb_raise(rb_eArgError, "Parameter must be a Nokolexbor::NodeSet");
  }

  lexbor_array_t *self_array = nl_rb_node_set_unwrap(self);
  lexbor_array_t *other_array = nl_rb_node_set_unwrap(other);
  size_t total = self_array->length + other_array->length;

  /* Both sides empty: nothing to copy, return an empty set. */
  if (total == 0) {
    return nl_rb_node_set_create_with_data(NULL, nl_rb_document_get(self));
  }

  /* Reserve room for the worst case in which no node is a duplicate. */
  lexbor_array_t *new_array = lexbor_array_create();
  lxb_status_t status = lexbor_array_init(new_array, total);
  if (status != LXB_STATUS_OK) {
    /* Release the half-constructed array before raising. */
    lexbor_array_destroy(new_array, true);
    nl_raise_lexbor_error(status);
  }

  /* Skip memcpy for an empty receiver so it is never given a zero-length source. */
  if (self_array->length > 0) {
    memcpy(new_array->list, self_array->list, sizeof(lxb_dom_node_t *) * self_array->length);
  }
  new_array->length = self_array->length;

  for (size_t i = 0; i < other_array->length; i++) {
    status = lexbor_array_push_unique(new_array, other_array->list[i]);
    /* Same success criteria as nl_node_set_push: OK or STOPPED. */
    if (status != LXB_STATUS_OK && status != LXB_STATUS_STOPPED) {
      lexbor_array_destroy(new_array, true);
      nl_raise_lexbor_error(status);
    }
  }

  return nl_rb_node_set_create_with_data(new_array, nl_rb_document_get(self));
}