Class: Nokogiri::HTML::Document

Inherits:
XML::Document show all
Defined in:
lib/nokogiri/html/document.rb,
lib/nokogiri/ffi/html/document.rb,
ext/nokogiri/html_document.c

Constant Summary

Constants inherited from XML::Node

XML::Node::ATTRIBUTE_DECL, XML::Node::ATTRIBUTE_NODE, XML::Node::CDATA_SECTION_NODE, XML::Node::COMMENT_NODE, XML::Node::DOCB_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE, XML::Node::DOCUMENT_NODE, XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE, XML::Node::ELEMENT_DECL, XML::Node::ELEMENT_NODE, XML::Node::ENTITY_DECL, XML::Node::ENTITY_NODE, XML::Node::ENTITY_REF_NODE, XML::Node::HTML_DOCUMENT_NODE, XML::Node::NAMESPACE_DECL, XML::Node::NOTATION_NODE, XML::Node::PI_NODE, XML::Node::TEXT_NODE, XML::Node::XINCLUDE_END, XML::Node::XINCLUDE_START

Instance Attribute Summary collapse

Attributes inherited from XML::Document

#errors

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from XML::Document

#add_child, #clone, #collect_namespaces, #create_cdata, #create_element, #create_entity, #create_text_node, #decorate, #decorators, #document, #dup, #encoding, #encoding=, #initialize, #name, #namespaces, recursively_remove_namespaces_from_node, #remove_namespaces!, #root, #root=, #slop!, #url, #validate, #version, wrap, wrap_with_error_handling

Methods inherited from XML::Node

#<=>, #==, #>, #[], #[]=, #accept, #add_child, #add_namespace, #add_namespace_definition, #add_next_sibling, #add_previous_sibling, #after, #ancestors, #at, #at_css, #at_xpath, #attribute, #attribute_nodes, #attribute_with_ns, #attributes, #before, #blank?, #cdata?, #child, #children, #clone, #comment?, #content, #content=, #create_external_subset, #create_internal_subset, #css, #css_path, #decorate!, #default_namespace=, #description, #document, #dup, #each, #element?, #element_children, #elements, #encode_special_chars, #external_subset, #first_element_child, #fragment?, #has_attribute?, #html?, #initialize, #inner_html, #inner_html=, #inner_text, #internal_subset, #key?, #keys, #last_element_child, #line, #matches?, #name, #name=, #namespace, #namespace=, #namespace_definitions, #namespace_scopes, #namespaced_key?, #namespaces, #next, #next_element, #next_sibling, #node_name, #node_name=, node_properties, #node_type, #parent, #parent=, #parse, #path, #pointer_id, #previous, #previous_element, #previous_sibling, #read_only?, #remove, #remove_attribute, #replace, #search, #set_attribute, #swap, #text, #text?, #to_html, #to_s, #to_xhtml, #to_xml, #traverse, #unlink, #values, wrap, #write_html_to, #write_to, #write_xhtml_to, #write_xml_to, #xml?, #xpath

Methods included from XML::PP::Node

#inspect, #pretty_print

Constructor Details

This class inherits a constructor from Nokogiri::XML::Document

Instance Attribute Details

#cstruct ⇒ Object

:nodoc:



5
6
7
# File 'lib/nokogiri/ffi/html/document.rb', line 5

# Reader for the value held in the @cstruct instance variable.
# Returns nil when the variable has not been assigned.
def cstruct
  instance_variable_get(:@cstruct)
end

Class Method Details

.new ⇒ Object

Create a new document



7
8
9
10
11
12
13
# File 'lib/nokogiri/ffi/html/document.rb', line 7

# Create a new document.
#
# The first two positional arguments are passed to LibXML.htmlNewDoc as the
# URI and the external ID. The wrapped result then receives #initialize with
# the complete, unmodified argument list and is returned.
def self.new(*args) # :nodoc:
  uri, external_id = args
  document = wrap(LibXML.htmlNewDoc(uri, external_id))
  document.send(:initialize, *args)
  document
end

.parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML) {|options| ... } ⇒ Object

Parse HTML. string_or_io may be a String, or any object that responds to read and close such as an IO, or StringIO. url is the resource where this document is located. encoding is the encoding that should be used when processing the document. options is a number that sets options in the parser, such as Nokogiri::XML::ParseOptions::RECOVER. See the constants in Nokogiri::XML::ParseOptions.

Yields:

  • (options)


64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/nokogiri/html/document.rb', line 64

# Parse HTML.
#
# string_or_io:: a String, nil, or any object that responds to +read+
# url::          location of the document; when nil and +string_or_io+
#                responds to +read+ and +path+, its path is used
# encoding::     encoding name; when nil and +string_or_io+ responds to
#                +encoding+ with something other than ASCII-8BIT, that
#                encoding's name is used
# options::      an Integer bit mask (wrapped in Nokogiri::XML::ParseOptions)
#                or an object responding to +to_i+
#
# If a block is given, the options object is yielded to it before parsing.
#
# Returns the result of +read_io+ for readable objects, the result of +new+
# for nil or empty input, and the result of +read_memory+ otherwise.
def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
  # Match on Integer rather than Fixnum: Fixnum was removed in Ruby 3.2, and
  # on older Rubies Integer also covers values that are Bignums.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  # Give the options to the user
  yield options if block_given?

  if string_or_io.respond_to?(:encoding)
    unless string_or_io.encoding.name == "ASCII-8BIT"
      encoding ||= string_or_io.encoding.name
    end
  end

  if string_or_io.respond_to?(:read)
    url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
    return read_io(string_or_io, url, encoding, options.to_i)
  end

  # read_memory pukes on empty docs
  return new if string_or_io.nil? || string_or_io.empty?

  read_memory(string_or_io, url, encoding, options.to_i)
end

.read_io(io, url, encoding, options) ⇒ Object

Read the HTML document from io with given url, encoding, and options. See Nokogiri::HTML.parse



15
16
17
18
19
# File 'lib/nokogiri/ffi/html/document.rb', line 15

# Read the HTML document from +io+ with the given +url+, +encoding+ and
# +options+.
#
# Inside wrap_with_error_handling, a reader callback is built for +io+ via
# IoCallbacks.reader and handed to LibXML.htmlReadIO together with the
# remaining arguments.
def self.read_io(io, url, encoding, options) # :nodoc:
  wrap_with_error_handling {
    reader = IoCallbacks.reader(io)
    LibXML.htmlReadIO(reader, nil, nil, url, encoding, options)
  }
end

.read_memory(string, url, encoding, options) ⇒ Object

Read the HTML document contained in string with given url, encoding, and options. See Nokogiri::HTML.parse



21
22
23
24
25
# File 'lib/nokogiri/ffi/html/document.rb', line 21

# Read the HTML document contained in +string+ with the given +url+,
# +encoding+ and +options+.
#
# The call to LibXML.htmlReadMemory is made inside wrap_with_error_handling.
def self.read_memory(string, url, encoding, options) # :nodoc:
  wrap_with_error_handling do
    # htmlReadMemory expects the buffer size in bytes. String#length counts
    # characters, which under-reports multibyte input and cuts the document
    # short, so the byte size is passed instead.
    LibXML.htmlReadMemory(string, string.bytesize, url, encoding, options)
  end
end

Instance Method Details

#fragment(tags = nil) ⇒ Object

Create a Nokogiri::XML::DocumentFragment from tags



51
52
53
# File 'lib/nokogiri/html/document.rb', line 51

# Create a DocumentFragment from +tags+.
#
# The fragment is constructed with this document, the given +tags+ (nil by
# default) and this document's root as its three arguments.
def fragment(tags = nil)
  context_root = root
  DocumentFragment.new(self, tags, context_root)
end

#meta_encoding ⇒ Object

Get the meta tag encoding for this document. If there is no meta tag with an http-equiv of Content-Type, then nil is returned



7
8
9
10
11
12
13
# File 'lib/nokogiri/html/document.rb', line 7

# Get the meta tag encoding for this document.
#
# Finds the first +meta+ node whose +http-equiv+ attribute matches
# Content-Type (case-insensitive) and extracts the +charset+ value from its
# +content+ attribute.
#
# Returns the charset name, or nil when there is no such meta node, the node
# has no +content+ attribute, or the content declares no charset.
def meta_encoding
  meta = css('meta').find { |node| node['http-equiv'] =~ /Content-Type/i }
  return nil unless meta

  # String#[] with a capture index returns nil on a failed match, where
  # MatchData indexing would raise on nil; to_s covers a missing attribute.
  meta['content'].to_s[/charset\s*=\s*([\w-]+)/i, 1]
end

#meta_encoding=(encoding) ⇒ Object

Set the meta tag encoding for this document. If there is no meta tag with an http-equiv of Content-Type, nil is returned and the encoding is not set.



18
19
20
21
22
23
24
25
# File 'lib/nokogiri/html/document.rb', line 18

# Set the meta tag encoding for this document.
#
# Locates the first +meta+ node whose +http-equiv+ attribute matches
# Content-Type (case-insensitive) and rewrites its +content+ attribute to
# declare +encoding+ as the charset.
#
# Returns +encoding+, or nil (leaving the document untouched) when no such
# meta node exists.
def meta_encoding=(encoding)
  content_type_meta = css('meta').find do |candidate|
    candidate['http-equiv'] =~ /Content-Type/i
  end
  return nil if content_type_meta.nil?

  content_type_meta['content'] = "text/html; charset=%s" % encoding
  encoding
end

#serialize(options = {}, &block) ⇒ Object

Serialize Node using options. Save options can also be set using a block. See SaveOptions.

These two statements are equivalent:

node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)

or

node.serialize(:encoding => 'UTF-8') do |config|
  config.format.as_xml
end


41
42
43
44
45
46
47
# File 'lib/nokogiri/html/document.rb', line 41

# Serialize this node using +options+. Save options can also be set with a
# block.
#
# When +options+ carries no truthy :save_with entry, one is stored into the
# given hash: FORMAT, AS_HTML, NO_DECLARATION and NO_EMPTY_TAGS combined.
# The arguments and block are then passed on to the superclass
# implementation.
def serialize(options = {}, &block)
  unless options[:save_with]
    save_flags = XML::Node::SaveOptions
    options[:save_with] = save_flags::FORMAT |
                          save_flags::AS_HTML |
                          save_flags::NO_DECLARATION |
                          save_flags::NO_EMPTY_TAGS
  end
  super
end

#type ⇒ Object

The type for this document



130
131
132
133
134
135
# File 'ext/nokogiri/html_document.c', line 130

/*
 * The type for this document.
 *
 * Unwraps the xmlDoc struct stored in self and returns its `type` field
 * converted to a Ruby number.
 */
static VALUE type(VALUE self)
{
  htmlDocPtr document;
  long document_type;

  Data_Get_Struct(self, xmlDoc, document);
  document_type = (long)document->type;

  return INT2NUM(document_type);
}