Class: Nokolexbor::Document

Inherits:
Node
  • Object
show all
Defined in:
lib/nokolexbor/document.rb

Constant Summary

Constants inherited from Node

Node::ATTRIBUTE_NODE, Node::CDATA_SECTION_NODE, Node::COMMENT_NODE, Node::DOCUMENT_FRAG_NODE, Node::DOCUMENT_NODE, Node::DOCUMENT_TYPE_NODE, Node::ELEMENT_NODE, Node::ENTITY_NODE, Node::ENTITY_REF_NODE, Node::LOOKS_LIKE_XPATH, Node::NOTATION_NODE, Node::PI_NODE, Node::TEXT_NODE

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Node

#<<, #add_class, #add_next_sibling, #add_previous_sibling, #after, #ancestors, #append_class, #at, #at_css, #at_xpath, #attributes, #before, #cdata?, #children=, #classes, #comment?, #css, #css_path, #document?, #each, #element?, #fragment, #fragment?, #kwattr_add, #kwattr_append, #kwattr_remove, #kwattr_values, #matches?, #nokogiri_at_css, #nokogiri_css, #parent=, #prepend_child, #processing_instruction?, #remove_class, #replace, #search, #swap, #text?, #traverse, #value?, #wrap, #write_to, #xpath

Class Method Details

.parse(string_or_io) ⇒ Document

Parse HTML into a Nokolexbor::Document.

Parameters:

  • string_or_io (String, #read)

    The HTML to be parsed. It may be a String, or any object that responds to #read such as an IO, or StringIO.

Returns:

  • (Document)

151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/nokolexbor/document.rb', line 151

# Parse HTML into a Nokolexbor::Document.
#
# @param string_or_io [String, #read] the HTML source; an object responding
#   to +#read+ is read first and the result is used as the source
# @return [Document] the result of +parse_native+ for the prepared source
def self.parse(string_or_io)
  source = string_or_io.respond_to?(:read) ? string_or_io.read : string_or_io

  # Only values that report an encoding are transcoded; bytes that cannot be
  # represented in UTF-8 are replaced rather than raising.
  already_utf8 = !source.respond_to?(:encoding) || source.encoding == Encoding::UTF_8
  source = source.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) unless already_utf8

  parse_native(source)
end

Instance Method Details

#create_cdata(string, &block) ⇒ CDATA

Create a CDATA containing string.

Returns:

  • (CDATA)


57
58
59
# File 'lib/nokolexbor/document.rb', line 57

# Create a CDATA node containing +string+.
#
# @param string [#to_s] converted with +to_s+ before the node is built
# @param block forwarded unchanged to +Nokolexbor::CDATA.new+
# @return [CDATA]
def create_cdata(string, &block)
  text = string.to_s
  Nokolexbor::CDATA.new(text, self, &block)
end

#create_comment(string, &block) ⇒ Comment

Create a Comment containing string.

Returns:

  • (Comment)


64
65
66
# File 'lib/nokolexbor/document.rb', line 64

# Create a Comment node containing +string+.
#
# @param string [#to_s] converted with +to_s+ before the node is built
# @param block forwarded unchanged to +Nokolexbor::Comment.new+
# @return [Comment]
def create_comment(string, &block)
  text = string.to_s
  Nokolexbor::Comment.new(text, self, &block)
end

#create_element(name, *contents_or_attrs, &block) ⇒ Element

Create an Element with name belonging to this document, optionally setting contents or attributes.

Examples:

An empty element without attributes

doc.create_element("div")
# => <div></div>

An element with contents

doc.create_element("div", "contents")
# => <div>contents</div>

An element with attributes

doc.create_element("div", {"class" => "container"})
# => <div class='container'></div>

An element with contents and attributes

doc.create_element("div", "contents", {"class" => "container"})
# => <div class='container'>contents</div>

Passing a block to mutate the element

doc.create_element("div") { |node| node["class"] = "blue" }
# => <div class='blue'></div>

Parameters:

  • name (String)
  • contents_or_attrs (#to_s, Hash)

Returns:

  • (Element)


32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/nokolexbor/document.rb', line 32

# Create an Element named +name+ belonging to this document, optionally
# setting its contents or attributes.
#
# Each extra argument is applied in order: a Hash sets one attribute per pair
# (keys and values are stringified); anything else replaces the element's
# content with its +to_s+. The block, if given, is handed to +Element.new+,
# so it runs before any of the extra arguments are applied.
#
# @param name [String]
# @param contents_or_attrs [#to_s, Hash]
# @return [Element]
def create_element(name, *contents_or_attrs, &block)
  Nokolexbor::Element.new(name, self, &block).tap do |element|
    contents_or_attrs.each do |item|
      if item.is_a?(Hash)
        item.each { |key, value| element[key.to_s] = value.to_s }
      else
        element.content = item.to_s
      end
    end
  end
end

#create_text_node(string, &block) ⇒ Text

Create a Text with string.

Returns:

  • (Text)


50
51
52
# File 'lib/nokolexbor/document.rb', line 50

# Create a Text node containing +string+.
#
# @param string [#to_s] converted with +to_s+ before the node is built
# @param block forwarded unchanged to +Nokolexbor::Text.new+
# @return [Text]
def create_text_node(string, &block)
  text = string.to_s
  Nokolexbor::Text.new(text, self, &block)
end

#document ⇒ Document

A reference to self.

Returns:

  • (Document)

71
72
73
# File 'lib/nokolexbor/document.rb', line 71

# Returns the receiver itself.
#
# @return [Document] self
def document
  self
end

#meta_encoding ⇒ String

Get the meta tag encoding for this document. If there is no meta tag, nil is returned.

Returns:

  • (String, nil)


78
79
80
81
82
83
84
# File 'lib/nokolexbor/document.rb', line 78

# Get the encoding declared by this document's meta tags.
#
# A +<meta charset>+ tag is consulted first. Only when none is found is the
# tag returned by +meta_content_type+ used, in which case the charset is
# extracted from its +content+ attribute.
#
# @return [String, nil] the declared encoding, or nil when neither tag is
#   found or the content attribute carries no charset
def meta_encoding
  charset_tag = at_css("meta[charset]")
  return charset_tag[:charset] if charset_tag

  content_type_tag = meta_content_type
  return unless content_type_tag

  content_type_tag["content"][/charset\s*=\s*([\w-]+)/i, 1]
end

#meta_encoding=(encoding) ⇒ Object

Set the meta tag encoding for this document.

If a meta encoding tag is already present, its content is replaced with the given text.

Otherwise, this method tries to create one at an appropriate place, supplying head and/or html elements as necessary: inside the head element if there is one, and otherwise before any text node or content element (typically <body>).



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/nokolexbor/document.rb', line 95

# Set the meta tag encoding for this document.
#
# An existing Content-Type meta tag (as returned by +meta_content_type+) has
# its +content+ rewritten; failing that, an existing +<meta charset>+ tag has
# its +charset+ rewritten. When neither exists, a new +<meta charset>+ element
# is created and inserted: at the front of +<head>+ when there is one,
# otherwise wherever +set_metadata_element+ places it.
#
# @param encoding [String] the encoding name to record
# @return [String] +encoding+
def meta_encoding=(encoding)
  if (meta = meta_content_type)
    meta["content"] = format("text/html; charset=%s", encoding)
  elsif (meta = at_css("meta[charset]"))
    meta["charset"] = encoding
  else
    meta = Nokolexbor::Node.new("meta", self)
    meta["charset"] = encoding

    if (head = at_css("head"))
      head.prepend_child(meta)
    else
      # No <head> yet: delegate so the new tag is actually attached to the
      # document instead of being built and then dropped.
      set_metadata_element(meta)
    end
  end
  encoding
end

#set_metadata_element(element) ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/nokolexbor/document.rb', line 121

# Insert +element+ into the document's metadata section, supplying missing
# +<html>+/+<head>+ structure where needed.
#
# Placement, in order of preference:
# 1. appended to an existing +<head>+;
# 2. inside a new +<head>+ prepended to an existing +<html>+;
# 3. immediately before the first child that is a Nokolexbor::Node;
# 4. inside a freshly added +<html><head>+ pair.
#
# @param element [Node] the node to insert
# @return [Object] the value returned by whichever insertion call was used
def set_metadata_element(element)
  if (head = at_css("head"))
    head << element
  elsif (html = at_css("html"))
    head = html.prepend_child(Nokolexbor::Node.new("head", self))
    head.prepend_child(element)
  elsif (first = children.find { |node| node.is_a?(Nokolexbor::Node) })
    # We reach here only if the underlying document model
    # allows <html>/<head> elements to be omitted and does not
    # automatically supply them.
    first.add_previous_sibling(element)
  else
    html = add_child(Nokolexbor::Node.new("html", self))
    head = html.add_child(Nokolexbor::Node.new("head", self))
    head.prepend_child(element)
  end
end