Module: RDF::Microdata::Reader::Nokogiri

Defined in:
lib/rdf/microdata/reader/nokogiri.rb

Overview

Nokogiri implementation of an HTML parser.

Defined Under Namespace

Classes: NodeProxy, NodeSetProxy

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.library ⇒ Symbol

Returns the name of the underlying XML library.

Returns:

  • (Symbol)


12
13
14
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 12

##
# Returns the name of the underlying XML library.
#
# @return [Symbol] always `:nokogiri` for this implementation
def self.library
  :nokogiri
end

Instance Method Details

#doc_base(base) ⇒ String

Find value of document base

Parameters:

  • base (String)

    Existing base from URI or :base_uri

Returns:

  • (String)


212
213
214
215
216
217
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 212

##
# Find value of document base.
#
# If the document has an `html>head>base` element with a usable `href`, that
# href (with any fragment identifier removed) is returned. Otherwise the
# existing base passed in is returned unchanged.
#
# @param [String] base
#   Existing base from URI or :base_uri
# @return [String]
def doc_base(base)
  # find if the document has a base element
  base_el = @doc.at_css("html>head>base")
  return base unless base_el

  # Drop any fragment identifier. A missing href yields "" here, and
  # "".split("#").first is nil; a fragment-only href ("#x") yields "".
  href = base_el.attribute("href").to_s.split("#").first

  # A <base> without a usable href (e.g. <base target="_blank">) must not
  # clobber the base we already have.
  href.nil? || href.empty? ? base : href
end

#doc_errors ⇒ Object

Document errors



202
203
204
205
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 202

##
# Document errors.
#
# Returns the parser errors recorded on the document, leaving out those whose
# message matches the pattern below (HTML5 tags reported as invalid, and
# "Missing attribute name").
#
# @return [Array] the remaining error objects from `@doc.errors`
def doc_errors
  # FIXME: Nokogiri version 1.5.5 thinks many HTML5 elements are invalid
  spurious = /(Tag (?:article|aside|audio|canvas|command|datalist|details|embed|figcaption|figure|footer|header|hgroup|keygen|main|mark|meter|nav|output|progress|ruby|section|time|video|wbr) invalid|Missing attribute name)/

  @doc.errors.reject do |error|
    error.to_s =~ spurious
  end
end

#find_element_by_id(id) ⇒ Object

Look up an element in the document by id



229
230
231
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 229

##
# Look up an element in the document by id.
#
# The id is matched literally through an XPath `@id` test instead of being
# interpolated into a `#id` CSS selector, so ids containing characters that
# are significant in CSS selectors (".", ":", "[", whitespace, ...) are looked
# up as-is rather than being mis-parsed or raising a selector syntax error.
#
# @param [#to_s] id the id value to search for
# @return [NodeProxy, nil] proxy for the first matching element, or nil when
#   no element has that id
def find_element_by_id(id)
  id = id.to_s

  # XPath 1.0 string literals have no escape syntax: pick a quote character
  # the id does not contain, or stitch the pieces together with concat().
  literal =
    if !id.include?("'")
      "'#{id}'"
    elsif !id.include?('"')
      %("#{id}")
    else
      pieces = id.split("'", -1).map { |piece| "'#{piece}'" }
      "concat(#{pieces.join(%q(, "'", ))})"
    end

  element = @doc.at_xpath("//*[@id=#{literal}]")
  element && NodeProxy.new(element)
end

#getItems ⇒ Object

Based on Microdata element.getItems



223
224
225
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 223

##
# Based on Microdata element.getItems.
#
# Collects every element carrying an `itemscope` attribute that does not also
# carry an `itemprop` attribute, and wraps each one in a NodeProxy.
#
# @return [Array<NodeProxy>]
def getItems
  scoped = @doc.css('[itemscope]')
  top_level = scoped.reject { |element| element.has_attribute?('itemprop') }
  top_level.map { |element| NodeProxy.new(element) }
end

#initialize_html(input, options = {})

This method returns an undefined value.

Initializes the underlying XML library.

Parameters:

  • options (Hash{Symbol => Object}) (defaults to: {})


175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 175

##
# Initializes the underlying XML library.
#
# An input that is already a Nokogiri::XML::Document is used as-is. Any other
# input is parsed as HTML, using `options[:encoding]` when given, otherwise
# the input's own `charset` when it responds to that, otherwise 'utf-8'.
# The encoding that was chosen is written back to `options[:encoding]` as a
# String.
#
# @param [Object] input
#   a Nokogiri::XML::Document, or anything accepted by Nokogiri::HTML.parse
# @param [Hash{Symbol => Object}] options
# @option options [#to_s] :encoding explicit character encoding of the input
# @return [void]
def initialize_html(input, options = {})
  require 'nokogiri' unless defined?(::Nokogiri)

  @doc =
    if input.is_a?(::Nokogiri::XML::Document)
      input
    else
      # Explicit option wins; otherwise try to detect charset from input
      encoding = options[:encoding]
      encoding ||= input.charset if input.respond_to?(:charset)

      # Otherwise, default is utf-8; always hand the parser a String
      options[:encoding] = (encoding || 'utf-8').to_s

      ::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding])
    end
end

#root ⇒ Object

Return proxy for document root



196
197
198
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 196

##
# Return proxy for document root.
#
# The proxy is created on first use and cached in `@root`.
#
# @return [NodeProxy, nil] nil when there is no document or the document has
#   no root element
def root
  return unless @doc && @doc.root

  @root ||= NodeProxy.new(@doc.root)
end