Class: Hypermicrodata::Document

Inherits:
Object
  • Object
show all
Defined in:
lib/hypermicrodata/document.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(content, options = {}) ⇒ Document

Returns a new instance of Document.



6
7
8
9
10
11
12
# File 'lib/hypermicrodata/document.rb', line 6

# Parses +content+ as HTML and immediately extracts its microdata items.
#
# @param content the markup handed to Nokogiri::HTML
# @param options [Hash] optional settings read by key:
#   :force_encoding    - encoding passed to the parser (nil when absent/falsy)
#   :page_url          - stored and later handed to every Item
#   :filter_xpath_attr - XPath predicate used by extract_items to narrow the search
def initialize(content, options = {})
  forced_encoding = options[:force_encoding]
  # Normalise any falsy value (nil or false) to nil before handing it to the parser.
  forced_encoding = nil unless forced_encoding

  @doc = Nokogiri::HTML(content, nil, forced_encoding)
  @page_url = options[:page_url]
  @filter_xpath_attr = options[:filter_xpath_attr]

  # Extraction relies on @doc, @page_url and @filter_xpath_attr being set above.
  @items = extract_items
end

Instance Attribute Details

#doc ⇒ Object (readonly)

Returns the value of attribute doc.



4
5
6
# File 'lib/hypermicrodata/document.rb', line 4

# Reader for the parsed document held in @doc. The constructor assigns it
# from Nokogiri::HTML(content, nil, encoding).
#
# @return [Object] the current value of @doc
def doc
  @doc
end

#items ⇒ Object (readonly)

Returns the value of attribute items.



4
5
6
# File 'lib/hypermicrodata/document.rb', line 4

# Reader for the extracted items held in @items. The constructor assigns it
# the result of extract_items, which builds one Item per selected itemscope node.
#
# @return [Object] the current value of @items
def items
  @items
end

Instance Method Details

#extract_items ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/hypermicrodata/document.rb', line 14

# Builds an Item for every itemscope node selected from @doc.
#
# Selection happens in two steps:
# 1. If @filter_xpath_attr is set, it is used as an XPath predicate and only
#    elements matching it that also carry an +itemscope+ attribute are taken.
# 2. If no filter is set, or the filter matched nothing, the search falls back
#    to the context node itself (when it has +itemscope+) plus every descendant
#    that has +itemscope+ but no +itemprop+ attribute.
#
# A filter that matches nothing is reported through Kernel#warn, so the
# diagnostic goes to stderr and never mixes into the host program's stdout.
#
# @return [Array] one Item per selected node, each created with @page_url
def extract_items
  itemscopes = []
  if @filter_xpath_attr
    itemscopes = @doc.xpath("//*[#{@filter_xpath_attr} and @itemscope]")
    # Library diagnostics belong on stderr; `warn` also honours -W0 / $VERBOSE = nil.
    warn "XPath //*[#{@filter_xpath_attr}] is not found. root node is used." if itemscopes.empty?
  end
  if itemscopes.empty?
    itemscopes = @doc.xpath('self::*[@itemscope] | .//*[@itemscope and not(@itemprop)]')
  end

  itemscopes.map { |itemscope| Item.new(itemscope, @page_url) }
end