Class: HtmlEntry::PageFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/html_entry/page_fetcher.rb

Overview

Page fetcher

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#instructions ⇒ Hash

Get instructions

Returns:

  • (Hash)


20
21
22
# File 'lib/html_entry/page_fetcher.rb', line 20

# Reader for the fetcher's instructions.
#
# @return [Hash] the value held in the @instructions instance variable
def instructions
  @instructions
end

Instance Method Details

#fetch(document) ⇒ Array

Fetch entities from document

Parameters:

  • document (Nokogiri::HTML::Document)

Returns:

  • (Array)


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/html_entry/page_fetcher.rb', line 27

# Fetch entities from a document.
#
# The document is first resolved into a list of "block" documents, then
# every element produced by fetch_data for each block is gathered into a
# single flat array.
#
# @param document [Nokogiri::HTML::Document, Object] page (or fragment) to read
# @return [Array] elements produced by fetch_data, in block order
def fetch(document)
  block_instruction = instructions[:block]

  block_documents =
    if !block_instruction.nil?
      # Explicit "block" instruction: every matched block is processed.
      fetch_block_document(document, block_instruction)
    elsif document.instance_of?(Nokogiri::HTML::Document)
      # No "block" instruction and a whole HTML document was given:
      # narrow it down to the first match of the 'body' selector.
      [fetch_block_document(document, type: :selector, selector: 'body').first]
    else
      # No "block" instruction and not a whole document: use it as-is.
      [document]
    end

  collected = []
  block_documents.each do |block_document|
    fetch_data(block_document, instructions[:entity]).each { |element| collected << element }
  end
  collected
end

#last_page?(document) ⇒ TrueClass, FalseClass

Check whether the given document is the last page

Parameters:

  • document (Nokogiri::HTML::Document)

Returns:

  • (TrueClass, FalseClass)


61
62
63
64
65
66
67
# File 'lib/html_entry/page_fetcher.rb', line 61

# Check whether the given document is the last page.
#
# Uses the :last_page instruction: when its :type is :function the result
# of call_function is coerced to a boolean; otherwise the page counts as
# last when Page.fetch_nodes finds at least one node.
#
# @param document [Nokogiri::HTML::Document]
# @return [Boolean]
def last_page?(document)
  rule = instructions[:last_page]
  return !!call_function(document, rule) if rule[:type] == :function

  Page.fetch_nodes(document, rule).count.positive?
end