Class: WebscraperFramework::Page

Inherits:
Object
  • Object
show all
Defined in:
lib/webscraper_framework/page.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(html: nil) ⇒ Page

Returns a new instance of Page.



21
22
23
# File 'lib/webscraper_framework/page.rb', line 21

# Creates a page and stores the given document through the +html+ writer.
#
# @param html [Object, nil] value assigned to the +html+ attribute
#   (defaults to nil)
def initialize(html: nil)
  self.html = html
end

Instance Attribute Details

#html ⇒ Object

Returns the value of attribute html.



5
6
7
# File 'lib/webscraper_framework/page.rb', line 5

# Reader for the +html+ attribute.
#
# @return [Object] whatever is currently stored in +@html+
def html
  @html
end

Class Method Details

.by_html(html) ⇒ Object

Helper for seamless initialisation when you already hold an HTML document: the given value is passed to the constructor unchanged.



36
37
38
# File 'lib/webscraper_framework/page.rb', line 36

# Builds an instance from an existing document; +html+ is handed to the
# constructor unchanged.
#
# @param html [Object] value forwarded as the +html:+ keyword
# @return [Object] the result of +new+
def self.by_html(html)
  new(html: html)
end

.by_html_string(html_string) ⇒ Object

Helper for seamless initialisation when you start from a raw HTML string: the string is parsed with Nokogiri::HTML before the Page is built.



31
32
33
# File 'lib/webscraper_framework/page.rb', line 31

# Builds an instance from raw markup by parsing it with +Nokogiri::HTML+.
#
# @param html_string [String] markup handed to +Nokogiri::HTML+
# @return [Object] the result of +new+ with the parsed document
def self.by_html_string(html_string)
  document = Nokogiri::HTML(html_string)
  new(html: document)
end

.by_url(url) ⇒ Object

Helper for seamless initialisation when you start from a URL: the content is fetched with get_page and parsed with Nokogiri::HTML before the Page is built.



26
27
28
# File 'lib/webscraper_framework/page.rb', line 26

# Builds an instance from a URL: the content comes from +get_page+ (called
# with its default caching behaviour) and is parsed with +Nokogiri::HTML+.
#
# @param url [String] location passed to +get_page+
# @return [Object] the result of +new+ with the parsed document
def self.by_url(url)
  markup = get_page(url)
  document = Nokogiri::HTML(markup)
  new(html: document)
end

.get_page(url, from_cache = true) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/webscraper_framework/page.rb', line 7

# Returns the content found at +url+, keeping a copy on disk under +cache/+.
#
# The cache file is named after the SHA-256 hex digest of +url+. The content
# is read exactly once and that same string is both written to the cache and
# returned, so a fresh fetch no longer comes back empty. The +cache+
# directory is created when it does not exist yet.
#
# @param url [String] location handed to +open+
# @param from_cache [Boolean] when true and a cache file exists, the cached
#   copy is returned and +url+ is not opened; when false the content is
#   always re-read and the cache file overwritten
# @return [String] the page content
def self.get_page(url, from_cache = true)
  url_hash = Digest::SHA256.hexdigest(url)
  filename = "cache/#{url_hash}"
  if from_cache && File.file?(filename)
    content = File.read(filename)
    puts "Gotten #{filename} from cache"
  else
    # Block form closes the stream as soon as it has been read.
    # NOTE(review): bare `open` only fetches http(s) URLs when open-uri
    # patches Kernel#open (Ruby < 3.0) - confirm the target Ruby version.
    # Kernel#open also treats a leading "|" as a command, so do not pass
    # untrusted input here.
    content = open(url, &:read)
    cache_dir = File.dirname(filename)
    Dir.mkdir(cache_dir) unless File.directory?(cache_dir)
    File.write(filename, content)
    puts "Written cache file #{filename}"
  end
  content
end

Instance Method Details

#collection_by_selector(selector) ⇒ Object



40
41
42
# File 'lib/webscraper_framework/page.rb', line 40

# Runs +selector+ through +html.css+ and wraps every match in a
# WebscraperFramework::Page via +by_html+.
#
# @param selector [String] selector passed to +html.css+
# @return [Array] one WebscraperFramework::Page.by_html result per match
def collection_by_selector(selector)
  matches = html.css(selector)
  matches.map { |node| WebscraperFramework::Page.by_html(node) }
end