Module: Horsefield::Scraper

Defined in:
lib/horsefield/scraper.rb

Defined Under Namespace

Modules: ClassMethods

Class Method Summary

Instance Method Summary

Class Method Details

.included(base) ⇒ Object



14
15
16
# File 'lib/horsefield/scraper.rb', line 14

# Mixin hook: Ruby calls this when the module is included into +base+.
# Extends +base+ with ClassMethods so that the methods defined there become
# class-level methods of the including class.
#
# @param base [Module] the class or module performing the include
# @return [Object] the result of base.extend
def self.included(base)
  base.extend(ClassMethods)
end

.scrape(html_or_url, &block) ⇒ Object



7
8
9
10
11
12
# File 'lib/horsefield/scraper.rb', line 7

# One-shot convenience entry point: builds a throwaway scraper class,
# configures it with the given block, and scrapes +html_or_url+ with it.
#
# The block is instance_eval'ed against the anonymous class itself, after
# Horsefield::Scraper has been included, so the class-level methods added by
# that include are callable inside the block without a receiver.
#
# @param html_or_url [String] passed unchanged to the scraper's constructor
# @param block [Proc] class-level configuration for the anonymous scraper
# @return [Object] whatever the scraper instance's #scrape returns
def self.scrape(html_or_url, &block)
  anonymous_scraper = Class.new do
    include Horsefield::Scraper
    instance_eval(&block)
  end

  anonymous_scraper.new(html_or_url).scrape
end

Instance Method Details

#[](field) ⇒ Object



30
31
32
# File 'lib/horsefield/scraper.rb', line 30

# Looks up a single entry in the scraped results.
# Delegates to #fields, so the first call triggers the scrape.
#
# @param field [Object] key to look up in the value returned by #fields
# @return [Object] whatever fields[field] yields
def [](field)
  fields[field]
end

#fields ⇒ Object



38
39
40
41
# File 'lib/horsefield/scraper.rb', line 38

# Returns the scraped data, computing it on first use.
#
# Every lookup registered on the class is called with the parsed document and
# the hashes they return are merged in order, so a later lookup overrides an
# earlier one on a key clash. The class's postprocessor block is then
# evaluated with the merged hash as +self+, and its result is what gets
# cached and returned.
#
# Caching uses ||=, so a nil or false postprocessor result is recomputed on
# the next call.
#
# @return [Object] the value produced by the postprocessor block
def fields
  @fields ||= begin
    scraper_class = self.class

    merged = scraper_class.lookups.inject({}) do |collected, lookup|
      collected.merge(lookup.call(@doc))
    end

    merged.instance_eval(&scraper_class.postprocessor)
  end
end

#initialize(html_xml_or_url, remove_namespaces: false) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
# File 'lib/horsefield/scraper.rb', line 18

def initialize(html_xml_or_url, remove_namespaces: false)
  html_xml_or_url = open(html_xml_or_url).read if html_xml_or_url =~ /\A#{URI::regexp}\Z/

  @doc = if html_xml_or_url =~ /\A<\?xml/
           doc = Nokogiri::XML(html_xml_or_url)
           doc = doc.remove_namespaces! if remove_namespaces
           doc
         else
           Nokogiri::HTML(html_xml_or_url)
         end
end

#scrape ⇒ Object



34
35
36
# File 'lib/horsefield/scraper.rb', line 34

# Runs the scraper and returns its results.
# Simply delegates to #fields, so repeated calls reuse whatever #fields
# has cached.
#
# @return [Object] the value returned by #fields
def scrape
  fields
end