Class: HtmlScraper::Scraper

Inherits:
Object
Defined in:
lib/html_scraper/scraper.rb

Instance Method Summary

Constructor Details

#initialize(template:, verbose: false) ⇒ Scraper

Returns a new instance of Scraper.



6
7
8
9
10
# File 'lib/html_scraper/scraper.rb', line 6

# Builds a scraper around the given template.
#
# @param template [Object] template source; stored as-is and read later
#   when parsing
# @param verbose [Boolean] when truthy, #log prints its messages
def initialize(template:, verbose: false)
  # Logging state: nesting level starts at zero, output is opt-in.
  @verbose = verbose
  @depth = 0

  @template = template
end

Instance Method Details

#inspect(template_node, html_node) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/html_scraper/scraper.rb', line 20

# Matches one template node against an HTML node and collects the data
# extracted from every match.
#
# The XPath for the template node comes from build_xpath; each node it
# selects under html_node is handed to parse_node.
#
# NOTE(review): this two-argument method replaces the default zero-argument
# Object#inspect for instances of this class — confirm that is intended.
#
# @param template_node [#attribute] template node; its 'hs-repeat'
#   attribute decides how results are combined
# @param html_node [#xpath] node the generated XPath is evaluated against
# @return [Hash] when 'hs-repeat' is not blank, a single-entry hash mapping
#   the attribute value (as a Symbol) to the array of per-match results;
#   otherwise all per-match hashes merged into one (later matches overwrite
#   earlier ones on key collisions)
def inspect(template_node, html_node)
  tnode_xpath = build_xpath(template_node)
  matching_nodes = html_node.xpath(tnode_xpath)

  log("START #{tnode_xpath}...")
  @depth += 1
  sub_results =
    begin
      matching_nodes.map { |node| parse_node(template_node, node) }
    ensure
      # Restore the log nesting level even if parse_node raises, so later
      # log output from this instance is not left over-indented.
      @depth -= 1
    end
  log("END #{tnode_xpath}: #{sub_results.size} matches")

  repeat_attr = template_node.attribute('hs-repeat')
  if repeat_attr.blank?
    sub_results.reduce({}, &:merge)
  else
    { repeat_attr.value.to_sym => sub_results }
  end
end

#log(text) ⇒ Object



76
77
78
# File 'lib/html_scraper/scraper.rb', line 76

# Prints a message indented by the current nesting depth (three spaces per
# level). Does nothing unless the scraper is in verbose mode.
#
# @param text [#to_s] message to print
# @return [nil]
def log(text)
  return unless @verbose

  indent = '   ' * @depth
  puts "#{indent}#{text}"
end

#parse(html) ⇒ Object



12
13
14
15
16
17
18
# File 'lib/html_scraper/scraper.rb', line 12

# Parses the given HTML against the scraper's template.
#
# Both the template and the input are parsed with Nokogiri::HTML. The first
# child of the template document's root is used as the template root and
# matched against the root of the input document via #inspect.
#
# @param html [Object] HTML source handed to Nokogiri::HTML
# @return [Hash] the result of #inspect, or an empty hash when either
#   document has no root or the template root has no children
def parse(html)
  html_template = Nokogiri::HTML(@template)
  return {} if html_template.root.nil?

  template_root = html_template.root.children.first
  return {} if template_root.nil?

  html_root = Nokogiri::HTML(html).root
  # Mirror the template guard: a document without a root has nothing to
  # match, and #inspect would otherwise call #xpath on nil.
  return {} if html_root.nil?

  inspect(template_root, html_root)
end