Class: HtmlScraper::Scraper
- Inherits:
- Object
- Inheritance hierarchy: Object → HtmlScraper::Scraper
- Defined in:
- lib/html_scraper/scraper.rb
Instance Method Summary
-
#initialize(template:, verbose: false) ⇒ Scraper
constructor
A new instance of Scraper.
- #inspect(template_node, html_node) ⇒ Object
- #log(text) ⇒ Object
- #parse(html) ⇒ Object
Constructor Details
#initialize(template:, verbose: false) ⇒ Scraper
Returns a new instance of Scraper.
6 7 8 9 10 |
# File 'lib/html_scraper/scraper.rb', line 6

# Creates a scraper bound to one template.
#
# @param template the template markup; stored untouched and later handed to
#   Nokogiri::HTML by #parse
# @param verbose [Boolean] when truthy, #log prints its messages
def initialize(template:, verbose: false)
  @template, @verbose = template, verbose
  # Nesting level; #log uses it as the number of leading spaces.
  @depth = 0
end
Instance Method Details
#inspect(template_node, html_node) ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/html_scraper/scraper.rb', line 20

# Matches one template node against an HTML node and collects the results.
#
# The XPath returned by build_xpath for +template_node+ is evaluated on
# +html_node+, and every matching node is handed to parse_node together with
# the template node. Progress is reported through #log, indented one level
# deeper while the matches are being parsed.
#
# NOTE(review): this two-argument method shadows Object#inspect, so calling
# `inspect` with no arguments on a Scraper raises ArgumentError. The name is
# kept for backward compatibility.
#
# @param template_node template element; must respond to +attribute+
# @param html_node node to search; must respond to +xpath+
# @return [Hash] when the template node carries an +hs-repeat+ attribute, a
#   one-entry hash mapping that attribute's value (as a Symbol) to the array
#   of per-match results; otherwise all per-match results merged into one
#   hash (presumably parse_node returns hashes — confirm against its
#   definition)
def inspect(template_node, html_node)
  tnode_xpath = build_xpath(template_node)
  matching_nodes = html_node.xpath(tnode_xpath)
  log("START #{tnode_xpath}...")

  @depth += 1
  sub_results =
    begin
      matching_nodes.map { |node| parse_node(template_node, node) }
    ensure
      # Restore the indentation level even if parse_node raises, so a rescued
      # failure does not skew the log output of later calls on this instance.
      @depth -= 1
    end
  log("END #{tnode_xpath}: #{sub_results.size} matches")

  repeat_attr = template_node.attribute('hs-repeat')
  # NOTE(review): blank? on a nil attribute presumably comes from
  # ActiveSupport — confirm it is loaded wherever this class is used.
  return sub_results.reduce({}, &:merge) if repeat_attr.blank?

  { repeat_attr.value.to_sym => sub_results }
end
#log(text) ⇒ Object
76 77 78 |
# File 'lib/html_scraper/scraper.rb', line 76

# Prints +text+ to standard output, prefixed with one space per current
# nesting level (@depth). Does nothing unless @verbose is truthy.
#
# @param text [#to_s] message to print
# @return [nil]
def log(text)
  return unless @verbose

  indent = ' ' * @depth
  puts "#{indent}#{text}"
end
#parse(html) ⇒ Object
12 13 14 15 16 17 18 |
# File 'lib/html_scraper/scraper.rb', line 12

# Parses +html+ according to the template given to the constructor.
#
# Both the template and +html+ are parsed with Nokogiri::HTML. The first child
# of the template document's root element is used as the template node and is
# matched against the root element of the parsed +html+ via #inspect.
#
# @param html HTML input accepted by Nokogiri::HTML
# @return [Hash] the result of #inspect, or an empty hash when the template
#   has no root element, the template root has no children, or +html+ yields
#   a document without a root element
def parse(html)
  template_doc_root = Nokogiri::HTML(@template).root
  return {} if template_doc_root.nil?

  template_root = template_doc_root.children.first
  # A root element without children gives nothing to match against.
  return {} if template_root.nil?

  html_root = Nokogiri::HTML(html).root
  # Without this guard a nil root would reach #inspect, which calls
  # html_node.xpath and would raise NoMethodError on nil.
  return {} if html_root.nil?

  inspect(template_root, html_root)
end