Module: Horsefield::Scraper
- Defined in: lib/horsefield/scraper.rb
Defined Under Namespace
Modules: ClassMethods
Class Method Summary
- .included(base) ⇒ Object
- .scrape(html_or_url, &block) ⇒ Object
Instance Method Summary
- #[](field) ⇒ Object
- #fields ⇒ Object
- #initialize(html_xml_or_url, remove_namespaces: false) ⇒ Object
- #scrape ⇒ Object
Class Method Details
.included(base) ⇒ Object
14 15 16 |
# File 'lib/horsefield/scraper.rb', line 14
#
# Module#included hook: extends whatever includes this module with
# ClassMethods, so those methods become available on the includer itself.
#
# @param base [Module] the class or module performing the include
# @return [Object] whatever `base.extend` returns
def self.included(base)
  base.extend ClassMethods
end
.scrape(html_or_url, &block) ⇒ Object
7 8 9 10 11 12 |
# File 'lib/horsefield/scraper.rb', line 7
#
# One-shot entry point: builds an anonymous class that includes
# Horsefield::Scraper, evaluates the given block with that class as `self`,
# then instantiates it with the input and returns the result of #scrape.
#
# @param html_or_url [Object] forwarded unchanged to the scraper's constructor
# @yield block evaluated in the context of the anonymous scraper class
# @return [Object] the value returned by the instance's #scrape
def self.scrape(html_or_url, &block)
  anonymous_scraper = Class.new do
    include Horsefield::Scraper

    # `self` here is the anonymous class, so the block runs against it.
    instance_eval(&block)
  end

  anonymous_scraper.new(html_or_url).scrape
end
Instance Method Details
#[](field) ⇒ Object
30 31 32 |
# File 'lib/horsefield/scraper.rb', line 30
#
# Looks up a single entry in the scraped result.
#
# @param field [Object] key to look up in #fields
# @return [Object] whatever `fields[field]` yields for that key
def [](field)
  fields[field]
end
#fields ⇒ Object
38 39 40 41 |
# File 'lib/horsefield/scraper.rb', line 38
#
# Runs every lookup registered on the class against the parsed document,
# merges their results into one hash, and hands that hash to the class's
# postprocessor. The outcome is memoized in @fields.
#
# When two lookups produce the same key, the later lookup wins.
# Assumes each lookup returns a Hash (or something Hash#merge! accepts) --
# TODO confirm against ClassMethods.
#
# @return [Object] the value the postprocessor block evaluates to
def fields
  @fields ||= begin
    # Accumulate in place: one hash is filled rather than a fresh copy being
    # allocated for every lookup.
    merged = self.class.lookups.each_with_object({}) do |lookup, collected|
      collected.merge!(lookup.call(@doc))
    end

    # The postprocessor runs with the merged hash as `self`.
    merged.instance_eval(&self.class.postprocessor)
  end
end
#initialize(html_xml_or_url, remove_namespaces: false) ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/horsefield/scraper.rb', line 18
#
# Parses the input into a Nokogiri document stored in @doc.
#
# If the whole input matches URI.regexp it is treated as a location and its
# contents are fetched first. The resulting text is parsed as XML when it
# starts with an `<?xml` declaration, otherwise as HTML.
#
# @param html_xml_or_url [String] markup, or a URL whose body is markup
# @param remove_namespaces [Boolean] when true, namespaces are stripped from
#   XML documents; the flag is not consulted for HTML input
# @return [Object]
def initialize(html_xml_or_url, remove_namespaces: false)
  if html_xml_or_url =~ /\A#{URI::regexp}\Z/
    # Kernel#open stopped fetching URLs in Ruby 3.0; URI.open is the supported
    # entry point. The block form also closes the underlying IO/Tempfile,
    # which `open(...).read` left open.
    # NOTE(review): assumes 'open-uri' is required elsewhere in this file, as
    # the original URL-fetching `open` call already needed it -- confirm.
    html_xml_or_url = URI.open(html_xml_or_url, &:read)
  end

  @doc =
    if html_xml_or_url =~ /\A<\?xml/
      doc = Nokogiri::XML(html_xml_or_url)
      doc = doc.remove_namespaces! if remove_namespaces
      doc
    else
      Nokogiri::HTML(html_xml_or_url)
    end
end
#scrape ⇒ Object
34 35 36 |
# File 'lib/horsefield/scraper.rb', line 34
#
# Performs the scrape; simply returns #fields.
#
# @return [Object] the value of #fields
def scrape
  fields
end