Class: Whatsa::Scraper
- Inherits:
-
Object
- Object
- Whatsa::Scraper
- Defined in:
- lib/whatsa/scraper.rb
Constant Summary collapse
- WIKISEARCH =
'https://en.wikipedia.org/w/index.php?search='
Instance Attribute Summary collapse
-
#page ⇒ Object
readonly
Returns the value of attribute page.
-
#query ⇒ Object
readonly
Returns the value of attribute query.
Instance Method Summary collapse
- #article? ⇒ Boolean
- #disambig? ⇒ Boolean
-
#initialize(term) ⇒ Scraper
constructor
A new instance of Scraper.
- #make_article ⇒ Object
- #make_disambig ⇒ Object
- #not_found? ⇒ Boolean
- #results_page? ⇒ Boolean
Constructor Details
#initialize(term) ⇒ Scraper
Returns a new instance of Scraper.
7 8 9 10 11 12 13 14 15 |
# File 'lib/whatsa/scraper.rb', line 7
#
# Builds the search query for +term+ and downloads the matching Wikipedia
# page once, so later predicate calls do not hit the site again.
#
# @param term [String] free-form search text
# @return [Scraper] a new instance of Scraper
# @note Errors raised while fetching the page are not rescued here.
def initialize(term)
  # Keep only ASCII word characters and parentheses: every run of anything
  # else becomes a single '+', and a '+' at either end is stripped.
  # The class is spelled out because the range A-z also spans the
  # punctuation that sits between 'Z' and 'a' in ASCII ([ \ ] ^ `).
  @query = term.gsub(/[^A-Za-z0-9_()]+/, '+').gsub(/(\A\+|\+\z)/, '')

  # URI.open replaces the bare Kernel#open call, which no longer accepts
  # URLs on Ruby 3.0+. The block form closes the download handle as soon
  # as the document has been parsed.
  @page = URI.open(WIKISEARCH + query) { |io| Nokogiri::HTML(io) }
end
Instance Attribute Details
#page ⇒ Object (readonly)
Returns the value of attribute page.
5 6 7 |
# File 'lib/whatsa/scraper.rb', line 5
#
# Reader for the document stored in @page by the constructor.
#
# @return [Object] the value of @page
def page
  @page
end
#query ⇒ Object (readonly)
Returns the value of attribute query.
5 6 7 |
# File 'lib/whatsa/scraper.rb', line 5
#
# Reader for the sanitized search string stored in @query by the constructor.
#
# @return [Object] the value of @query
def query
  @query
end
Instance Method Details
#article? ⇒ Boolean
25 26 27 |
# File 'lib/whatsa/scraper.rb', line 25
#
# Tells whether the fetched page is a regular article: it must contain the
# '#ca-nstab-main' element and must not be a disambiguation page.
#
# @return [Boolean]
def article?
  # Without the main-namespace tab there is nothing more to check.
  return false if page.css('#ca-nstab-main').empty?

  !disambig?
end
#disambig? ⇒ Boolean
29 30 31 |
# File 'lib/whatsa/scraper.rb', line 29
#
# Tells whether the fetched page contains a '#disambigbox' element.
#
# @return [Boolean]
def disambig?
  marker = page.css('#disambigbox')
  !marker.empty?
end
#make_article ⇒ Object
33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/whatsa/scraper.rb', line 33
#
# Resolves the fetched page to an article.
#
# * An article page is wrapped directly in Whatsa::Article.
# * A search-results page that is not the "none found" page is followed
#   through the text of its first '.mw-search-results li a' link.
# * A disambiguation page is followed through its first choice.
#
# Following a link or choice builds a new scraper of the same class, which
# performs another page fetch.
#
# @return [Object, nil] a Whatsa::Article, or nil when the page is none of
#   the above or offers nothing to follow
def make_article
  if article?
    Whatsa::Article.new(page)
  elsif results_page? && !not_found?
    # The list can come back without any link; return nil rather than
    # calling #text on nil.
    first_link = page.css('.mw-search-results li a').first
    first_link ? self.class.new(first_link.text).make_article : nil
  elsif disambig?
    # An empty choice list would otherwise hand nil to the constructor,
    # which calls #gsub on its argument.
    first_choice = make_disambig.choices.first
    first_choice ? self.class.new(first_choice).make_article : nil
  end
end
#make_disambig ⇒ Object
46 47 48 |
# File 'lib/whatsa/scraper.rb', line 46
#
# Wraps the fetched page in a Whatsa::Disambig when it is a disambiguation
# page.
#
# @return [Object, nil] the Whatsa::Disambig, or nil for any other page
def make_disambig
  Whatsa::Disambig.new(page) if disambig?
end
#not_found? ⇒ Boolean
21 22 23 |
# File 'lib/whatsa/scraper.rb', line 21
#
# Tells whether the fetched page contains a '.mw-search-nonefound' element.
#
# @return [Boolean]
def not_found?
  page.css('.mw-search-nonefound').any?
end
#results_page? ⇒ Boolean
17 18 19 |
# File 'lib/whatsa/scraper.rb', line 17
#
# Tells whether the fetched page contains a '.searchresults' element.
#
# @return [Boolean]
def results_page?
  listing = page.css('.searchresults')
  !listing.empty?
end