Class: EmailCrawler::Scraper

Inherits:
Object
  • Object
show all
Includes:
MechanizeHelper
Defined in:
lib/email_crawler/scraper.rb

Constant Summary collapse

MAX_URLS =
10

Instance Method Summary collapse

Methods included from MechanizeHelper

#get, #new_agent

Constructor Details

#initialize(google_website) ⇒ Scraper

Returns a new instance of Scraper.



9
10
11
# File 'lib/email_crawler/scraper.rb', line 9

# Builds the scraper and remembers the search home page it will query.
#
# The stored URL is "https://www." followed by +google_website+ and a
# trailing slash.
#
# @param google_website [#to_s] host portion interpolated into the URL
#   (presumably something like "google.com" -- confirm against callers)
def initialize(google_website)
  @google_website = "https://www.#{google_website}/"
end

Instance Method Details

#top_ten_urls_for(q) ⇒ Object



13
14
15
16
17
18
19
20
21
22
# File 'lib/email_crawler/scraper.rb', line 13

# Submits +q+ through the search form found on the configured home page and
# collects the result links.
#
# Steps: fetch @google_website, locate the form whose action is "/search",
# put +q+ into its "q" field, submit it, then read the href of every anchor
# matching "#search ol li h3.r a" on the results page.
#
# Anchors with no href are skipped. Each remaining href is lowercased, and
# hrefs starting with "/search?q=" (after lowercasing) are discarded.
#
# NOTE(review): lowercasing the whole URL may alter case-sensitive paths;
# kept as-is because callers may rely on it -- confirm.
#
# @param q [String] the query placed into the form's "q" field
# @return [Array<String>] at most MAX_URLS lowercased hrefs, in page order
def top_ten_urls_for(q)
  search_page = agent.get(@google_website)
  search_form = search_page.form_with(action: "/search")
  search_form.field_with(name: "q").value = q
  search_results_page = agent.submit(search_form)

  # An anchor lacking an href yields nil; drop those so #downcase cannot
  # blow up on nil.
  hrefs = search_results_page.search("#search ol li h3.r a").map { |a| a["href"] }.compact

  hrefs.map(&:downcase).
    reject { |url| url.start_with?("/search?q=") }.
    first(MAX_URLS)
end