Class: GoogleBrowse::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/google_browse/scraper.rb

Constant Summary collapse

BASE_PAGE =
'http://google.com'
RESULTS_PER_REQUEST =

TODO: Use this, e.g. by adding &num=100 to the request URL?

100

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(query) ⇒ Scraper

Returns a new instance of Scraper.

Parameters:

  • :query (Hash)

    a customizable set of options



12
13
14
15
16
17
18
19
20
21
22
# File 'lib/google_browse/scraper.rb', line 12

# Sets up an empty link cache and a configured Mechanize agent, then stores
# the query through the +query=+ writer.
#
# @param query the search to perform; passed unchanged to +query=+
#   (NOTE(review): the generated docs describe it as an options Hash - confirm)
def initialize(query)
  @links = [] # All the links retrieved are cached here.
  @agent = Mechanize.new do |agent|
    agent.max_history = 1 # We cache the important data ourselves.
    # The alias selects the full User-Agent string, so no separate
    # user_agent assignment is needed.
    agent.user_agent_alias = 'Mac Safari'
    agent.keep_alive = false
  end

  self.query = query
end

Instance Attribute Details

#query ⇒ Object

Returns the value of attribute query.



7
8
9
# File 'lib/google_browse/scraper.rb', line 7

# Reader for the scraper's current query.
#
# @return [Object] the value held in @query (nil until it has been assigned)
def query; @query end

Instance Method Details

#[](index) ⇒ Object

Parameters:

  • index (Integer, Range)


30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/google_browse/scraper.rb', line 30

# Looks up cached links by position, fetching further result pages on demand
# until the requested position is covered or no pages remain.
#
# @param index [Integer, Range] position(s) of the wanted link(s)
# @return [Object, Array, nil] whatever +@links[index]+ yields once enough
#   pages have been retrieved
# @raise [TypeError] if +index+ is neither an Integer nor a Range
def [](index)
  highest =
    case index
    when Integer
      index
    when Range
      # Range#max is nil for empty/negative-ended ranges and raises for
      # endless ones; in those cases there is no concrete upper bound.
      index.end && index.max
    else
      raise TypeError, "Expected Integer or Range"
    end

  # An element at position i exists only when size > i, so keep fetching
  # while the highest wanted position is still >= the cache size.
  # Negative or unbounded requests are answered from the cache as it stands.
  if highest && highest >= 0
    retrieve_next_page while more_pages? && highest >= @links.size
  end

  @links[index]
end

#more_pages? ⇒ Boolean

Returns:

  • (Boolean)


9
# File 'lib/google_browse/scraper.rb', line 9

# Reports the stored "more pages" flag.
#
# @return [Boolean, nil] the current value of @more_pages, returned as-is
def more_pages?
  @more_pages
end


8
# File 'lib/google_browse/scraper.rb', line 8

# Number of links currently held in the @links cache.
#
# @return [Integer] element count of @links
def num_links
  @links.length
end