Class: BlackStack::Bots::Google

Inherits:
MechanizeBot show all
Defined in:
lib/google.rb

Instance Attribute Summary

Attributes inherited from MechanizeBot

#agent

Attributes inherited from Bot

#ip, #password, #port_index, #ports, #user

Instance Method Summary collapse

Methods inherited from MechanizeBot

#initialize

Methods inherited from Bot

#initialize, #proxy?

Constructor Details

This class inherits a constructor from BlackStack::Bots::MechanizeBot

Instance Method Details

#search(query) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/google.rb', line 4

def search(query)
    ret = []
    # initialize mechanize agent
    self.agent = Mechanize.new
    # set a proxy with user and password
    self.port_index += 1
    self.port_index = 0 if self.port_index >= self.ports.length
    self.agent.set_proxy(self.ip, self.ports[self.port_index], self.user, self.password) if self.proxy?
    # grab the page
    page = agent.get('http://www.google.com/')
    google_form = page.form('f')
    google_form.q = query
    page = agent.submit(google_form, google_form.buttons.first)
    # iterate divs with class starting with 'g '
    page.search('h3').each do |h3|
        # get the class of the div
        title = h3.text.strip
        # get the link inside the div
        a = h3.parent.parent.parent
        href = a['href']
        descr = a.parent.parent.css('/div').last.text.strip
        # get the value of the paremter with name param1 from the querystring using URI
        uri = URI.parse(href)
        params = CGI.parse(uri.query)
        url = params['q'].first
        # add to the list array of results
        ret << { :title=>title, :url=>url, :description=>descr }
    end
    # destroy mechanize agent
    self.agent.shutdown
    # return
    ret
end