Class: Saper::Browser

Inherits:
Object
  • Object
show all
Defined in:
lib/saper/core/browser.rb

Constant Summary collapse

# Preset User-Agent strings, keyed by the short symbolic name accepted as the
# :agent option. The :saper entry embeds the library version.
AGENTS = {
  ie6:     'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
  ie7:     'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  ie8:     'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  ie9:     'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
  mozilla: 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
  safari:  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22',
  iphone:  'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
  ipad:    'Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10',
  android: 'Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13',
  saper:   'Mozilla/5.0 (compatible; Saper Ruby client %s)' % Saper::VERSION
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Saper::Browser

Returns a new Browser instance.

Options Hash (options):

  • :agent (Symbol)

    User agent

  • :headers (Hash)

    Additional request headers

  • :logger (Logger)

    Logger instance

  • :proxy (Hash)

    Proxy settings (:host, :port, :user, :password)


31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/saper/core/browser.rb', line 31

# Returns a new Browser instance.
#
# @param options [Hash] configuration; the hash is only read, so the caller's
#   object is left unmodified
# @option options [Symbol, String] :agent key of AGENTS or a literal
#   User-Agent string (defaults to :saper)
# @option options [Hash] :headers additional request headers
# @option options [Logger] :logger logger instance (defaults to a new
#   Saper::Logger)
# @option options [Hash] :proxy proxy settings, read from the :host, :port,
#   :user and :password keys; ignored unless it is a Hash
def initialize(options = {})
  # Read with [] rather than delete so the caller's options hash stays intact.
  @agent    = options[:agent] || :saper
  @headers  = options[:headers]
  @logger   = options[:logger] || Saper::Logger.new
  @proxy    = options[:proxy]
  @history  = []
  @received = 0
  @sent     = 0
  @mech = Mechanize.new do |a|
    if @proxy.is_a?(Hash)
      a.set_proxy @proxy[:host], @proxy[:port], @proxy[:user], @proxy[:password]
    end
    a.robots                 = false
    a.user_agent             = agent
    a.request_headers        = headers
    # Disable Mechanize's own parsers for these content types.
    a.pluggable_parser.csv   = nil
    a.pluggable_parser.html  = nil
    a.pluggable_parser.xhtml = nil
    a.pluggable_parser.xml   = nil
  end
  # NOTE(review): Mechanize documents pre_connect_hooks / post_connect_hooks
  # (plural, arrays of callables). Confirm the singular, block-taking forms
  # used here are provided elsewhere in the project.
  @mech.pre_connect_hook do |_agent, req|
    # Approximation: stringified header hash plus body bytes (nil body => 0).
    @sent += req.to_hash.to_s.size
    @sent += req.body.to_s.bytesize
  end
  @mech.post_connect_hook do |_agent, _uri, resp, body|
    @received += resp.to_hash.to_s.size
    @received += body.to_s.bytesize
  end
  @logger.new_browser(self)
end

Instance Attribute Details

#history ⇒ Object (readonly)

Array of requested URLs.


5
6
7
# File 'lib/saper/core/browser.rb', line 5

# Array of requested URLs.
#
# @return [Array] the value of @history
def history
  @history
end

#received ⇒ Object (readonly)

Approximate number of bytes received.


8
9
10
# File 'lib/saper/core/browser.rb', line 8

# Approximate number of bytes received.
#
# @return [Integer] the running total kept in @received
def received
  @received
end

#sent ⇒ Object (readonly)

Approximate number of bytes sent.


11
12
13
# File 'lib/saper/core/browser.rb', line 11

# Approximate number of bytes sent.
#
# @return [Integer] the running total kept in @sent
def sent
  @sent
end

Instance Method Details

#agent ⇒ String

Returns User-Agent string used with requests.


107
108
109
# File 'lib/saper/core/browser.rb', line 107

# Returns the User-Agent string used with requests.
#
# @return [String] the AGENTS preset whose key matches @agent, or @agent
#   itself (as a String) when no preset matches
def agent
  preset = AGENTS[@agent.to_sym]
  return preset if preset

  @agent.to_s
end

#get(url, query = {}) ⇒ Saper::Document

Performs a GET request and returns a Saper::Items::Document, or a Saper::Items::Nothing when Mechanize raises a ResponseCodeError.


78
79
80
81
82
83
84
85
# File 'lib/saper/core/browser.rb', line 78

# Performs a GET request.
#
# The URL is logged and appended to the history before the request is made,
# so failed requests are recorded too.
#
# @param url   the address to request
# @param query [Hash] query parameters handed to Mechanize
# @return [Saper::Items::Document] built from the response body, URI and header
# @return [Saper::Items::Nothing] when Mechanize raises a ResponseCodeError
def get(url, query = {})
  @logger.new_get_request(url)
  @history << url
  page = @mech.get(url, query)
  Saper::Items::Document.new(page.body, page.uri, page.header)
rescue Mechanize::ResponseCodeError
  # TODO: raise a custom exception instead of returning a placeholder
  Saper::Items::Nothing.new
end

#headers ⇒ Hash

Returns additional request headers.


70
71
72
# File 'lib/saper/core/browser.rb', line 70

# Returns additional request headers.
#
# @return [Hash] the configured headers converted with #to_hash, or an empty
#   Hash when none were given or the value is not hash-like
def headers
  # The guard tests for #to_hash, so perform the conversion: a hash-like
  # object becomes a real Hash, while a Hash is returned unchanged (same object).
  @headers.respond_to?(:to_hash) ? @headers.to_hash : {}
end

#post(url, query = {}, headers = {}) ⇒ Saper::Document

Performs a POST request and returns a Saper::Items::Document, or a Saper::Items::Nothing when Mechanize raises a ResponseCodeError.


91
92
93
94
95
96
97
98
# File 'lib/saper/core/browser.rb', line 91

# Performs a POST request.
#
# The URL is logged and appended to the history before the request is made,
# so failed requests are recorded too.
#
# @param url     the address to post to
# @param query   [Hash] parameters handed to Mechanize
# @param headers [Hash] extra headers for this request only
# @return [Saper::Items::Document] built from the response body, URI and header
# @return [Saper::Items::Nothing] when Mechanize raises a ResponseCodeError
def post(url, query = {}, headers = {})
  @logger.new_post_request(url)
  @history << url
  response = @mech.post(url, query, headers)
  Saper::Items::Document.new(response.body, response.uri, response.header)
rescue Mechanize::ResponseCodeError
  # TODO: raise a custom exception instead of returning a placeholder
  Saper::Items::Nothing.new
end

#post_with_bearer_token(url, query, token) ⇒ Object


101
102
103
# File 'lib/saper/core/browser.rb', line 101

# Performs a POST request that carries an "Authorization: Bearer <token>"
# header.
#
# @param url   forwarded to #post
# @param query forwarded to #post
# @param token value substituted into the Bearer header
# @return whatever #post returns
def post_with_bearer_token(url, query, token)
  bearer = "Bearer %s" % token
  auth_headers = { "Authorization" => bearer }
  post(url, query, auth_headers)
end

#requests ⇒ Integer

Returns the number of HTTP requests.


64
65
66
# File 'lib/saper/core/browser.rb', line 64

# Returns the number of HTTP requests, i.e. how many URLs are in the history.
#
# @return [Integer]
def requests
  @history.length
end