Class: TinyGrabber::Agent

Inherits:
Object
  • Object
show all
Defined in:
lib/tiny_grabber/agent.rb

Overview

Net::HTTP agent for TinyGrabber. Initializes the connection to a resource and sets the connection attributes.

Constant Summary collapse

# Pool of browser User-Agent strings from which a new agent picks its
# default identity.
#
# Agent aliases given from www.useragentstring.com/pages/Chrome/
#
# The array is frozen so the shared pool cannot be modified by accident;
# the individual strings are intentionally left unfrozen.
AGENT_ALIASES = [
  # Chrome
  'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
  'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
  # Firefox
  'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
  'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
  'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0',
  # Internet Explorer
  'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
  'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
  'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)',
  'Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)',
  # Opera
  'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
  'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
  'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
  'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52',
].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeAgent

Initialization object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/tiny_grabber/agent.rb', line 51

# Set up a new agent with its default settings.
#
# Debugging is switched off, a User-Agent string is drawn at random from
# AGENT_ALIASES, and no proxy, credentials, headers or cookies are set.
def initialize
  # Debug settings: logging off, log destination :file, HTML dump off
  @debug = false
  @debug_destination = :file
  @debug_save_html = false

  # Agent attributes
  # Array#sample draws one alias uniformly at random from the pool
  @user_agent = AGENT_ALIASES.sample
  @proxy = []
  @basic_auth = {}
  @headers = {}
  @cookies = nil
  # Handed to Net::HTTP#read_timeout= when a request is sent
  @read_timeout = 10
  # URI object of the current request (assigned by set_uri)
  @uri = nil
  # Class used to open connections (receives .start in send_request)
  @http = Net::HTTP
  # Response of the most recent request
  @response = nil
end

Instance Attribute Details

#basic_authObject

Basic authentication configuration



19
20
21
# File 'lib/tiny_grabber/agent.rb', line 19

# Reader for the Basic authentication settings. The value starts out as an
# empty Hash; set_basic_auth reads its :username and :password entries.
def basic_auth; @basic_auth; end

#cookiesObject

Cookies



23
24
25
# File 'lib/tiny_grabber/agent.rb', line 23

# Reader for the stored cookies. The value is nil until save_cookies copies
# the cookies of a response into it; set_cookies sends it as the Cookie header.
def cookies; @cookies; end

#debugObject

Debug flag for detailed logging and for saving the result HTML to a /log/*.html file



7
8
9
# File 'lib/tiny_grabber/agent.rb', line 7

# Reader for the debug flag (false by default). While it is truthy, the
# request/response helpers write trace lines through Debug::save.
def debug; @debug; end

#debug_destinationObject

Debug destination type



9
10
11
# File 'lib/tiny_grabber/agent.rb', line 9

# Reader for the debug destination (:file by default). The value is passed
# as the first argument of every Debug::save call.
def debug_destination; @debug_destination; end

#debug_save_htmlObject

Debug flag for saving the response HTML to a file



11
12
13
# File 'lib/tiny_grabber/agent.rb', line 11

# Reader for the "save HTML" flag (false by default). While it is truthy,
# fetch hands the response body to Debug::save_to_file.
def debug_save_html; @debug_save_html; end

#headersObject

Headers



21
22
23
# File 'lib/tiny_grabber/agent.rb', line 21

# Reader for the stored headers (an empty Hash by default). set_headers
# copies them onto the outgoing request; save_headers overwrites them with
# the headers of a response.
def headers; @headers; end

#proxyObject

Remote proxy configuration



17
18
19
# File 'lib/tiny_grabber/agent.rb', line 17

# Reader for the remote proxy configuration (an empty Array by default).
# NOTE(review): none of the methods shown alongside this reader uses @proxy;
# confirm where the value is applied.
def proxy; @proxy; end

#read_timeoutObject

Max time to execute request



13
14
15
# File 'lib/tiny_grabber/agent.rb', line 13

# Reader for the read timeout (10 by default). send_request assigns the
# value to the connection's read_timeout before issuing the request.
def read_timeout; @read_timeout; end

#user_agentObject

Web browser name



15
16
17
# File 'lib/tiny_grabber/agent.rb', line 15

# Reader for the User-Agent string. It is drawn from AGENT_ALIASES when the
# agent is created, and set_user_agent stores it under the 'User-Agent' key
# of the headers.
def user_agent; @user_agent; end

Instance Method Details

#fetch(url, method = :get, headers = {}, params = {}) ⇒ Object

Performs a GET or POST request. Sets the USER_AGENT, BASIC_AUTH, HEADERS and COOKIES request attributes, sends the request, and saves the response COOKIES for subsequent requests.

Parameters:

  • url

    Resource link

  • method (defaults to: :get)

    Request method

  • headers (defaults to: {})

    Request header

  • params (defaults to: {})

    Request additional params



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/tiny_grabber/agent.rb', line 155

# Perform a GET or POST request against +url+ and return the response.
#
# The request is decorated with the caller's headers (when given), the
# agent's User-Agent, Basic auth credentials, stored headers and stored
# cookies. After a 2xx response the response headers and cookies are stored
# on the agent so that later requests reuse them.
#
# @param url [String] resource link
# @param method [Symbol] request method, :get or :post
# @param headers [Hash] request headers; when non-empty they replace the
#   headers stored on the agent
# @param params [Hash] form data, sent only with :post
# @return [Object] whatever send_request returns (the HTTP response)
# @raise [ArgumentError] if +method+ is neither :get nor :post
def fetch(url, method = :get, headers = {}, params = {})
  if @debug
    Debug::save @debug_destination, '=============================='
    Debug::save @debug_destination, "#{method.upcase} #{url}"
    Debug::save @debug_destination, "-> [params] = #{params}"
    Debug::save @debug_destination, '------------------------------'
  end
  set_uri url
  case method
  when :get
    @request = Net::HTTP::Get.new(@uri.request_uri)
  when :post
    @request = Net::HTTP::Post.new(@uri.request_uri)
    @request.set_form_data(params)
  else
    # Without this branch @request would stay nil (or keep the previous
    # request), failing later with an unrelated error or resending stale data.
    raise ArgumentError, "Unsupported HTTP method: #{method.inspect}"
  end
  # Adopt the caller's headers BEFORE the User-Agent is written into @headers;
  # otherwise the replacement would discard it. A copy is stored so the
  # caller's hash is never modified by the agent.
  @headers = headers.dup unless headers.empty?
  # An explicitly passed User-Agent header keeps precedence over @user_agent.
  explicit_agent = headers.keys.any? { |key| String(key).downcase == 'user-agent' }
  set_user_agent if @user_agent && !explicit_agent
  set_basic_auth unless @basic_auth.empty?
  set_headers unless @headers.empty?
  set_cookies if @cookies
  @response = send_request
  case @response
  # HTTP response code 1xx
  when Net::HTTPInformation
    Debug::save @debug_destination, "<- [response] = Net::HTTPInformation" if @debug
  # HTTP response code 2xx
  when Net::HTTPSuccess
    # Only successful responses update the stored headers and cookies
    save_headers if @response.header
    save_cookies if @response.cookies
    Debug::save @debug_destination, "<- [response] = #{@response.code} Net::HTTPSuccess" if @debug
  # HTTP response code 3xx
  when Net::HTTPRedirection
    Debug::save @debug_destination, "<- [response] = #{@response.code} Net::HTTPRedirection" if @debug
  # HTTP response code 4xx
  when Net::HTTPClientError
    Debug::save @debug_destination, "<- [response] = #{@response.code} Net::HTTPClientError" if @debug
  # HTTP response code 5xx
  when Net::HTTPServerError
    Debug::save @debug_destination, "<- [response] = #{@response.code} Net::HTTPServerError" if @debug
  end
  Debug::save_to_file @response.body if @debug_save_html
  @response
end

#resetObject

Clears headers and cookies



275
276
277
278
# File 'lib/tiny_grabber/agent.rb', line 275

# Forget the stored cookies and headers so that the next request starts
# without them. Returns nil.
def reset
  @cookies = nil
  @headers = {}
  nil
end

#save_cookiesObject

Save response cookies in agent attribute



267
268
269
270
# File 'lib/tiny_grabber/agent.rb', line 267

# Copy the cookies of the current response into @cookies, logging them when
# debugging is enabled.
def save_cookies
  @cookies = @response.cookies
  return unless @debug

  Debug.save(@debug_destination, "<- [cookies] = #{@cookies}")
end

#save_headersObject

Save response headers in agent attribute



257
258
259
260
261
262
# File 'lib/tiny_grabber/agent.rb', line 257

# Store the headers of the current response in @headers, logging them when
# debugging is enabled.
def save_headers
  # TRANSFER_ENCODING is removed so it is not carried into the next request
  # of a chain.
  @headers = @response.headers.tap { |received| received.delete('transfer-encoding') }
  return unless @debug

  Debug.save(@debug_destination, "<- [headers] = #{@headers}")
end

#send_requestObject

Sends the request and returns the response. Uses an SSL connection when the link scheme is HTTPS.



246
247
248
249
250
251
252
# File 'lib/tiny_grabber/agent.rb', line 246

# Open a connection to the host and port of @uri, send @request over it and
# return the result of that request. SSL is requested when the URI scheme is
# 'https'.
def send_request
  secure = @uri.scheme == 'https'
  @http.start(@uri.host, @uri.port, use_ssl: secure) do |connection|
    connection.read_timeout = @read_timeout
    Debug.save(@debug_destination, "-> [read_timeout] = #{@read_timeout}") if @debug
    connection.request(@request)
  end
end

#set_basic_authObject

Set BASIC_AUTH request authentication



221
222
223
224
# File 'lib/tiny_grabber/agent.rb', line 221

# Apply the :username and :password entries of @basic_auth to the current
# request as Basic auth credentials, logging the settings when debugging is
# enabled.
def set_basic_auth
  username = @basic_auth[:username]
  password = @basic_auth[:password]
  @request.basic_auth(username, password)
  return unless @debug

  Debug.save(@debug_destination, "-> [basic_auth] = #{@basic_auth}")
end

#set_cookiesObject

Set request COOKIES



237
238
239
240
# File 'lib/tiny_grabber/agent.rb', line 237

# Send the stored cookies with the current request by assigning them to its
# 'Cookie' header, logging them when debugging is enabled.
def set_cookies
  @request['Cookie'] = @cookies
  return unless @debug

  Debug.save(@debug_destination, "-> [cookies] = #{@cookies}")
end

#set_headersObject

Set request HEADERS



229
230
231
232
# File 'lib/tiny_grabber/agent.rb', line 229

# Copy every entry of @headers onto the current request, logging the
# headers when debugging is enabled.
#
# Header names are converted with String() so that Symbol keys are accepted.
def set_headers
  # Assign with []= rather than add_field: Net::HTTP requests start out with
  # default Accept and User-Agent ('Ruby') headers, and add_field would append
  # to them, producing e.g. "User-Agent: Ruby, Mozilla/5.0 ...".
  @headers.each { |name, value| @request[String(name)] = value }
  Debug::save @debug_destination, "-> [headers] = #{@headers}" if @debug
end

#set_uri(url) ⇒ Object

Initialize URI object from request url

Parameters:

  • url

    Request link



204
205
206
207
208
# File 'lib/tiny_grabber/agent.rb', line 204

# Build the URI object for the request from +url+ and store it in @uri,
# logging it when debugging is enabled.
#
# @param url [String] request link, escaped or not
def set_uri(url)
  # URI.escape / URI.unescape were removed in Ruby 3.0; the parser object
  # offers the same two operations on old and new Ruby versions.
  parser = URI::DEFAULT_PARSER
  # Unescape first, then escape again: an already escaped link is not
  # escaped twice, while a raw link (spaces, non-ASCII characters) becomes
  # a valid URI.
  @uri = URI(parser.escape(parser.unescape(url)))
  Debug::save @debug_destination, "-> [uri] = #{@uri}" if @debug
end

#set_user_agentObject

Set USER_AGENT request attribute



213
214
215
216
# File 'lib/tiny_grabber/agent.rb', line 213

# Put the agent's User-Agent string into @headers under the 'User-Agent'
# key, logging it when debugging is enabled.
def set_user_agent
  @headers['User-Agent'] = @user_agent
  return unless @debug

  Debug.save(@debug_destination, "-> [user_agent] = #{@user_agent}")
end