Class: TinyGrabber::Agent

Inherits:
Object
  • Object
show all
Defined in:
lib/tiny_grabber/agent.rb

Overview

Net::HTTP agent for TinyGrabber. Initializes the connection to a resource and sets the connection attributes.

Constant Summary collapse

AGENT_ALIASES =

Agent aliases given from www.useragentstring.com/pages/Chrome/

[
  # Google Chrome user-agent strings
  'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
  'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
  # Mozilla Firefox user-agent strings
  'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
  'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
  'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0',
  # Microsoft Internet Explorer user-agent strings
  'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
  'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
  'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)',
  'Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)',
  # Opera user-agent strings (the third entry is a Firefox-style string that names Opera at the end)
  'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
  'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
  'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
  'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52',
]

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeAgent

Initializes the agent with its default attributes.



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/tiny_grabber/agent.rb', line 47

# Builds an agent with default settings: a fresh Debug helper, a randomly
# chosen user agent, empty proxy/basic-auth/header settings, no cookies and
# a read timeout of 10.
def initialize
  @debug = Debug.new

  # Initialize variables agent attributes
  # Array#sample picks one alias at random; the previous
  # `rand(count) - 1` index only worked because -1 wraps to the last element.
  @user_agent = AGENT_ALIASES.sample
  @proxy = []
  @basic_auth = {}
  @headers = {}
  @cookies = nil
  @read_timeout = 10
  # Initialize variable for URI object
  @uri = nil
  # Initialize variable for Net::HTTP request object
  @http = Net::HTTP
  # Request object built by #fetch; nil until the first request is prepared
  @request = nil
  # Initialize variable for Net::HTTP response object
  @response = nil
  # NOTE(review): certificate verification is off by default; callers that
  # need it can switch modes through #verify_mode=.
  @verify_mode = OpenSSL::SSL::VERIFY_NONE
end

Instance Attribute Details

#basic_authObject

Basic authentication configuration



15
16
17
# File 'lib/tiny_grabber/agent.rb', line 15

# Reader for the basic-auth settings. #initialize starts this as an empty
# Hash; #set_basic_auth reads its :username and :password entries.
def basic_auth
  @basic_auth
end

#cookiesObject

Cookies



19
20
21
# File 'lib/tiny_grabber/agent.rb', line 19

# Reader for the stored cookies. nil after #initialize or #reset;
# #save_cookies fills it from the response and #set_cookies sends it back
# as the request's Cookie header.
def cookies
  @cookies
end

#debugObject

Debug configuration



7
8
9
# File 'lib/tiny_grabber/agent.rb', line 7

# Reader for the debug helper created with Debug.new in #initialize.
def debug
  @debug
end

#headersObject

Headers



17
18
19
# File 'lib/tiny_grabber/agent.rb', line 17

# Reader for the header set held by the agent. It starts as an empty Hash,
# is applied to outgoing requests by #set_headers and is replaced with the
# response headers by #save_headers.
def headers
  @headers
end

#proxyObject

Remote proxy configuration



13
14
15
# File 'lib/tiny_grabber/agent.rb', line 13

# Reader for the proxy configuration; #initialize starts it as an empty Array.
def proxy
  @proxy
end

#read_timeoutObject

Max time to execute request



9
10
11
# File 'lib/tiny_grabber/agent.rb', line 9

# Reader for the read timeout (10 by default). #send_request passes it as
# the read_timeout: option when opening the connection.
def read_timeout
  @read_timeout
end

#user_agentObject

Web browser name



11
12
13
# File 'lib/tiny_grabber/agent.rb', line 11

# Reader for the user-agent string. #initialize picks one entry of
# AGENT_ALIASES; #set_user_agent copies it into the 'User-Agent' header.
def user_agent
  @user_agent
end

Instance Method Details

#fetch(url, method = :get, headers = {}, params = {}) ⇒ Object

Performs a GET or POST request. Sets the USER_AGENT, BASIC_AUTH, HEADERS and COOKIES request attributes, sends the request, and saves the response COOKIES for subsequent requests.

Parameters:

  • url

    Resource link

  • method (defaults to: :get)

    Request method

  • headers (defaults to: {})

    Request header

  • params (defaults to: {})

    Request additional params



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/tiny_grabber/agent.rb', line 180

# Performs a GET or POST request and returns the response object.
#
# The request is decorated with the agent's user agent, basic auth, headers
# and cookies. On a 2xx response the response headers and cookies are stored
# on the agent for the next request.
#
# @param url     resource link
# @param method  request method, :get or :post (a String such as 'POST' is
#                accepted as well)
# @param headers request headers; when non-empty they replace the headers
#                currently held by the agent
# @param params  form data sent with a POST request
# @return the response produced by #send_request
# @raise [ArgumentError] when +method+ is neither GET nor POST
def fetch url, method = :get, headers = {}, params = {}
  # Reject unsupported verbs up front; otherwise the previous request object
  # (or nil) would be reused silently.
  verb = method.to_s.downcase.to_sym
  unless [:get, :post].include?(verb)
    raise ArgumentError, "Unsupported request method: #{method.inspect}"
  end

  if @debug.active
    @debug.save '=============================='
    @debug.save "#{method.upcase} #{url}"
    @debug.save "-> [proxy] = #{@proxy}" if @proxy
    @debug.save "-> [params] = #{params}"
    @debug.save '------------------------------'
  end
  set_uri url
  case verb
    when :get
      @request = Net::HTTP::Get.new(@uri.request_uri)
    when :post
      @request = Net::HTTP::Post.new(@uri.request_uri)
      @request.set_form_data(params)
  end
  # Adopt the caller's headers BEFORE adding the user agent: the old order
  # wrote User-Agent into @headers and then replaced @headers, dropping it.
  # The copy keeps the caller's hash from being mutated by the agent.
  @headers = headers.dup unless headers.empty?
  # An explicit User-Agent supplied by the caller for this request wins.
  caller_sets_agent = headers.keys.any? { |name| name.to_s.downcase == 'user-agent' }
  set_user_agent if @user_agent && !caller_sets_agent
  set_basic_auth unless @basic_auth.empty?
  set_headers unless @headers.empty?
  set_cookies if @cookies
  @response = send_request
  case @response
    # HTTP response code 1xx
    when Net::HTTPInformation
      @debug.save "<- [response] = Net::HTTPInformation" if @debug.active
    # HTTP response code 2xx
    when Net::HTTPSuccess
      save_headers if @response.header
      save_cookies if @response.cookies
      @debug.save "<- [response] = #{@response.code} Net::HTTPSuccess" if @debug.active
    # HTTP response code 3xx
    when Net::HTTPRedirection
      @debug.save "<- [response] = #{@response.code} Net::HTTPRedirection" if @debug.active
    # HTTP response code 4xx
    when Net::HTTPClientError
      @debug.save "<- [response] = #{@response.code} Net::HTTPClientError" if @debug.active
    # HTTP response code 5xx
    when Net::HTTPServerError
      @debug.save "<- [response] = #{@response.code} Net::HTTPServerError" if @debug.active
  end
  @debug.save_to_file @response.body if @debug.save_html
  @response
end

#resetObject

Clears headers and cookies



300
301
302
303
# File 'lib/tiny_grabber/agent.rb', line 300

# Drops the stored headers and cookies so the next request starts with an
# empty header Hash and no Cookie header.
def reset
  @headers = {}
  @cookies = nil
end

#save_cookiesObject

Save response cookies in agent attribute



292
293
294
295
# File 'lib/tiny_grabber/agent.rb', line 292

# Keeps the cookies of the latest response on the agent and, when debugging
# is active, records them in the debug log.
def save_cookies
  @cookies = @response.cookies
  return unless @debug.active

  @debug.save "<- [cookies] = #{@cookies}"
end

#save_headersObject

Save response headers in agent attribute



282
283
284
285
286
287
# File 'lib/tiny_grabber/agent.rb', line 282

# Takes over the headers of the latest response as the agent's headers.
def save_headers
  @headers = @response.headers
  # transfer-encoding is removed so it is not sent along with the next
  # request in a chain
  @headers.delete('transfer-encoding')
  if @debug.active
    @debug.save "<- [headers] = #{@headers}"
  end
end

#send_requestObject

Sends the request and returns the response. Uses an SSL connection when the link scheme is HTTPS.



272
273
274
275
276
277
# File 'lib/tiny_grabber/agent.rb', line 272

# Opens a connection to the current URI's host and port, sends the prepared
# request and returns whatever the connection's #request call returns.
# SSL is enabled when the URI scheme is 'https'.
def send_request
  connection_options = {
    use_ssl: @uri.scheme == 'https',
    verify_mode: @verify_mode,
    read_timeout: @read_timeout
  }
  @http.start(@uri.host, @uri.port, connection_options) do |connection|
    @debug.save "-> [read_timeout] = #{@read_timeout}" if @debug.active
    connection.request(@request)
  end
end

#set_basic_authObject

Set BASIC_AUTH request authentication



247
248
249
250
# File 'lib/tiny_grabber/agent.rb', line 247

# Applies the configured :username / :password pair to the current request
# as HTTP basic authentication.
def set_basic_auth
  login = @basic_auth[:username]
  secret = @basic_auth[:password]
  @request.basic_auth(login, secret)
  return unless @debug.active

  @debug.save "-> [basic_auth] = #{@basic_auth}"
end

#set_cookiesObject

Set request COOKIES



263
264
265
266
# File 'lib/tiny_grabber/agent.rb', line 263

# Sends the stored cookies with the current request via its Cookie header.
def set_cookies
  @request['Cookie'] = @cookies
  if @debug.active
    @debug.save "-> [cookies] = #{@cookies}"
  end
end

#set_headersObject

Set request HEADERS



255
256
257
258
# File 'lib/tiny_grabber/agent.rb', line 255

# Copies every stored header onto the current request.
#
# Headers are assigned with []= (as #set_cookies does) rather than appended
# with add_field: a new Net::HTTP request already carries default
# 'User-Agent' and 'Accept' values, and appending turned the user agent
# into "Ruby, <agent>" instead of replacing it.
# NOTE(review): a nil header value now removes the header instead of sending
# it empty — confirm no caller relies on empty headers.
def set_headers
  @headers.each { |k, v| @request[String(k)] = v }
  @debug.save "-> [headers] = #{@headers}" if @debug.active
end

#set_uri(url) ⇒ Object

Initialize URI object from request url

Parameters:

  • url

    Request link



230
231
232
233
234
# File 'lib/tiny_grabber/agent.rb', line 230

# Builds the URI object for a request from the given link.
#
# The link is unescaped and then escaped again so that both raw and already
# percent-encoded links end up encoded once. URI.escape / URI.unescape were
# removed in Ruby 3.0, so the RFC 2396 parser object, which still provides
# both operations, is used instead.
#
# @param url request link
def set_uri url
  parser = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
  @uri = URI(parser.escape(parser.unescape(url)))
  @debug.save "-> [uri] = #{@uri}" if @debug.active
end

#set_user_agentObject

Set USER_AGENT request attribute



239
240
241
242
# File 'lib/tiny_grabber/agent.rb', line 239

# Stores the agent's user-agent string under the 'User-Agent' key of the
# stored headers.
def set_user_agent
  @headers['User-Agent'] = @user_agent
  return unless @debug.active

  @debug.save "-> [user_agent] = #{@user_agent}"
end

#var_to_sym(var, str_to_sym = false) ⇒ Object

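Converts a variable and its contents to symbols: Hash keys are symbolized recursively, and String values are symbolized when str_to_sym is true.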

Parameters:

  • var

    Variable that needs to be converted



309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# File 'lib/tiny_grabber/agent.rb', line 309

# Recursively converts a structure to symbols.
#
# Hash keys are always turned into symbols; Hash values and Array elements
# are converted recursively. A String is turned into a symbol only when
# +str_to_sym+ is true. Any other value is returned unchanged.
#
# @param var        value to convert
# @param str_to_sym whether String values become symbols too
# @return the converted value (new Hash/Array for Hash/Array input)
def var_to_sym(var, str_to_sym = false)
  case var
  when Hash
    var.each_with_object({}) do |(key, value), converted|
      converted[key.to_sym] = var_to_sym(value, str_to_sym)
    end
  when Array
    var.map { |item| var_to_sym(item, str_to_sym) }
  when String
    str_to_sym ? var.to_sym : var
  else
    var
  end
end

#verify_mode=(verify_mode) ⇒ Object

Set verify_mode

Parameters:

  • verify_mode

    SSL verify_mode



166
167
168
# File 'lib/tiny_grabber/agent.rb', line 166

# Writer for the SSL verify mode that #send_request passes as verify_mode:
# when opening the connection.
#
# @param verify_mode SSL verify_mode
def verify_mode=(verify_mode)
  @verify_mode = verify_mode
end