Class: TinyGrabber::Agent
- Inherits:
-
Object
- Object
- TinyGrabber::Agent
- Defined in:
- lib/tiny_grabber/agent.rb
Constant Summary collapse
- AGENT_ALIASES =
Agent aliases given from www.useragentstring.com/pages/Chrome/
[
  # Chrome
  'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
  'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
  # Firefox
  'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
  'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0',
  'Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0',
  # Internet Explorer
  'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko',
  'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
  'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)',
  'Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)',
  # Opera
  'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
  'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
  'Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14',
  'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52'
].freeze
Instance Attribute Summary collapse
-
#basic_auth ⇒ Object
writeonly
Set BASIC_AUTH agent attribute.
-
#cookies ⇒ Object
Cookies.
-
#debug ⇒ Object
writeonly
Set debug configuration.
-
#follow_location ⇒ Object
writeonly
Init follow location for redirect.
-
#headers ⇒ Object
Headers.
-
#proxy ⇒ Object
Remote proxy configuration.
-
#read_timeout ⇒ Object
writeonly
Set READ_TIMEOUT agent attribute.
-
#uri ⇒ Object
Uri.
-
#user_agent ⇒ Object
writeonly
Set USER_AGENT agent attribute.
-
#verify_mode ⇒ Object
writeonly
Set verify mode.
Instance Method Summary collapse
-
#convert_to_uri(url) ⇒ Object
Initialize URI object from request url.
-
#fetch(url, method = :get, headers = {}, params = {}) ⇒ Object
Fetch a URL with the GET or POST HTTP method. Sets the USER_AGENT, BASIC_AUTH, HEADERS and COOKIES request attributes, sends the request, and saves the response COOKIES for subsequent requests.
-
#initialize ⇒ Agent
constructor
Initialization object.
-
#reset ⇒ Object
Clears headers and cookies.
-
#save_cookies ⇒ Object
Save response cookies in agent attribute.
-
#save_headers ⇒ Object
Save response headers in agent attribute.
-
#send_request ⇒ Object
Send the request and return the response. Uses an SSL connection when the URL scheme is HTTPS.
-
#set_basic_auth ⇒ Object
Set BASIC_AUTH request authentication.
-
#set_cookies ⇒ Object
Set request COOKIES.
-
#set_headers ⇒ Object
Set request HEADERS.
-
#set_user_agent ⇒ Object
Set USER_AGENT request attribute.
-
#var_to_sym(var, str_to_sym = false) ⇒ Object
Recursively convert hash keys (and, optionally, string values) to symbols.
Constructor Details
#initialize ⇒ Agent
Initialization object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/tiny_grabber/agent.rb', line 54
# Build an agent with its default configuration: a randomly chosen user
# agent, no proxy, no basic auth, empty headers, no cookies, redirects not
# followed, a read timeout of 10 and SSL peer verification switched off.
def initialize
  @debug = Debug.new
  # Agent attributes.
  # Array#sample picks one alias at random; the previous index arithmetic
  # (`rand(count) - 1`) produced -1 and only worked because a negative
  # index wraps around to the last element.
  @user_agent = AGENT_ALIASES.sample
  @proxy = []
  @basic_auth = {}
  @headers = {}
  @cookies = nil
  @follow_location = false
  @read_timeout = 10
  # URI object of the current request (assigned by #convert_to_uri)
  @uri = nil
  # Class used to open connections (see #send_request)
  @http = Net::HTTP
  # Last response received
  @response = nil
  # NOTE(review): certificate verification is disabled by default -
  # confirm this is intended; callers can override it via #verify_mode=.
  @verify_mode = OpenSSL::SSL::VERIFY_NONE
end
Instance Attribute Details
#basic_auth=(basic_auth) ⇒ Object (writeonly)
Set BASIC_AUTH agent attribute
16 17 18 |
# File 'lib/tiny_grabber/agent.rb', line 16 def basic_auth=(value) @basic_auth = value end |
#cookies ⇒ Object
Cookies
20 21 22 |
# File 'lib/tiny_grabber/agent.rb', line 20
# Reader for the stored cookies (nil until a response provided some).
# The method name was missing from the listing (`def @cookies end`);
# it is restored here.
def cookies
  @cookies
end
#debug=(debug) ⇒ Object (writeonly)
Set debug configuration
8 9 10 |
# File 'lib/tiny_grabber/agent.rb', line 8 def debug=(value) @debug = value end |
#follow_location=(follow_location) ⇒ Object (writeonly)
Init follow location for redirect
24 25 26 |
# File 'lib/tiny_grabber/agent.rb', line 24 def follow_location=(value) @follow_location = value end |
#headers ⇒ Object
Headers
18 19 20 |
# File 'lib/tiny_grabber/agent.rb', line 18 def headers @headers end |
#proxy ⇒ Object
Remote proxy configuration
14 15 16 |
# File 'lib/tiny_grabber/agent.rb', line 14 def proxy @proxy end |
#read_timeout=(read_timeout) ⇒ Object (writeonly)
Set READ_TIMEOUT agent attribute
10 11 12 |
# File 'lib/tiny_grabber/agent.rb', line 10 def read_timeout=(value) @read_timeout = value end |
#uri ⇒ Object
Uri
26 27 28 |
# File 'lib/tiny_grabber/agent.rb', line 26 def uri @uri end |
#user_agent=(user_agent) ⇒ Object (writeonly)
Set USER_AGENT agent attribute
12 13 14 |
# File 'lib/tiny_grabber/agent.rb', line 12 def user_agent=(value) @user_agent = value end |
#verify_mode=(value) ⇒ Object (writeonly)
Set verify mode
22 23 24 |
# File 'lib/tiny_grabber/agent.rb', line 22 def verify_mode=(value) @verify_mode = value end |
Instance Method Details
#convert_to_uri(url) ⇒ Object
Initialize URI object from request url
251 252 253 254 255 256 257 |
# File 'lib/tiny_grabber/agent.rb', line 251
# Build the URI object for a request URL and store it in @uri.
#
# The fragment ("#...") is dropped, then the URL is unescaped and
# re-escaped so that both raw and already percent-encoded URLs end up
# encoded exactly once.
#
# URI.escape / URI.unescape were removed in Ruby 3.0; the same behaviour
# is available through URI::DEFAULT_PARSER.
#
# @param url [String] absolute URL, escaped or not
# @return the result of the debug call, or nil when debugging is off
def convert_to_uri(url)
  # Remove anchor
  without_anchor = url.gsub(/#.*\Z/, '')
  parser = URI::DEFAULT_PARSER
  @uri = URI(parser.escape(parser.unescape(without_anchor)))
  @debug.save "-> [uri] = #{@uri}" if @debug.active
end
#fetch(url, method = :get, headers = {}, params = {}) ⇒ Object
Fetch a URL with the GET or POST HTTP method. Sets the USER_AGENT, BASIC_AUTH, HEADERS and COOKIES request attributes, sends the request, and saves the response COOKIES for subsequent requests.
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 |
# File 'lib/tiny_grabber/agent.rb', line 188
# Perform a GET or POST request and return the response.
#
# Applies the user agent, basic auth, headers and cookies to the request,
# sends it, and stores the response headers and cookies for the next
# request. With follow_location enabled, 3xx Location headers and
# meta-refresh tags are followed with a fresh GET.
#
# The listing this was restored from had lost the `set_cookies` /
# `save_cookies` calls (leaving a dangling `if @cookies`).
# NOTE(review): `save_cookies` is placed next to each `save_headers`
# call - confirm against the original source.
#
# @param url [String] URL to request
# @param method [Symbol] :get or :post
# @param headers [Hash] request headers; replaces @headers when non-empty
# @param params [Hash] form data for POST requests
# @return the response object, with its uri set to the requested URI
# @raise [ArgumentError] when method is neither :get nor :post
def fetch(url, method = :get, headers = {}, params = {})
  if @debug.active
    @debug.save '=============================='
    @debug.save "#{method.upcase} #{url}"
    @debug.save "-> [proxy] = #{@proxy}" if @proxy
    @debug.save "-> [params] = #{params}"
    @debug.save '------------------------------'
  end
  convert_to_uri url
  case method
  when :get
    @request = Net::HTTP::Get.new(@uri.request_uri)
  when :post
    @request = Net::HTTP::Post.new(@uri.request_uri)
    @request.set_form_data(params)
  else
    # Without this branch the previous request object (or nil) was reused.
    raise ArgumentError, "unsupported HTTP method: #{method.inspect}"
  end
  # NOTE(review): set_user_agent writes into @headers, which the next
  # assignment replaces when the caller passes headers - the User-Agent
  # entry is then dropped unless the caller supplies one.
  set_user_agent if @user_agent
  set_basic_auth unless @basic_auth.empty?
  @headers = headers unless headers.empty?
  set_headers if @headers
  set_cookies if @cookies
  @response = send_request
  case @response
  # HTTP response code 1xx
  when Net::HTTPInformation
    @debug.save '<- [response] = Net::HTTPInformation' if @debug.active
  # HTTP response code 2xx
  when Net::HTTPSuccess
    save_headers
    save_cookies
    @debug.save "<- [response] = #{@response.code} Net::HTTPSuccess" if @debug.active
    # Follow meta refresh
    if @follow_location
      refresh = @response.ng.at_css('meta[http-equiv="refresh"]')
      @response = fetch refresh.attr('content').gsub(/\A.*?(http)/, 'http') if refresh
    end
  # HTTP response code 3xx
  when Net::HTTPRedirection
    @debug.save "<- [response] = #{@response.code} Net::HTTPRedirection" if @debug.active
    @debug.save 'try curl user_agent: tg.user_agent=\'curl\'' if @debug.active
    # Follow location
    if @follow_location
      @response = fetch @response.header['Location']
    else
      save_headers
      save_cookies
    end
  # HTTP response code 4xx
  when Net::HTTPClientError
    @debug.save "<- [response] = #{@response.code} Net::HTTPClientError" if @debug.active
  # HTTP response code 5xx
  when Net::HTTPServerError
    @debug.save "<- [response] = #{@response.code} Net::HTTPServerError" if @debug.active
  end
  @response.uri = @uri
  @debug.save_to_file @response.body if @debug.save_html
  @response
end
#reset ⇒ Object
Clears headers and cookies
329 330 331 332 |
# File 'lib/tiny_grabber/agent.rb', line 329 def reset @headers = {} @cookies = nil end |
#save_cookies ⇒ Object
Save response cookies in agent attribute
317 318 319 320 321 322 323 324 325 |
# File 'lib/tiny_grabber/agent.rb', line 317
# Remember the cookies of the last response for subsequent requests.
#
# Uses @response.cookies when the response offers that method, otherwise
# the raw 'Set-Cookie' header. @cookies is left untouched when the
# response carries no cookies. The method name and the `.cookies` calls
# were missing from the listing and are restored here.
#
# NOTE(review): the raw 'Set-Cookie' value still includes attributes such
# as Path/Expires and is later sent verbatim as the 'Cookie' header -
# confirm the servers in use tolerate that.
#
# @return the stored cookies, or nil when the response had none
def save_cookies
  received = @response.respond_to?(:cookies) ? @response.cookies : @response['Set-Cookie']
  @cookies = received if received
end
#save_headers ⇒ Object
Save response headers in agent attribute
307 308 309 310 311 312 313 |
# File 'lib/tiny_grabber/agent.rb', line 307
# Keep the headers of the last response in @headers so they take part in
# the next request. Does nothing when the response exposes no header.
def save_headers
  if @response.header
    @headers = @response.header
    # Delete header TRANSFER_ENCODING for chain of requests
    @headers.delete('transfer-encoding')
    if @debug.active
      @debug.save "<- [headers] = #{@headers}"
    end
  end
end
#send_request ⇒ Object
Send the request and return the response. Uses an SSL connection when the URL scheme is HTTPS.
298 299 300 301 302 303 |
# File 'lib/tiny_grabber/agent.rb', line 298
# Open a connection to the host and port of @uri through @http, send
# @request over it and return the response. SSL is switched on when the
# URI scheme is 'https'.
def send_request
  options = {
    use_ssl: @uri.scheme == 'https',
    verify_mode: @verify_mode,
    read_timeout: @read_timeout
  }
  @http.start(@uri.host, @uri.port, **options) do |connection|
    if @debug.active
      @debug.save "-> [read_timeout] = #{@read_timeout}"
    end
    connection.request(@request)
  end
end
#set_basic_auth ⇒ Object
Set BASIC_AUTH request authentication
268 269 270 271 |
# File 'lib/tiny_grabber/agent.rb', line 268
# Apply the :username / :password pair from @basic_auth to the request
# as HTTP basic authentication.
def set_basic_auth
  username = @basic_auth[:username]
  password = @basic_auth[:password]
  @request.basic_auth(username, password)
  return unless @debug.active

  @debug.save "-> [basic_auth] = #{@basic_auth}"
end
#set_cookies ⇒ Object
Set request COOKIES
290 291 292 293 |
# File 'lib/tiny_grabber/agent.rb', line 290
# Send the stored cookies with the request via the 'Cookie' header.
# The method name was missing from the listing (`def @request[...]`);
# it is restored here.
def set_cookies
  @request['Cookie'] = @cookies
  @debug.save "-> [cookies] = #{@cookies}" if @debug.active
end
#set_headers ⇒ Object
Set request HEADERS
275 276 277 278 279 280 281 282 283 284 285 286 |
# File 'lib/tiny_grabber/agent.rb', line 275
# Copy every entry of @headers onto the request.
#
# Headers are assigned, not appended: Net::HTTP requests are created with
# default 'Accept' and 'User-Agent' fields, and add_field would append the
# configured value to the default (sending e.g. "Ruby, Mozilla/5.0 ...").
# Only 'Accept' was special-cased before; assignment now covers every
# header.
def set_headers
  @headers.each do |name, value|
    @request[String(name)] = value
  end
  @debug.save "-> [headers] = #{@headers}" if @debug.active
end
#set_user_agent ⇒ Object
Set USER_AGENT request attribute
261 262 263 264 |
# File 'lib/tiny_grabber/agent.rb', line 261
# Store the configured user agent under the 'User-Agent' key of @headers,
# from where #set_headers copies it onto the request.
def set_user_agent
  @headers['User-Agent'] = @user_agent
  return unless @debug.active

  @debug.save "-> [user_agent] = #{@user_agent}"
end
#var_to_sym(var, str_to_sym = false) ⇒ Object
Recursively convert hash keys (and, optionally, string values) to symbols
338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 |
# File 'lib/tiny_grabber/agent.rb', line 338
# Recursively symbolize a nested structure.
#
# Hash keys are always converted with to_sym; hashes and arrays are
# walked recursively and rebuilt as new objects. Strings are converted to
# symbols only when str_to_sym is true. Any other value is returned
# unchanged.
#
# @param var [Object] value to convert
# @param str_to_sym [Boolean] also convert String values to symbols
# @return [Object] the converted copy
def var_to_sym(var, str_to_sym = false)
  case var
  when Hash
    var.each_with_object({}) do |(key, value), converted|
      converted[key.to_sym] = var_to_sym(value, str_to_sym)
    end
  when Array
    var.map { |item| var_to_sym(item, str_to_sym) }
  when String
    str_to_sym ? var.to_sym : var
  else
    var
  end
end