Class: BaseScraper::Service
- Inherits:
- Object
- Ancestor chain: BaseScraper::Service < Object
- Defined in:
- lib/base_scraper_service/agent_object.rb,
lib/base_scraper_service/base_scraper.rb,
lib/base_scraper_service/location_service.rb
Defined Under Namespace
Classes: LocationService
Instance Method Summary
- #agent_object ⇒ Object
- #format_isbn(isbn) ⇒ Object
- #format_price(price) ⇒ Object
- #logs_enable? ⇒ Boolean
- #send_request(agent, uri, params = nil, headers = {}, _optoins = {}) ⇒ Object
- #with_exception(long_message: "", status: 500, short_message: "exception") ⇒ Object
Instance Method Details
#agent_object ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/base_scraper_service/agent_object.rb', line 7

# Builds and returns a freshly configured Mechanize agent.
#
# The agent gets read/open/idle timeouts, has keep-alive switched off, skips
# SSL certificate verification (OpenSSL::SSL::VERIFY_NONE), takes its user
# agent string from UserAgent.random and uses Mechanize::Page as the default
# pluggable parser.
#
# @return [Mechanize] the configured agent
def agent_object
  # Prevent from <Mechanize::Error: unsupported content-encoding: UTF-8>:
  # a response whose Content-Encoding header reads 'UTF-8' is relabelled 'gzip'.
  relabel_utf8_encoding = lambda do |_agent, _uri, response, _body_io|
    response['Content-Encoding'] = 'gzip' if response['Content-Encoding'].to_s == 'UTF-8'
  end

  Mechanize.new.tap do |scraper|
    scraper.read_timeout = 5
    scraper.open_timeout = 8
    scraper.keep_alive = false
    scraper.verify_mode = OpenSSL::SSL::VERIFY_NONE
    scraper.user_agent = UserAgent.random
    scraper.idle_timeout = 5
    scraper.pluggable_parser.default = Mechanize::Page
    scraper.content_encoding_hooks << relabel_utf8_encoding
  end
end
#format_isbn(isbn) ⇒ Object
55 56 57 |
# File 'lib/base_scraper_service/base_scraper.rb', line 55

# Converts an ISBN to its 13-digit form.
#
# This is a thin wrapper: the value is handed unchanged to
# StdNum::ISBN.convert_to_13 and that call's result is returned as-is.
#
# @param isbn [Object] the ISBN to convert (presumably a String — confirm with callers)
# @return [Object] whatever StdNum::ISBN.convert_to_13 returns for +isbn+
def format_isbn(isbn)
  isbn13 = StdNum::ISBN.convert_to_13(isbn)
  isbn13
end
#format_price(price) ⇒ Object
50 51 52 53 |
# File 'lib/base_scraper_service/base_scraper.rb', line 50

# Parses a scraped price into a Float.
#
# Dollar signs and thousands separators (commas) are removed before the
# conversion, so "$1,299.99" becomes 1299.99. Without removing the commas,
# String#to_f would stop at the first comma and yield 1.0.
#
# @param price [String, Numeric, nil] raw price text or number
# @return [Float, nil] the numeric price, or nil when +price+ is not present
def format_price(price)
  return unless price.present?

  # String#delete removes every "$" and "," character in one pass.
  price.to_s.delete("$,").strip.to_f
end
#logs_enable? ⇒ Boolean
63 64 65 |
# File 'lib/base_scraper_service/base_scraper.rb', line 63

# Tells whether logging is switched on through the environment.
#
# @return [Boolean] true only when the ENABLE_LOGS environment variable is
#   exactly the string "true"; false when it is unset or holds anything else
def logs_enable?
  ENV.fetch("ENABLE_LOGS", nil) == "true"
end
#send_request(agent, uri, params = nil, headers = {}, _optoins = {}) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/base_scraper_service/base_scraper.rb', line 17

# Performs a GET (when +params+ is nil) or a POST (otherwise) through +agent+,
# retrying on failure.
#
# Before the first attempt the agent is passed to refresh_agent and
# change_proxy. Up to five attempts are made in total. After a
# Mechanize::ResponseCodeError only the proxy is changed; after any other
# StandardError the agent is refreshed as well. From the fourth failure on,
# the proxy is set from the LPM_SERVER_URL / LPM_SERVER_PORT environment
# variables.
#
# Only StandardError is rescued. Interrupt, SystemExit, NoMemoryError and
# other non-StandardError exceptions propagate to the caller instead of being
# swallowed and retried.
#
# @param agent [Mechanize] agent used to issue the request
# @param uri [String, URI] target address
# @param params [Object, nil] POST payload; nil selects a GET request
# @param headers [Hash] extra request headers
# @param _optoins [Hash] accepted for compatibility; not used
# @return [Object, nil] the page returned by the agent, or nil when every
#   attempt failed
def send_request(agent, uri, params = nil, headers = {}, _optoins = {})
  tries = 0
  max_tries = 5
  lpm_fallback_after = 4 # failures after which the LPM proxy is used
  page = nil
  refresh_agent(agent)
  change_proxy(agent)
  begin
    page = params.nil? ? agent.get(uri, [], nil, headers) : agent.post(uri, params, headers)
  rescue Mechanize::ResponseCodeError
    change_proxy(agent)
    tries += 1
    agent.set_proxy(ENV["LPM_SERVER_URL"], ENV["LPM_SERVER_PORT"]) if tries >= lpm_fallback_after
    retry if tries < max_tries
  rescue StandardError => e
    refresh_agent(agent)
    change_proxy(agent)
    tries += 1
    agent.set_proxy(ENV["LPM_SERVER_URL"], ENV["LPM_SERVER_PORT"]) if tries >= lpm_fallback_after
    retry if tries < max_tries
    # Reached only once the retry budget is exhausted.
    puts "Message: #{e.message} from #{uri}, proxy: #{agent.proxy_addr}" if logs_enable?
  end
  page
end
#with_exception(long_message: "", status: 500, short_message: "exception") ⇒ Object
59 60 61 |
# File 'lib/base_scraper_service/base_scraper.rb', line 59

# Builds the hash used to report a failure: an empty offers list plus the
# given messages and status.
#
# Every value is written out explicitly so the literal parses on Ruby versions
# without hash value omission and matches the explicit +status: status+ pair.
#
# @param long_message [String] detailed description of the failure
# @param status [Integer] status code to report
# @param short_message [String] brief label for the failure
# @return [Hash] hash with the keys :offers, :short_message, :long_message
#   and :status
def with_exception(long_message: "", status: 500, short_message: "exception")
  {
    offers: [],
    short_message: short_message,
    long_message: long_message,
    status: status
  }
end