Class: BaseScraper::Service
- Inherits: Object
- Inheritance chain: Object → BaseScraper::Service
- Defined in:
- lib/base_scraper_service/agent_object.rb,
lib/base_scraper_service/base_scraper.rb,
lib/base_scraper_service/location_service.rb
Defined Under Namespace
Classes: LocationService
Instance Method Summary
- #agent_object ⇒ Object
- #format_isbn(isbn) ⇒ Object
- #format_price(price) ⇒ Object
- #logs_enable? ⇒ Boolean
- #send_request(agent, uri, params = nil, headers = {}, _optoins = {}) ⇒ Object
- #with_exception(long_message: "", status: 500, short_message: "exception") ⇒ Object
Instance Method Details
#agent_object ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/base_scraper_service/agent_object.rb', line 6

# Builds and returns a new Mechanize agent configured for scraping:
# short open/read/idle timeouts, keep-alive disabled, a User-Agent taken
# from UserAgent.random, and Mechanize::Page as the default pluggable parser.
#
# NOTE(review): certificate verification is switched off
# (OpenSSL::SSL::VERIFY_NONE) -- confirm this is intentional.
#
# @return [Mechanize] the configured agent
def agent_object
  # Prevent from <Mechanize::Error: unsupported content-encoding: UTF-8>:
  # a response that declares Content-Encoding "UTF-8" is relabelled "gzip"
  # before Mechanize decodes the body.
  relabel_utf8_encoding = lambda do |_agent, _uri, response, _body_io|
    response['Content-Encoding'] = 'gzip' if response['Content-Encoding'].to_s == 'UTF-8'
  end

  Mechanize.new.tap do |mech|
    mech.open_timeout = 8
    mech.read_timeout = 5
    mech.idle_timeout = 5
    mech.keep_alive = false
    mech.verify_mode = OpenSSL::SSL::VERIFY_NONE
    mech.user_agent = UserAgent.random
    mech.pluggable_parser.default = Mechanize::Page
    mech.content_encoding_hooks << relabel_utf8_encoding
  end
end
#format_isbn(isbn) ⇒ Object
45 46 47 |
# File 'lib/base_scraper_service/base_scraper.rb', line 45

# Hands +isbn+ to StdNum::ISBN.convert_to_13 and returns its result unchanged.
#
# @param isbn [Object] the ISBN value to convert (passed through as-is)
# @return [Object] whatever StdNum::ISBN.convert_to_13 returns for +isbn+
def format_isbn(isbn)
  isbn_converter = StdNum::ISBN
  isbn_converter.convert_to_13(isbn)
end
#format_price(price) ⇒ Object
40 41 42 43 |
# File 'lib/base_scraper_service/base_scraper.rb', line 40

# Converts a scraped price such as "$1,234.56" into a Float.
#
# Dollar signs and comma thousands separators are removed before parsing;
# without stripping the commas, String#to_f would stop at the first one and
# "$1,234.56" would come back as 1.0.
#
# @param price [Object] raw price value; it is stringified before parsing
# @return [Float, nil] the parsed amount, or nil when +price+ is not present
def format_price(price)
  return unless price.present?

  price.to_s.delete("$,").strip.to_f
end
#logs_enable? ⇒ Boolean
53 54 55 |
# File 'lib/base_scraper_service/base_scraper.rb', line 53

# Reports whether logging is switched on through the environment.
#
# @return [Boolean] true only when ENV["ENABLE_LOGS"] is exactly "true"
def logs_enable?
  flag = ENV.fetch("ENABLE_LOGS", nil)
  flag == "true"
end
#send_request(agent, uri, params = nil, headers = {}, _optoins = {}) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/base_scraper_service/base_scraper.rb', line 7

# Performs a GET (when +params+ is nil) or a POST (otherwise) through +agent+,
# retrying on failure with a different proxy each time.
#
# Before the first attempt the agent is refreshed and given a proxy. After
# each failed attempt the proxy is changed again; for failures other than
# Mechanize::ResponseCodeError the agent is refreshed as well. From the
# fourth failure on, the proxy is set from ENV["LPM_SERVER_URL"] and
# ENV["LPM_SERVER_PORT"]. After five failed attempts the method gives up.
#
# Only StandardError is rescued, so Interrupt, SystemExit and similar
# process-level exceptions propagate instead of being retried.
#
# @param agent [Mechanize] agent used to issue the request
# @param uri [String, URI] target address
# @param params [Object, nil] POST parameters; nil selects a GET request
# @param headers [Hash] extra request headers
# @param _optoins [Hash] unused
# @return [Object, nil] the value returned by agent.get / agent.post, or nil
#   when every attempt failed
def send_request(agent, uri, params = nil, headers = {}, _optoins = {})
  max_tries = 5
  fallback_proxy_after = 4
  tries = 0

  refresh_agent(agent)
  change_proxy(agent)

  begin
    params.nil? ? agent.get(uri, [], nil, headers) : agent.post(uri, params, headers)
  rescue StandardError => e
    # An HTTP error status only needs a new proxy; any other failure also
    # gets a refreshed agent.
    refresh_agent(agent) unless e.is_a?(Mechanize::ResponseCodeError)
    change_proxy(agent)
    tries += 1
    agent.set_proxy(ENV["LPM_SERVER_URL"], ENV["LPM_SERVER_PORT"]) if tries >= fallback_proxy_after
    retry if tries < max_tries

    # Reached only once the retry budget is exhausted.
    puts "Message: #{e.message} from #{uri}, proxy: #{agent.proxy_addr}" if logs_enable?
    nil
  end
end
#with_exception(long_message: "", status: 500, short_message: "exception") ⇒ Object
49 50 51 |
# File 'lib/base_scraper_service/base_scraper.rb', line 49

# Builds the hash returned when scraping fails: an empty offers list plus
# the supplied messages and status.
#
# Every value is written out explicitly so the method does not depend on
# value-less hash keys.
#
# @param long_message [String] detailed description of the failure
# @param status [Integer] status code to report
# @param short_message [String] brief label for the failure
# @return [Hash] hash with the keys :offers, :short_message, :long_message, :status
def with_exception(long_message: "", status: 500, short_message: "exception")
  {
    offers: [],
    short_message: short_message,
    long_message: long_message,
    status: status
  }
end