Class: BaseScraper::Service

Inherits:
Object
  • Object
show all
Defined in:
lib/base_scraper_service/agent_object.rb,
lib/base_scraper_service/base_scraper.rb,
lib/base_scraper_service/location_service.rb

Defined Under Namespace

Classes: LocationService

Instance Method Summary collapse

Instance Method Details

#agent_object ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/base_scraper_service/agent_object.rb', line 6

# Builds a new Mechanize agent configured for scraping.
#
# The agent gets tight read/open/idle timeouts, keep-alive disabled, a
# random User-Agent string, and Mechanize::Page as the default parser.
#
# NOTE(review): certificate verification is switched off (VERIFY_NONE), so
# HTTPS peers are not authenticated — confirm this is intentional.
#
# @return [Mechanize] the configured agent
def agent_object
  agent = Mechanize.new
  agent.read_timeout = 5
  agent.open_timeout = 8
  agent.idle_timeout = 5
  agent.keep_alive = false
  agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
  agent.user_agent = UserAgent.random
  agent.pluggable_parser.default = Mechanize::Page

  # Prevent <Mechanize::Error: unsupported content-encoding: UTF-8>.
  # Some servers put a charset in the Content-Encoding header; rewrite it to
  # 'gzip' before the body is decoded. The comparison ignores case so that
  # 'utf-8' is handled the same way as 'UTF-8'.
  # The hook is invoked with four arguments, so the lambda keeps all four.
  fix_bogus_encoding = lambda do |_agent, _uri, response, _body_io|
    encoding = response['Content-Encoding'].to_s
    response['Content-Encoding'] = 'gzip' if encoding.casecmp?('UTF-8')
  end
  agent.content_encoding_hooks << fix_bogus_encoding

  agent
end

#format_isbn(isbn) ⇒ Object



45
46
47
# File 'lib/base_scraper_service/base_scraper.rb', line 45

# Converts an ISBN by delegating to StdNum::ISBN.convert_to_13
# (presumably yielding the 13-digit form — confirm against the library).
#
# @param isbn the ISBN value, passed through unchanged
# @return whatever StdNum::ISBN.convert_to_13 returns for +isbn+
def format_isbn(isbn)
  isbn13 = StdNum::ISBN.convert_to_13(isbn)
  isbn13
end

#format_price(price) ⇒ Object



40
41
42
43
# File 'lib/base_scraper_service/base_scraper.rb', line 40

# Parses a displayed price into a Float.
#
# Dollar signs and thousands-separator commas are removed before parsing,
# so "$1,234.56" becomes 1234.56 rather than being cut off at the comma.
#
# @param price [String, Numeric, nil] the raw price value
# @return [Float, nil] the numeric price, or nil when +price+ is not present
def format_price(price)
  return unless price.present?

  # String#to_f stops at the first non-numeric character, so separators
  # must be stripped before the conversion.
  price.to_s.delete("$,").strip.to_f
end

#logs_enable? ⇒ Boolean

Returns:

  • (Boolean)


53
54
55
# File 'lib/base_scraper_service/base_scraper.rb', line 53

# Reports whether logging is switched on via the environment.
#
# @return [Boolean] true only when ENABLE_LOGS is exactly the string "true"
def logs_enable?
  flag = ENV.fetch("ENABLE_LOGS", nil)
  flag == "true"
end

#send_request(agent, uri, params = nil, headers = {}, _optoins = {}) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/base_scraper_service/base_scraper.rb', line 7

# Requests +uri+ through +agent+, retrying up to five times on failure.
#
# Sends a GET when +params+ is nil and a POST with +params+ otherwise. The
# agent is refreshed and given a new proxy before the first attempt. After
# each failure the proxy is changed again; from the fourth failure onward
# the proxy is set from LPM_SERVER_URL / LPM_SERVER_PORT instead.
#
# Only StandardError subclasses are retried. Interrupt, SystemExit and
# other non-StandardError exceptions propagate to the caller.
#
# @param agent the Mechanize agent used for the request
# @param uri the address to request
# @param params [Hash, nil] POST parameters; nil selects a GET
# @param headers [Hash] extra request headers
# @param _optoins [Hash] unused
# @return the page returned by the agent, or nil when every attempt failed
def send_request(agent, uri, params = nil, headers = {}, _optoins = {})
  tries = 0
  max_tries = 5
  page = nil

  refresh_agent(agent)
  change_proxy(agent)
  begin
    page = params.nil? ? agent.get(uri, [], nil, headers) : agent.post(uri, params, headers)
  rescue Mechanize::ResponseCodeError
    # Response-code errors only rotate the proxy; the agent is kept as is.
    change_proxy(agent)

    tries += 1

    # Overrides the proxy just chosen by change_proxy for the last attempts.
    agent.set_proxy(ENV["LPM_SERVER_URL"], ENV["LPM_SERVER_PORT"]) if tries >= 4

    retry if tries < max_tries
  rescue StandardError => e
    # Every other failure also refreshes the agent before retrying.
    refresh_agent(agent)
    change_proxy(agent)

    tries += 1

    agent.set_proxy(ENV["LPM_SERVER_URL"], ENV["LPM_SERVER_PORT"]) if tries >= 4

    retry if tries < max_tries
    # Reached only once all attempts are used up.
    puts "Message: #{e.message} from #{uri}, proxy: #{agent.proxy_addr}" if logs_enable?
  end

  page
end

#with_exception(long_message: "", status: 500, short_message: "exception") ⇒ Object



49
50
51
# File 'lib/base_scraper_service/base_scraper.rb', line 49

# Builds the failure payload returned when scraping raises.
#
# @param long_message [String] detailed description of the failure
# @param status [Integer] status code to report
# @param short_message [String] brief label for the failure
# @return [Hash] a hash with an empty :offers list plus the given
#   :short_message, :long_message and :status
def with_exception(long_message: "", status: 500, short_message: "exception")
  {
    offers: [],
    short_message: short_message,
    long_message: long_message,
    status: status
  }
end