Class: ProxyRotater

Inherits:
Object
  • Object
show all
Defined in:
lib/proxy_rotater.rb,
lib/proxy_rotater/version.rb

Constant Summary collapse

CONCURRENT_PROCESS =
10
PROXY_UPDATE_WAIT =
60 * 15
VERSION =
"0.0.2"

Instance Method Summary collapse

Constructor Details

#initialize(domain, options = {}) ⇒ ProxyRotater

Returns a new instance of ProxyRotater.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/proxy_rotater.rb', line 13

# Sets up the rotater state for +domain+ and then calls get_proxies.
#
# @param domain [Object] stored unchanged in @domain
# @param options [Hash] accepted by the signature; this method does not read it
def initialize(domain, options = {})
  @domain             = domain
  @interval_sec       = 1
  @req_limit_per_hour = 500
  @request_workers    = CONCURRENT_PROCESS

  # Bookkeeping lists; each one starts as its own empty array.
  @available, @over_heated, @failed = [], [], []

  # Blocks registered through add_custom_fail are collected here.
  @custom_fail = []

  get_proxies
end

Instance Method Details

#add_custom_fail(&block) ⇒ Object



80
81
82
# File 'lib/proxy_rotater.rb', line 80

# Registers a block that #get uses to decide whether a response counts as a
# failure. #get calls every registered block with the response object and
# treats the response as failed when any block returns a truthy value.
#
# @yieldparam res [Object] the response object handed over by #get
# @yieldreturn [Boolean] truthy when the response should be treated as failed
# @raise [ArgumentError] when no block is given
# @return [Array] the list of registered blocks, including the new one
def add_custom_fail(&block)
  # Without this guard a nil entry would be stored and only blow up later,
  # when #get tries to call it.
  raise ArgumentError, "add_custom_fail requires a block" if block.nil?

  @custom_fail << block
end

#check ⇒ Object



71
72
73
74
75
76
77
78
# File 'lib/proxy_rotater.rb', line 71

# Runs one maintenance pass, in this order:
#
# 1. disable_timeouted, check_req_limit and revival;
# 2. get_proxies, when fewer than @request_workers entries are in @available;
# 3. a sleep of PROXY_UPDATE_WAIT seconds, when @available is (still) empty;
# 4. sort.
#
# @return [Object] whatever #sort returns
def check
  disable_timeouted
  check_req_limit
  revival

  if @available.size < @request_workers
    get_proxies
  end

  if @available.empty?
    sleep(PROXY_UPDATE_WAIT)
  end

  sort
end

#get(urls, do_retry = true) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/proxy_rotater.rb', line 28

# Fetches one or more URLs, using one entry of @available per URL.
#
# URLs are handled in rounds of at most @request_workers. Within a round the
# i-th URL is requested with @available[i].get_url through
# Parallel.map_with_index (in_processes), and #check is called after every
# round. A URL gets no result (nil) when there is no entry at @available[i],
# when get_url returns nil, or when any block registered with
# add_custom_fail returns a truthy value for the response.
#
# @param urls [Object, Array] a single URL or an array of URLs
# @param do_retry [Boolean] when true, URLs without a result are requested
#   one more time; that second pass does not retry again
# @return [Hash] each URL mapped to { body: ..., response: ... }, or to nil
#   when no result could be obtained; empty when no URLs were given
def get(urls, do_retry = true)
  urls = [urls] unless urls.kind_of?(Array)
  # With no URLs the slice size below would be 0, which each_slice rejects.
  return {} if urls.empty?

  retry_url = []
  concurrency = [urls.size, @request_workers].min
  results = {}

  urls.each_slice(concurrency) do |round_url|
    round_result = Parallel.map_with_index(round_url, in_processes: concurrency) do |url, i|
      proxy = @available[i]
      # Fewer entries in @available than URLs in this round: report "no
      # result" so the URL lands in the retry list instead of raising.
      next if proxy.nil?

      res = proxy.get_url(url)
      next if res.nil?

      if @custom_fail.any? { |error_proc| error_proc.call(res) }
        # Flag the proxy that produced the rejected response (the flag used
        # to be set on the @available array itself, which raised).
        # NOTE(review): with in_processes this block presumably runs in a
        # worker process, so the flag may not reach the parent's copy of the
        # proxy -- confirm against the Parallel gem's behaviour.
        proxy.timeout = true
        next
      end

      { body: res.body, response: res.response }
    end
    check

    round_url.zip(round_result).each do |url, result|
      retry_url << url if result.nil?
      results[url] = result
    end
  end

  results.merge!(get(retry_url, false)) if do_retry && !retry_url.empty?
  results
end