Class: HttpUtilities::Proxies::ProxyChecker

Inherits:
Object
  • Object
show all
Defined in:
lib/http_utilities/proxies/proxy_checker.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeProxyChecker

Returns a new instance of ProxyChecker.



12
13
14
15
16
17
18
19
20
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 12

def initialize
  self.client                       =   HttpUtilities::Http::Client.new

  self.processed_proxies            =   []
  
  self.limit                        =   1000
  self.minimum_successful_attempts  =   1
  self.maximum_failed_attempts      =   10
end

Instance Attribute Details

#clientObject

Returns the value of attribute client.



9
10
11
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 9

def client
  @client
end

#limitObject

Returns the value of attribute limit.



10
11
12
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 10

def limit
  @limit
end

#maximum_failed_attemptsObject

Returns the value of attribute maximum_failed_attempts.



10
11
12
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 10

def maximum_failed_attempts
  @maximum_failed_attempts
end

#minimum_successful_attemptsObject

Returns the value of attribute minimum_successful_attempts.



10
11
12
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 10

def minimum_successful_attempts
  @minimum_successful_attempts
end

#processed_proxiesObject

Returns the value of attribute processed_proxies.



9
10
11
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 9

def processed_proxies
  @processed_proxies
end

Instance Method Details

#check_and_update_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts) ⇒ Object



22
23
24
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 22

def check_and_update_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts)
  check_proxies(protocol: protocol, proxy_type: proxy_type, mode: mode, maximum_failed_attempts: maximum_failed_attempts)
end

#check_http_proxy(proxy, test_url: "http://www.google.com/robots.txt", timeout: 10) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 82

def check_http_proxy(proxy, test_url: "http://www.google.com/robots.txt", timeout: 10)
  options       =   {
                      use_proxy:       true,
                      proxy:           {host: proxy.host, port: proxy.port}, 
                      proxy_protocol:  proxy.protocol,
                      timeout:         timeout
                    }
  
  options.merge!(proxy_username: proxy.username) if proxy.username && proxy.username.present?
  options.merge!(proxy_password: proxy.password) if proxy.password && proxy.password.present?

  Rails.logger.info "#{Time.now}: Fetching robots.txt for Google.com with proxy #{proxy.proxy_address}. Using authentication? #{options.has_key?(:proxy_username).to_s}"
  
  response       =   self.client.get(test_url, options: options)
  valid_proxy    =   (response && response.body && response.body =~ /Allow: \/search\/about/i)

  update_proxy(proxy, valid_proxy)
end

#check_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 26

def check_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts)
  proxies                           =   Proxy.should_be_checked(
    protocol:                 protocol,
    proxy_type:               proxy_type,
    date:                     Time.now,
    limit:                    self.limit,
    maximum_failed_attempts:  maximum_failed_attempts
  )

  if (proxies && proxies.any?)
    Rails.logger.info "Found #{proxies.size} #{proxy_type} proxies to check."

    proxies.each do |proxy|
      case mode
        when :synchronous
          check_proxy(proxy)
        when :resque
          Resque.enqueue(HttpUtilities::Jobs::Resque::Proxies::CheckProxyJob, proxy.id.to_s)
        when :sidekiq
          HttpUtilities::Jobs::Sidekiq::Proxies::CheckProxyJob.perform_async(proxy.id.to_s)
      end
    end

  else
    Rails.logger.info "Couldn't find any proxies to check!"
  end
end

#check_proxy(proxy) ⇒ Object



54
55
56
57
58
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 54

def check_proxy(proxy)
  Rails.logger.info "#{Time.now}: Will check if proxy #{proxy.proxy_address} is working."
  
  self.send("check_#{proxy.protocol}_proxy", proxy)
end

#check_socks_proxy(proxy, test_host: "whois.verisign-grs.com", test_port: 43, test_query: "=google.com", timeout: 30) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 60

def check_socks_proxy(proxy, test_host: "whois.verisign-grs.com", test_port: 43, test_query: "=google.com", timeout: 30)
  valid_proxy     =   false

  begin
    socks_proxy   =   Net::SSH::Proxy::SOCKS5.new(proxy.host, proxy.port, proxy.socks_proxy_credentials)
    client        =   socks_proxy.open(test_host, test_port, {timeout: timeout})
  
    client.write("#{test_query}\r\n")
    response      =   client.read
    
    valid_proxy   =   (response && response.present?)
    
    client.close
  
  rescue StandardError => e
    Rails.logger.error "Exception occured while trying to check proxy #{proxy.proxy_address}. Error Class: #{e.class}. Error Message: #{e.message}"
    valid_proxy   =   false
  end
  
  update_proxy(proxy, valid_proxy)
end

#update_proxiesObject



101
102
103
104
105
106
107
108
109
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 101

def update_proxies
  Rails.logger.info "Updating/Importing #{self.processed_proxies.size} proxies"

  if (self.processed_proxies && self.processed_proxies.any?)
    self.processed_proxies.each do |value|
      update_proxy(value[:proxy], value[:valid])
    end
  end
end

#update_proxy(proxy, valid) ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/http_utilities/proxies/proxy_checker.rb', line 111

def update_proxy(proxy, valid)
  Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is #{valid ? "working" : "not working"}!"
  
  successful_attempts         =   proxy.successful_attempts || 0
  failed_attempts             =   proxy.failed_attempts || 0

  if (valid)
    successful_attempts      +=  1
  else
    failed_attempts          +=  1
  end

  is_valid                    =   (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)
  
  proxy.valid_proxy           =   is_valid
  proxy.successful_attempts   =   successful_attempts
  proxy.failed_attempts       =   failed_attempts
  proxy.last_checked_at       =   Time.now
  proxy.save
end