Class: RequestManager
- Inherits:
-
Object
- Object
- RequestManager
- Defined in:
- lib/requestmanager.rb
Instance Method Summary collapse
-
#gen_driver(chosen_proxy) ⇒ Object
Generate driver for searches.
-
#get_page(url, form_input = nil) ⇒ Object
Get the page requested.
-
#get_random_proxy(url) ⇒ Object
Choose a random proxy that hasn’t been used recently.
-
#initialize(proxy_list, request_interval) ⇒ RequestManager
constructor
A new instance of RequestManager.
-
#is_not_used?(chosen, url) ⇒ Boolean
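Checks whether a proxy is free to use for the URL's host: true if it has no recorded use, was last used at least request_interval[0] seconds ago, or was last used on a different host.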
-
#parse_proxy_list(proxy_file) ⇒ Object
Parse the proxy list.
Constructor Details
#initialize(proxy_list, request_interval) ⇒ RequestManager
Returns a new instance of RequestManager.
7 8 9 10 11 |
# File 'lib/requestmanager.rb', line 7
# Set up the manager's state.
#
# @param proxy_list [String, nil] handed to parse_proxy_list; the parsed
#   result is kept in @proxy_list
# @param request_interval [Array] stored untouched; other methods read its
#   elements [0] and [1] as the lower/upper bounds of a delay in seconds
def initialize(proxy_list, request_interval)
  @request_interval = request_interval
  @proxy_list = parse_proxy_list(proxy_list)
  # Filled in by get_random_proxy: proxy => [time of last use, host]
  @used_proxies = {}
end
Instance Method Details
#gen_driver(chosen_proxy) ⇒ Object
Generate driver for searches
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/requestmanager.rb', line 35
# Build the Firefox WebDriver used for a single request.
#
# The profile always asks for English content. When a proxy is given it is
# used for both HTTP and SSL traffic; when none is given the method instead
# pauses for a random number of seconds between @request_interval[0] and
# @request_interval[1] before creating the driver.
#
# @param chosen_proxy [String, nil] proxy passed to Selenium::WebDriver::Proxy
#   (get_random_proxy produces "host:port" strings), or nil for no proxy
# @return [Object] the driver returned by Selenium::WebDriver.for
def gen_driver(chosen_proxy)
  firefox_profile = Selenium::WebDriver::Firefox::Profile.new
  firefox_profile['intl.accept_languages'] = 'en'

  if chosen_proxy
    firefox_profile.proxy = Selenium::WebDriver::Proxy.new(http: chosen_proxy, ssl: chosen_proxy)
  else
    # No proxy to rotate through, so space requests out instead
    shortest = @request_interval[0]
    longest = @request_interval[1]
    sleep(rand(shortest..longest))
  end

  Selenium::WebDriver.for(:firefox, profile: firefox_profile)
end
#get_page(url, form_input = nil) ⇒ Object
Get the page requested
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/requestmanager.rb', line 14
# Load a page in a fresh browser session and return its HTML.
#
# A proxy is picked with get_random_proxy when a proxy list was configured;
# otherwise the driver is created without one. If form_input is given, it is
# typed into the element named "q" and the form is submitted. The method then
# waits a fixed 7 seconds for the page to load before reading the source.
#
# The browser is always shut down before returning, including when
# navigation, the form lookup or reading the source raises.
#
# @param url [String] address to open
# @param form_input [String, nil] optional text to submit through the "q" field
# @return [String] the page source reported by the driver
def get_page(url, form_input = nil)
  chosen_proxy = @proxy_list ? get_random_proxy(url) : nil
  driver = gen_driver(chosen_proxy)

  begin
    driver.navigate.to url
    puts "Getting page #{url}"

    # Handle form input if there is any
    if form_input
      element = driver.find_element(name: "q")
      element.send_keys form_input
      element.submit
    end

    # Sleep while things load then save
    sleep(7)
    driver.page_source
  ensure
    # Without this a failed request would leave a Firefox process running
    driver.quit
  end
end
#get_random_proxy(url) ⇒ Object
Choose a random proxy that hasn’t been used recently
52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/requestmanager.rb', line 52
# Choose a random proxy that hasn't been used recently on the URL's host.
#
# Picks random entries from @proxy_list until is_not_used? accepts one,
# pausing 5 ms between attempts. The wait is a loop rather than recursion so
# that a long wait cannot exhaust the call stack. The accepted proxy is
# recorded in @used_proxies together with the current time and the URL's host.
#
# @param url [String] URL about to be requested; only its host is used
# @return [String] the chosen proxy as "host:port"
# @raise [RuntimeError] if the proxy list is empty
# @raise [URI::InvalidURIError] if url cannot be parsed
def get_random_proxy(url)
  raise 'Proxy list is empty; cannot choose a proxy' if @proxy_list.empty?

  host = URI.parse(url).host

  loop do
    chosen = @proxy_list.sample

    # Only use proxy if it hasn't been used in last n seconds on same host
    if is_not_used?(chosen, url)
      @used_proxies[chosen] = [Time.now, host]
      return chosen[0] + ':' + chosen[1]
    end

    sleep(0.005)
  end
end
#is_not_used?(chosen, url) ⇒ Boolean
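Checks whether a proxy is free to use for the URL's host: true if it has no recorded use, was last used at least request_interval[0] seconds ago, or was last used on a different host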
67 68 69 70 71 |
# File 'lib/requestmanager.rb', line 67
# Tell whether a proxy may be used for the given URL right now.
#
# A proxy is free when it has no entry in @used_proxies, when its recorded
# use is at least @request_interval[0] seconds old, or when its recorded use
# was on a different host than the one in url.
#
# @param chosen [Object] key into @used_proxies (a proxy list entry)
# @param url [String] URL whose host is compared with the recorded host
# @return [Boolean] true if the proxy is free, false otherwise
def is_not_used?(chosen, url)
  last_use = @used_proxies[chosen]
  return true unless last_use

  # last_use is [time of use, host]
  cutoff = Time.now - @request_interval[0]
  return true if last_use[0] <= cutoff

  last_use[1] != URI.parse(url).host
end
#parse_proxy_list(proxy_file) ⇒ Object
Parse the proxy list
74 75 76 77 78 |
# File 'lib/requestmanager.rb', line 74
# Parse the proxy list file into an array of proxy entries.
#
# Each non-blank line is stripped of surrounding whitespace and split on ":",
# so "host:port" becomes ["host", "port"]. Blank lines are skipped because an
# empty entry cannot be turned into a "host:port" string later.
#
# File.readlines is used rather than IO.readlines so that a path beginning
# with "|" is read as a file name and never executed as a command.
#
# @param proxy_file [String, nil] path to the proxy list, or nil for no proxies
# @return [Array<Array<String>>, nil] parsed entries, or nil when no file is given
def parse_proxy_list(proxy_file)
  return unless proxy_file

  File.readlines(proxy_file)
      .map(&:strip)
      .reject(&:empty?)
      .map { |proxy| proxy.split(':') }
end