Class: RequestManager

Inherits:
Object
  • Object
show all
Defined in:
lib/requestmanager.rb

Instance Method Summary collapse

Constructor Details

#initialize(proxy_list, request_interval, browser_num) ⇒ RequestManager

Returns a new instance of RequestManager.



7
8
9
10
11
12
13
14
# File 'lib/requestmanager.rb', line 7

# Store the configuration, create empty bookkeeping containers and then
# launch the browser pool.
#
# @param proxy_list passed to parse_proxy_list; its result becomes @proxy_list
# @param request_interval kept as-is; other methods read its [0] and [1]
#   entries as the bounds of a random sleep
# @param browser_num number of browsers open_n_browsers will start
def initialize(proxy_list, request_interval, browser_num)
  @proxy_list = parse_proxy_list(proxy_list)
  @browser_num = browser_num
  @request_interval = request_interval
  # Proxies already handed out, and the key => [driver, last-used time] table.
  @used_proxies = []
  @browsers = {}
  open_n_browsers
end

Instance Method Details

#close_all_browsers ⇒ Object

Close all the browsers



73
74
75
76
77
# File 'lib/requestmanager.rb', line 73

# Quit the driver of every tracked browser.
#
# Each value in @browsers is a [driver, last_used_time] pair; only the driver
# is touched here. The @browsers hash itself is left unchanged and returned.
def close_all_browsers
  @browsers.each_value { |entry| entry[0].quit }
end

#gen_driver(chosen_proxy) ⇒ Object

Generate driver for searches



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/requestmanager.rb', line 100

# Build a Firefox WebDriver whose profile prefers English content.
#
# When a proxy is supplied, both HTTP and SSL traffic are routed through it.
# Without a proxy, the method instead sleeps for a random duration taken from
# @request_interval[0]..@request_interval[1] before starting the driver.
#
# @param chosen_proxy proxy address to use, or nil/false for none
# @return the driver returned by Selenium::WebDriver.for
def gen_driver(chosen_proxy)
  firefox_profile = Selenium::WebDriver::Firefox::Profile.new
  firefox_profile['intl.accept_languages'] = 'en'

  if chosen_proxy
    firefox_profile.proxy = Selenium::WebDriver::Proxy.new(http: chosen_proxy, ssl: chosen_proxy)
  else
    # No proxy to hide behind, so pause before launching instead.
    pause_range = @request_interval[0]..@request_interval[1]
    sleep(rand(pause_range))
  end

  Selenium::WebDriver.for(:firefox, profile: firefox_profile)
end

#get_least_recent_browser ⇒ Object

Get the least recently used browser



47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/requestmanager.rb', line 47

# Return the driver whose last-used timestamp is the oldest and stamp it
# with the current time.
#
# @browsers maps a key to a [driver, last_used_time] pair. When several
# entries share the oldest timestamp, the one stored first wins.
#
# @return the driver (not the whole entry) of the chosen browser
def get_least_recent_browser
  proxy_key, entry = @browsers.min_by { |_key, (_driver, used_at)| used_at }
  driver = entry[0]

  # Refresh the usage time so the next call rotates to another browser.
  @browsers[proxy_key] = [driver, Time.now]
  driver
end

#get_most_recent_browser ⇒ Object

Get the most recently used browser



35
36
37
38
39
40
41
42
43
44
# File 'lib/requestmanager.rb', line 35

# Find the browser entry with the newest last-used timestamp.
#
# Unlike get_least_recent_browser, this returns the whole
# [key, [driver, last_used_time]] pair and does not modify @browsers.
# When several entries share the newest timestamp, the one stored first wins.
#
# @return [Array, nil] the matching pair, or nil when @browsers is empty
def get_most_recent_browser
  @browsers.max_by { |_key, (_driver, used_at)| used_at }
end

#get_page(url, form_input = nil) ⇒ Object

Get the page requested



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/requestmanager.rb', line 80

# Load a URL in the least recently used browser and return the resulting HTML.
#
# If form_input is given, it is typed into the input element whose name
# attribute equals form_field and that element's form is submitted. The
# method then sleeps for a random duration taken from
# @request_interval[0]..@request_interval[1] so the page can finish loading
# before the source is read.
#
# @param url address to navigate to
# @param form_input text to submit through the form, or nil to skip the form
# @param form_field name attribute of the input to fill; defaults to "q"
# @return the browser's page source after the wait
def get_page(url, form_input = nil, form_field = "q")
  browser = get_least_recent_browser
  browser.navigate.to url
  puts "Getting page #{url}"

  # Fill in and submit the form only when there is something to send.
  if form_input
    element = browser.find_element(name: form_field)
    element.send_keys form_input
    element.submit
  end

  # Give the page time to load before capturing its HTML.
  sleep(rand(@request_interval[0]..@request_interval[1]))
  browser.page_source
end

#get_random_proxy ⇒ Object

Choose a random proxy that hasn’t been used recently



117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/requestmanager.rb', line 117

# Pick a random proxy that is not already in @used_proxies and mark it used.
#
# The choice is made directly among the unused proxies, so the method never
# has to retry. When every proxy is already taken it fails immediately with a
# descriptive error rather than retrying without bound.
#
# @return the chosen entry of @proxy_list (also appended to @used_proxies)
# @raise [ArgumentError] if @proxy_list is empty
# @raise [RuntimeError] if every proxy in @proxy_list is already in use
def get_random_proxy
  raise ArgumentError, 'proxy list is empty' if @proxy_list.empty?

  available = @proxy_list - @used_proxies
  raise 'all proxies are currently in use' if available.empty?

  chosen_proxy = available.sample
  @used_proxies.push(chosen_proxy)
  chosen_proxy
end

#get_updated_current_page ⇒ Object

Get the html on the page now



30
31
32
# File 'lib/requestmanager.rb', line 30

# Return the HTML currently shown in the most recently used browser.
#
# get_most_recent_browser yields a [key, [driver, last_used_time]] pair; the
# driver is pulled out of it and asked for its page source.
def get_updated_current_page
  _proxy_key, entry = get_most_recent_browser
  entry[0].page_source
end

#open_browser ⇒ Object

Open the browser with a random proxy



24
25
26
27
# File 'lib/requestmanager.rb', line 24

# Start one browser and register it in @browsers.
#
# With a proxy list, a random unused proxy is chosen and doubles as the
# browser's key. Without one, the driver is created with no proxy and is
# registered under its own object_id, so several proxy-less browsers can
# coexist instead of overwriting each other under a shared nil key.
#
# @return [Array] the stored [driver, creation_time] entry
def open_browser
  chosen_proxy = @proxy_list.nil? ? nil : get_random_proxy
  driver = gen_driver(chosen_proxy)

  # A unique key keeps every driver reachable so it can later be quit.
  browser_key = chosen_proxy.nil? ? driver.object_id : chosen_proxy
  @browsers[browser_key] = [driver, Time.now]
end

#open_n_browsers ⇒ Object

Open the specified number of browsers



17
18
19
20
21
# File 'lib/requestmanager.rb', line 17

# Call open_browser once for each number from 1 up to @browser_num.
#
# @return [Range] the 1..@browser_num range that was iterated
def open_n_browsers
  slots = 1..@browser_num
  slots.each { open_browser }
end

#parse_proxy_list(proxy_file) ⇒ Object

Parse the proxy list



132
133
134
135
136
# File 'lib/requestmanager.rb', line 132

# Read a proxy list file into an array of proxy strings, one per line.
#
# Surrounding whitespace is stripped from every line and blank lines are
# dropped, so an empty string is never treated as a proxy. File.readlines is
# used so the argument is always treated as a file path (IO.readlines can
# interpret a leading "|" as a command to run).
#
# @param proxy_file path of the file to read, or nil/false for "no proxies"
# @return [Array<String>, nil] the proxies, or nil when no file was given
def parse_proxy_list(proxy_file)
  return unless proxy_file

  File.readlines(proxy_file).map(&:strip).reject(&:empty?)
end

#restart_browser ⇒ Object

Close the most recently used browser and open a new one in its place



61
62
63
64
65
66
67
68
69
70
# File 'lib/requestmanager.rb', line 61

# Replace the most recently used browser with a freshly opened one.
#
# The old browser's key stays in @used_proxies until after the replacement
# has been opened, and is removed from that list only afterwards.
#
# @return whatever @used_proxies.delete returns for the old key
def restart_browser
  proxy_key, entry = get_most_recent_browser
  entry[0].quit

  # Forget the closed browser, start its replacement, then free the old key.
  @browsers.delete(proxy_key)
  open_browser
  @used_proxies.delete(proxy_key)
end