Class: Meiriyigua::CrawlClient

Inherits:
Object
  • Object
show all
Defined in:
lib/meiriyigua/crawl_client.rb

Constant Summary collapse

# Pool of browser User-Agent strings; one is chosen at random for each new agent.
# Frozen so the shared list cannot be mutated by accident at runtime.
USER_AGENTS = [
  "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
  "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22",
  "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.32 Safari/537.36",
  "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
  "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
  "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
  "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
].freeze

Class Method Summary collapse

Class Method Details

.create_agent ⇒ Object



17
18
19
20
21
22
# File 'lib/meiriyigua/crawl_client.rb', line 17

# Builds a new Mechanize agent configured for crawling.
#
# The agent's User-Agent header is picked at random from USER_AGENTS and
# its `max_history` is set to 1.
#
# @return [Mechanize] the newly configured agent
def self.create_agent
  Mechanize.new do |agent|
    # Array#sample picks one element directly, without shuffling a copy of the list.
    agent.user_agent = USER_AGENTS.sample
    agent.max_history = 1
  end
end

.random_sleep ⇒ Object



24
25
26
# File 'lib/meiriyigua/crawl_client.rb', line 24

# Pauses the current thread for a randomly chosen whole number of seconds
# between 1 and 3 (inclusive).
#
# @return [Object] whatever `sleep` returns
def self.random_sleep
  pause_seconds = rand(1..3)
  sleep pause_seconds
end

.set_page_encoding(page) ⇒ Object



28
29
30
# File 'lib/meiriyigua/crawl_client.rb', line 28

# Switches a page whose encoding is reported as GB2312 over to GBK.
#
# The comparison ignores case. A page that reports no encoding (nil) is left
# untouched instead of raising NoMethodError.
# NOTE(review): presumably done because GBK is a superset of GB2312 and pages
# labelled gb2312 may contain GBK-only characters — confirm intent.
#
# @param page [#encoding, #encoding=] presumably a Mechanize page — verify against callers
# @return [String, nil] 'gbk' when the encoding was rewritten, otherwise nil
def self.set_page_encoding(page)
  declared = page.encoding
  # Nothing to normalise when the page carries no encoding at all.
  return if declared.nil?

  page.encoding = 'gbk' if declared.downcase == 'gb2312'
end