Class: Baidu
- Inherits:
-
Object
- Object
- Baidu
- Defined in:
- lib/baidu.rb
Constant Summary collapse
- BaseUri =
'http://www.baidu.com/s?'
- PerPage =
100
Instance Method Summary collapse
-
#how_many_links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html.
-
#how_many_pages(host) ⇒ Object
site:xxx.yyy.com.
-
#how_many_pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz.
-
#initialize ⇒ Baidu
constructor
A new instance of Baidu.
-
#popular?(wd) ⇒ Boolean
def extend(words,level=3,sleeptime=1) level = level.to_i - 1 words = [words] unless words.respond_to? 'each' extensions = Array.new words.each do |word| self.query(word) extensions += related_keywords extensions += suggestions(word) sleep sleeptime end extensions.uniq! return extensions if level < 1 return extensions + extend(extensions,level) end.
- #query(wd) ⇒ Object
- #suggestions(wd) ⇒ Object
Constructor Details
#initialize ⇒ Baidu
Returns a new instance of Baidu.
10 11 12 13 14 15 |
# File 'lib/baidu.rb', line 10
# Builds the Mechanize agent used for every request and clears the
# last-fetched page.
def initialize
  @page = nil
  @a = Mechanize.new do |browser|
    browser.user_agent_alias = 'Linux Mozilla'
  end
  @a.idle_timeout = 2
  @a.max_history = 1
end
Instance Method Details
#how_many_links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html
83 84 85 |
# File 'lib/baidu.rb', line 83
# Runs the search `domain:"<uri>"` and reports its result count.
#
# @param uri [String] value placed inside the quoted `domain:` operator,
#   e.g. xxx.yyy.com/path/file.html
# @return [Object, nil] whatever `how_many` returns on the query result, or
#   nil when `query` signals a failed request by returning false
def how_many_links(uri)
  result = query("domain:\"#{uri}\"")
  # `query` returns false on timeout; calling `how_many` on it would raise
  # NoMethodError, so report "unknown" as nil instead.
  return nil unless result

  result.how_many
end
#how_many_pages(host) ⇒ Object
site:xxx.yyy.com
78 79 80 |
# File 'lib/baidu.rb', line 78
# Runs the search `site:<host>` and reports its result count.
#
# @param host [String] host name placed after the `site:` operator,
#   e.g. xxx.yyy.com
# @return [Object, nil] whatever `how_many` returns on the query result, or
#   nil when `query` signals a failed request by returning false
def how_many_pages(host)
  result = query("site:#{host}")
  # `query` returns false on timeout; calling `how_many` on it would raise
  # NoMethodError, so report "unknown" as nil instead.
  return nil unless result

  result.how_many
end
#how_many_pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz
88 89 90 |
# File 'lib/baidu.rb', line 88
# Runs the search `site:<host> inurl:<string>` and reports its result count.
#
# @param host [String] host name placed after the `site:` operator
# @param string [String] text placed after the `inurl:` operator
# @return [Object, nil] whatever `how_many` returns on the query result, or
#   nil when `query` signals a failed request by returning false
def how_many_pages_with(host, string)
  result = query("site:#{host} inurl:#{string}")
  # `query` returns false on timeout; calling `how_many` on it would raise
  # NoMethodError, so report "unknown" as nil instead.
  return nil unless result

  result.how_many
end
#popular?(wd) ⇒ Boolean
def extend(words,level=3,sleeptime=1)
level = level.to_i - 1
words = [words] unless words.respond_to? 'each'
extensions = Array.new
words.each do |word|
self.query(word)
extensions += related_keywords
extensions += suggestions(word)
sleep sleeptime
end
extensions.uniq!
return extensions if level < 1
return extensions + extend(extensions,level)
end
41 42 43 |
# File 'lib/baidu.rb', line 41
# Fetches the Baidu Index page for a word and checks it for the marker
# text "boxFlash".
#
# @param wd [String] the word to look up; it is transcoded to GBK before
#   being percent-encoded into the URL
# @return [Boolean] true when the response body contains "boxFlash"
#   (presumably the chart container shown only for indexed words — unverified)
def popular?(wd)
  # URI.encode was removed in Ruby 3.0; encode_www_form_component
  # percent-encodes the raw GBK bytes of the query-string value.
  word = URI.encode_www_form_component(wd.encode('GBK'))
  @a.get("http://index.baidu.com/main/word.php?word=#{word}").body.include?('boxFlash')
end
#query(wd) ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/baidu.rb', line 45
# Requests a Baidu search for the given term, asking for PerPage results,
# and remembers the fetched page in @page.
#
# @param wd [String] the search term, including operators such as `site:`
# @return [BaiduResult, false] the fetched page wrapped in a BaiduResult, or
#   false when the request fails with Net::HTTP::Persistent::Error
def query(wd)
  # URI.encode was removed in Ruby 3.0, and it left `&`, `=` and `+` inside
  # the term unescaped, which corrupted the query string. encode_www_form
  # escapes each value on its own.
  # The term is sent in its current encoding; the original source carried a
  # disabled variant that transcoded the whole URL to GBK first.
  uri = BaseUri + URI.encode_www_form([['wd', wd], ['rn', PerPage]])
  begin
    @page = @a.get uri
    BaiduResult.new(@page)
  rescue Net::HTTP::Persistent::Error
    warn "#{uri}timeout"
    return false
  end
  # Legacy paging setup, disabled in the original source:
  #   query = "#{query}"
  #   @uri = BaseUri+URI.encode(query.encode('GBK'))
  #   @page = @a.get @uri
  #   self.clean
  #   @number = self.how_many
  #   @maxpage = (@number / @perpage.to_f).round
  #   @maxpage =10 if @maxpage>10
  #   @currpage =0
end
#suggestions(wd) ⇒ Object
17 18 19 20 21 |
# File 'lib/baidu.rb', line 17
# Asks Baidu's suggestion endpoint for completions of a word.
#
# @param wd [String] the word to get suggestions for
# @return [Array] the first bracketed `[...]` list found in the response,
#   parsed as JSON; an empty array when the response contains no such list
def suggestions(wd)
  # URI.encode was removed in Ruby 3.0; encode_www_form_component escapes
  # the query-string value.
  url = "http://suggestion.baidu.com/su?wd=#{URI.encode_www_form_component(wd)}&cb=callback"
  # The response body is treated as GBK and transcoded to UTF-8 before parsing.
  payload = @a.get(url).body.force_encoding('GBK').encode('UTF-8')
  # The response is a callback wrapper, not pure JSON, so only the first
  # [...] span is extracted and parsed.
  list = payload[/\[[^\]]*\]/]
  list ? JSON.parse(list) : []
end