Class: Baidu
- Inherits:
-
Object
- Object
- Baidu
- Defined in:
- lib/baidu.rb
Constant Summary collapse
- BaseUri =
'http://www.baidu.com/s?'
Instance Attribute Summary collapse
-
#data ⇒ Object
readonly
Returns the value of attribute data.
-
#debug ⇒ Object
Returns the value of attribute debug.
-
#page ⇒ Object
readonly
Returns the value of attribute page.
-
#pagenumber ⇒ Object
Returns the value of attribute pagenumber.
-
#perpage ⇒ Object
Returns the value of attribute perpage.
-
#wd ⇒ Object
readonly
Returns the value of attribute wd.
Instance Method Summary collapse
- #how_many ⇒ Object
-
#how_many_links(uri) ⇒ Object
Returns the number of results Baidu reports for the query domain:"<uri>", e.g. domain:"xxx.yyy.com/path/file.html".
-
#how_many_pages(host) ⇒ Object
Returns the number of results Baidu reports for the query site:<host>, e.g. site:xxx.yyy.com.
-
#how_many_pages_with(host, string) ⇒ Object
Returns the number of results Baidu reports for the query site:<host> inurl:<string>, e.g. site:xxx.yyy.com inurl:zzz.
-
#initialize ⇒ Baidu
constructor
A new instance of Baidu.
- #next ⇒ Object
- #popular?(wd) ⇒ Boolean
- #query(wd) ⇒ Object
-
#rank(host) ⇒ Object
Returns the 1-based position, within the results of the current query (see #ranks), of the first result whose URI host is the given host.
-
#ranks ⇒ Object
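Returns the result links (href values) of the current page, in page order. The (keyword=false) parameter is commented out in the source, so the method takes no arguments.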
- #related_keywords ⇒ Object
- #suggestions(wd) ⇒ Object
Constructor Details
#initialize ⇒ Baidu
Returns a new instance of Baidu.
9 10 11 12 13 14 15 16 17 18 |
# File 'lib/baidu.rb', line 9
# Builds the Mechanize agent used for every request and sets the defaults:
# 100 results per page, no page loaded yet, debug output off, and an empty
# @data hash for cached results.
def initialize
  @a = Mechanize.new do |agent|
    agent.user_agent_alias = 'Linux Mozilla'
  end
  @a.idle_timeout = 2
  @a.max_history = 1

  @perpage = 100
  @page = nil
  @debug = false
  @data = {}
end
Instance Attribute Details
#data ⇒ Object (readonly)
Returns the value of attribute data.
7 8 9 |
# File 'lib/baidu.rb', line 7
# Read-only accessor for @data, the hash that #query clears and that the
# lookup methods use to cache their results.
def data
  @data
end
#debug ⇒ Object
Returns the value of attribute debug.
6 7 8 |
# File 'lib/baidu.rb', line 6
# Returns the current value of the @debug flag (false after #initialize).
def debug
  @debug
end
#page ⇒ Object (readonly)
Returns the value of attribute page.
7 8 9 |
# File 'lib/baidu.rb', line 7
# Read-only accessor for @page, the page most recently fetched by #query or
# #next (nil until the first query).
def page
  @page
end
#pagenumber ⇒ Object
Returns the value of attribute pagenumber.
6 7 8 |
# File 'lib/baidu.rb', line 6
# Returns the value of the @pagenumber attribute.
def pagenumber
  @pagenumber
end
#perpage ⇒ Object
Returns the value of attribute perpage.
6 7 8 |
# File 'lib/baidu.rb', line 6
# Returns @perpage, the value #query sends as the rn= parameter
# (100 after #initialize).
def perpage
  @perpage
end
#wd ⇒ Object (readonly)
Returns the value of attribute wd.
7 8 9 |
# File 'lib/baidu.rb', line 7
# Read-only accessor for @wd, the keyword string passed to the most recent
# #query call.
def wd
  @wd
end
Instance Method Details
#how_many ⇒ Object
125 126 127 128 129 130 131 132 133 |
# File 'lib/baidu.rb', line 125
# Number of results reported on the current page.
#
# Returns @data['how_many'] when that key is present. Otherwise reads the
# first <span class="nums"> element of @page, strips every non-digit
# character from its text and returns the remainder as an Integer; returns
# false when the page has no such element.
#
# Raises StandardError ('wrong with @page') unless @page is a Mechanize::Page.
def how_many
  if @data.key?('how_many')
    @data['how_many']
  elsif !@page.instance_of?(Mechanize::Page)
    raise StandardError, 'wrong with @page'
  else
    counter = @page.search("//span[@class='nums']").first
    counter.nil? ? false : counter.content.gsub(/\D/, '').to_i
  end
end
#how_many_links(uri) ⇒ Object
Returns the number of results Baidu reports for the query domain:"<uri>", e.g. domain:"xxx.yyy.com/path/file.html".
65 66 67 68 69 |
# File 'lib/baidu.rb', line 65
# Result count for the query domain:"<uri>".
#
# uri - String such as "xxx.yyy.com/path/file.html".
#
# A cached @data['how_many'] is reused only when the most recent query (@wd)
# is this exact query; the previous unconditional cache check could hand back
# the count of an unrelated query. Otherwise runs #query and returns
# #how_many (an Integer, or false when the page shows no count).
def how_many_links(uri)
  wd = "domain:\"#{uri}\""
  return @data['how_many'] if @wd == wd && @data.key?('how_many')

  query(wd)
  how_many
end
#how_many_pages(host) ⇒ Object
Returns the number of results Baidu reports for the query site:<host>, e.g. site:xxx.yyy.com.
58 59 60 61 62 |
# File 'lib/baidu.rb', line 58
# Result count for the query site:<host>.
#
# host - String such as "xxx.yyy.com".
#
# A cached @data['how_many'] is reused only when the most recent query (@wd)
# is this exact query; the previous unconditional cache check could hand back
# the count belonging to a different host. Otherwise runs #query and returns
# #how_many (an Integer, or false when the page shows no count).
def how_many_pages(host)
  wd = "site:#{host}"
  return @data['how_many'] if @wd == wd && @data.key?('how_many')

  query(wd)
  how_many
end
#how_many_pages_with(host, string) ⇒ Object
Returns the number of results Baidu reports for the query site:<host> inurl:<string>, e.g. site:xxx.yyy.com inurl:zzz.
72 73 74 75 76 |
# File 'lib/baidu.rb', line 72
# Result count for the query site:<host> inurl:<string>.
#
# host   - String such as "xxx.yyy.com".
# string - String that must appear in the result URL.
#
# A cached @data['how_many'] is reused only when the most recent query (@wd)
# is this exact query; the previous unconditional cache check could hand back
# the count of an unrelated query. Otherwise runs #query and returns
# #how_many (an Integer, or false when the page shows no count).
def how_many_pages_with(host, string)
  wd = "site:#{host} inurl:#{string}"
  return @data['how_many'] if @wd == wd && @data.key?('how_many')

  query(wd)
  how_many
end
#next ⇒ Object
135 136 137 138 139 140 141 |
# File 'lib/baidu.rb', line 135
# Advances @page to the next result page.
#
# Returns false, without fetching anything, when the current page has no
# link whose text matches 下一页 ("next page") or when @currpage has already
# reached @maxpage; returns true after a successful click.
#
# @currpage is now incremented on every successful advance. Previously it
# stayed at the 0 assigned by #query, so the @maxpage limit could never
# take effect after the first page.
def next
  nextbtn = @page.link_with(:text => /下一页/)
  return false if nextbtn.nil? || @currpage >= @maxpage

  @page = @a.click(nextbtn)
  @currpage += 1
  clean
  true
end
#popular?(wd) ⇒ Boolean
27 28 29 |
# File 'lib/baidu.rb', line 27
# Fetches the index.baidu.com word page for +wd+ and reports whether the
# response body contains the marker string "boxFlash".
#
# wd - keyword String; it is transcoded to GBK before being percent-encoded.
#
# Uses URI.encode_www_form_component instead of URI.encode: the latter was
# removed in Ruby 3.0 and left characters such as "&" and "=" unescaped, so a
# keyword containing them corrupted the query string.
#
# Returns true or false.
def popular?(wd)
  word = URI.encode_www_form_component(wd.encode('GBK'))
  @a.get("http://index.baidu.com/main/word.php?word=#{word}").body.include?('boxFlash')
end
#query(wd) ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/baidu.rb', line 31
# Runs a Baidu search for +wd+ and makes its first result page current.
#
# wd - keyword String; it is transcoded to GBK and percent-encoded into the
#      wd= parameter, with @perpage sent as rn=.
#
# Side effects: empties @data, stores +wd+ in @wd, loads the page into @page,
# calls #clean, stores #how_many in @number, derives @maxpage from
# @number / @perpage (rounded) and resets @currpage to 0.
#
# Changes from the previous version:
# * the keyword is escaped with URI.encode_www_form_component, because
#   URI.encode was removed in Ruby 3.0 and did not escape "&" or "=";
# * a #how_many result of false (no count on the page) is treated as 0 when
#   computing @maxpage instead of raising NoMethodError;
# * the redundant second @data.clear is gone.
#
# Returns 0 (the value assigned to @currpage).
def query(wd)
  @data.clear
  @wd = wd

  params = [
    "wd=#{URI.encode_www_form_component(wd.encode('GBK'))}",
    "rn=#{@perpage}"
  ]
  @page = @a.get(BaseUri + params.join('&'))
  clean

  @number = how_many
  @maxpage = ((@number || 0) / @perpage.to_f).round
  @currpage = 0
end
#rank(host) ⇒ Object
Returns the 1-based position, within the results of the current query (see #ranks), of the first result whose URI host is the given host.
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/baidu.rb', line 79
# Position of +host+ among the links returned by #ranks.
#
# host - host name String to look for, e.g. "xxx.yyy.com".
#
# Returns the 1-based index of the first link whose URI host equals +host+,
# or false when none matches. A hit is remembered in @data[:rank][host] and
# served from there on later calls.
#
# Fixes over the previous version, which could not run:
# * it called #host on the escaped String rather than on the parsed URI and
#   never compared the result with +host+;
# * it used Hash#<<, which does not exist, to store the result;
# * links that are not valid URIs are now skipped (reported when @debug is
#   set) instead of aborting the lookup;
# * a miss returns false instead of the whole #ranks array.
def rank(host)
  cached = @data[:rank]
  return cached[host] if cached && cached.key?(host)

  ranks.each_with_index do |uri, index|
    begin
      # Escape first so links containing non-ASCII characters still parse.
      next unless URI.parse(URI::DEFAULT_PARSER.escape(uri)).host == host
    rescue URI::InvalidURIError
      puts "invalid uri:#{uri}" if @debug
      next
    end

    (@data[:rank] ||= {})[host] = index + 1
    return index + 1
  end

  false
end
#ranks ⇒ Object
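Returns the result links (href values) of the current page, in page order. The (keyword=false) parameter is commented out in the source, so the method takes no arguments.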
100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/baidu.rb', line 100
# Links of the results on the current page, in page order.
#
# Returns @data[:ranks] when it is already present. Otherwise, for every
# <table class="result"> on @page, looks the table up again by its id, takes
# the href of the first <a> inside it, and stores the resulting Array in
# @data[:ranks] before returning it.
#
# Raises StandardError ('wrong with @page') unless @page is a Mechanize::Page.
def ranks
  return @data[:ranks] if @data.key?(:ranks)

  unless @page.instance_of?(Mechanize::Page)
    raise StandardError, 'wrong with @page'
  end

  hrefs = @page.search("//table[@class=\"result\"]").map do |table|
    anchors = @page.search("//table[@id=\"#{table['id']}\"]//a")
    anchors.first['href']
  end
  @data[:ranks] = hrefs
end
#related_keywords ⇒ Object
112 113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/baidu.rb', line 112
# Related-search keywords shown on the current page.
#
# Collects the text of every <a> found under //div[@id="rs"]//tr on @page.
# The result is stored in @data[:related_keywords] and served from there on
# later calls; the previous version checked a misspelled key
# (:realated_keywords) that nothing ever wrote, so its cache never hit.
# The method name, missing from the extracted listing, is restored from the
# "#related_keywords" heading.
#
# Returns an Array of Strings (empty when the page has no such links), or
# false when the search itself returns nil.
# Raises StandardError ('wrong with @page') unless @page is a Mechanize::Page.
def related_keywords
  return @data[:related_keywords] if @data.key?(:related_keywords)
  raise StandardError, 'wrong with @page' unless @page.instance_of?(Mechanize::Page)

  links = @page.search("//div[@id=\"rs\"]//tr//a")
  return false if links.nil?

  @data[:related_keywords] = links.map(&:text)
end
#suggestions(wd) ⇒ Object
21 22 23 24 25 |
# File 'lib/baidu.rb', line 21
# Search suggestions for +wd+ from suggestion.baidu.com.
#
# wd - keyword String (percent-encoded as given, without transcoding).
#
# The response body is re-labelled as GBK and converted to UTF-8; the first
# bracketed [...] list in it is parsed as JSON.
#
# Changes from the previous version:
# * the keyword is escaped with URI.encode_www_form_component, because
#   URI.encode was removed in Ruby 3.0 and did not escape "&" or "=";
# * a response without any [...] list yields [] instead of raising
#   NoMethodError on a nil match.
#
# Returns the parsed Array.
def suggestions(wd)
  url = "http://suggestion.baidu.com/su?wd=#{URI.encode_www_form_component(wd)}&cb=callback"
  body = @a.get(url).body.force_encoding('GBK').encode('UTF-8')
  list = body[/\[[^\]]*\]/]
  list ? JSON.parse(list) : []
end