Class: Query::Engine::Baidu
- Inherits:
-
Object
- Object
- Query::Engine::Baidu
- Includes:
- Query::Engine
- Defined in:
- lib/query/engine/baidu.rb
Constant Summary collapse
- Host =
'www.baidu.com'
- BaseUri =
'http://www.baidu.com/s?'
- Options =
{ :headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'} }
Instance Attribute Summary
Attributes included from Query::Engine
Class Method Summary collapse
-
.links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html.
- .pages(host) ⇒ Object
-
.pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz.
-
.popular?(wd) ⇒ Boolean
def extend(words,level=3,sleeptime=1) level = level.to_i - 1 words = [words] unless words.respond_to? 'each'.
- .query(wd, params = {}) ⇒ Object
-
.query_within(host, query) ⇒ Object
site:xxx.yyy.com.
- .suggestions(wd) ⇒ Object
Methods included from Query::Engine
Class Method Details
.links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html
90 91 92 |
# File 'lib/query/engine/baidu.rb', line 90
# Runs a Baidu query whose keyword has the form domain:"<uri>".
#
# @param uri [String] value interpolated verbatim between the double quotes
# @return [Object] whatever .query returns for the composed keyword
def self.links(uri)
  keyword = %(domain:"#{uri}")
  query(keyword)
end
.pages(host) ⇒ Object
85 86 87 |
# File 'lib/query/engine/baidu.rb', line 85
# Runs a Baidu query whose keyword has the form site:<host>.
#
# @param host [String] value interpolated verbatim after "site:"
# @return [Object] whatever .query returns for the composed keyword
def self.pages(host)
  site_filter = "site:#{host}"
  query(site_filter)
end
.pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz
95 96 97 |
# File 'lib/query/engine/baidu.rb', line 95
# Runs a Baidu query whose keyword has the form
# "site:<host> inurl:<string>".
#
# @param host [String] value interpolated verbatim after "site:"
# @param string [String] value interpolated verbatim after "inurl:"
# @return [Object] whatever .query returns for the composed keyword
def self.pages_with(host, string)
  site_filter = "site:#{host}"
  url_filter = "inurl:#{string}"
  query([site_filter, url_filter].join(' '))
end
.popular?(wd) ⇒ Boolean
def extend(words,level=3,sleeptime=1)
level = level.to_i - 1
words = [words] unless words.respond_to? 'each'
extensions = Array.new
words.each do |word|
self.query(word)
extensions += related_keywords
extensions += suggestions(word)
sleep sleeptime
end
extensions.uniq!
return extensions if level < 1
return extensions + extend(extensions,level)
end
41 42 43 |
# File 'lib/query/engine/baidu.rb', line 41
# Reports whether the Baidu Index page fetched for a keyword contains the
# "boxFlash" marker.
#
# The keyword is transcoded to GBK and then percent-encoded as a form
# component before it is placed in the query string. URI.encode, which was
# used here before, was removed in Ruby 3.0 and also left reserved
# characters such as "&" unescaped, which corrupted the query string.
#
# @param wd [String] keyword to look up
# @return [Boolean] true when the response includes "boxFlash"
# @raise [Encoding::UndefinedConversionError] when wd contains characters
#   that cannot be converted to GBK
def self.popular?(wd)
  word = URI.encode_www_form_component(wd.encode('GBK'))
  HTTParty.get("http://index.baidu.com/main/word.php?word=#{word}").include?('boxFlash')
end
.query(wd, params = {}) ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/query/engine/baidu.rb', line 45
# Sends a search request to http://<Host>/s and wraps the response in a
# Query::Result::Baidu.
#
# The keyword is percent-encoded with URI.encode_www_form_component.
# URI.encode, which was used here before, was removed in Ruby 3.0 and left
# reserved characters such as "&" unescaped.
#
# Side effect: the raw HTTParty response is stored in @page.
#
# @param wd [String] search keyword
# @param params [Hash] extra query-string parameters, appended as
#   "key=value" after the keyword (and after "rn" when @perpage is set)
# @return [Query::Result::Baidu] result with baseuri set to the request
#   URI, pagenumber set to 1 and perpage set to @perpage
def self.query(wd, params = {})
  pairs = ["wd=#{URI.encode_www_form_component(wd)}"]
  pairs << "rn=#{@perpage.to_i}" if @perpage
  # Extra parameters are appended exactly as given, as before; callers
  # passing values with reserved characters must escape them themselves.
  params.each { |key, value| pairs << "#{key}=#{value}" }

  uri = URI::HTTP.build(:host => Host, :path => '/s', :query => pairs.join('&'))
  @page = HTTParty.get(uri, Options)

  result = Query::Result::Baidu.new(@page)
  result.baseuri = uri
  result.pagenumber = 1
  result.perpage = @perpage
  result
end
.query_within(host, query) ⇒ Object
site:xxx.yyy.com
81 82 83 |
# File 'lib/query/engine/baidu.rb', line 81
# Runs a Baidu query whose keyword has the form "<query> site:<host>".
#
# @param host [String] value interpolated verbatim after "site:"
# @param query [String] search terms placed before the site: filter
# @return [Object] whatever .query returns for the composed keyword
def self.query_within(host, query)
  scoped_keyword = "#{query} site:#{host}"
  # The explicit receiver is required: the local parameter `query`
  # shadows the class method of the same name.
  self.query(scoped_keyword)
end
.suggestions(wd) ⇒ Object
10 11 12 13 14 15 |
# File 'lib/query/engine/baidu.rb', line 10
# Fetches keyword suggestions from suggestion.baidu.com.
#
# The response body is treated as GBK, converted to UTF-8, and the first
# bracketed "[...]" list found in it is parsed as JSON.
#
# The keyword is percent-encoded with URI.encode_www_form_component.
# URI.encode, which was used here before, was removed in Ruby 3.0 and left
# reserved characters such as "&" unescaped. A response with no bracketed
# list now yields an empty array instead of raising NoMethodError on nil.
#
# @param wd [String] keyword to get suggestions for
# @return [Array] parsed suggestion list, or [] when the response contains
#   no bracketed list
# @raise [JSON::ParserError] when the bracketed text is not valid JSON
def self.suggestions(wd)
  require 'json'

  url = "http://suggestion.baidu.com/su?wd=#{URI.encode_www_form_component(wd)}&cb=callback"
  payload = HTTParty.get(url).force_encoding('GBK').encode('UTF-8')
  list = payload[/\[[^\]]*\]/]
  list ? JSON.parse(list) : []
end