Class: Query::Engine::Baidu
- Inherits:
- Object
- Inheritance hierarchy: Object → Query::Engine::Baidu
- Includes:
- Query::Engine
- Defined in:
- lib/query/engine/baidu.rb
Constant Summary collapse
- Host =
'www.baidu.com'
- BaseUri =
'http://www.baidu.com/s?'
- Options =
{ :headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'} }
Instance Attribute Summary
Attributes included from Query::Engine
Class Method Summary collapse
-
.links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html.
- .pages(host) ⇒ Object
-
.pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz.
-
.popular?(wd) ⇒ Boolean
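(No description is given for `.popular?`; the text picked up as its summary appears to be commented-out source of an unrelated `extend` helper:) def extend(words,level=3,sleeptime=1) level = level.to_i - 1 words = [words] unless words.respond_to? 'each'.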
- .query(wd, params = {}) ⇒ Object
-
.query_within(host, query) ⇒ Object
site:xxx.yyy.com.
- .suggestions(query, options = {}) ⇒ Object
Methods included from Query::Engine
Class Method Details
.links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html
93 94 95 |
# File 'lib/query/engine/baidu.rb', line 93
#
# Runs a search whose query string has the form domain:"<uri>",
# for example domain:"xxx.yyy.com/path/file.html".
#
# @param uri [#to_s] value placed between the double quotes
# @return [Object] whatever .query returns for that search string
def self.links(uri)
  quoted_uri = %("#{uri}")
  query("domain:#{quoted_uri}")
end
.pages(host) ⇒ Object
88 89 90 |
# File 'lib/query/engine/baidu.rb', line 88
#
# Runs a search whose query string has the form site:<host>.
#
# @param host [#to_s] value appended after the "site:" prefix
# @return [Object] whatever .query returns for that search string
def self.pages(host)
  site_filter = "site:#{host}"
  query(site_filter)
end
.pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz
98 99 100 |
# File 'lib/query/engine/baidu.rb', line 98
#
# Runs a search whose query string has the form
# site:<host> inurl:<string>, for example site:xxx.yyy.com inurl:zzz.
#
# @param host [#to_s] value appended after the "site:" prefix
# @param string [#to_s] value appended after the "inurl:" prefix
# @return [Object] whatever .query returns for that search string
def self.pages_with(host, string)
  site_filter = "site:#{host}"
  url_filter = "inurl:#{string}"
  query("#{site_filter} #{url_filter}")
end
.popular?(wd) ⇒ Boolean
(No description is given for `.popular?`. The text below appears to be commented-out source of an unrelated `extend` helper that was picked up as the docstring.)
def extend(words,level=3,sleeptime=1)
  level = level.to_i - 1
  words = [words] unless words.respond_to? 'each'
  extensions = Array.new
  words.each do |word|
    self.query(word)
    extensions += related_keywords
    extensions += suggestions(word)
    sleep sleeptime
  end
  extensions.uniq!
  return extensions if level < 1
  return extensions + extend(extensions,level)
end
42 43 44 |
# File 'lib/query/engine/baidu.rb', line 42
#
# Requests http://index.baidu.com/main/word.php for +wd+ and reports
# whether the response contains the substring "boxFlash".
#
# The word is transcoded to GBK and then percent-encoded byte by byte.
# URI.encode_www_form_component is used because URI.encode was removed
# in Ruby 3.0.
#
# @param wd [String] the word to look up; must be convertible to GBK
# @return [Boolean] true when the response includes "boxFlash"
# @raise [EncodingError] when +wd+ cannot be converted to GBK
def self.popular?(wd)
  encoded_word = URI.encode_www_form_component(wd.encode('GBK'))
  HTTParty.get("http://index.baidu.com/main/word.php?word=#{encoded_word}").include?('boxFlash')
end
.query(wd, params = {}) ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/query/engine/baidu.rb', line 46
#
# Sends a search request to http://<Host>/s and wraps the response in a
# Query::Result::Baidu.
#
# The query string is assembled in this order: "wd" (the search term),
# "rn" (only when @perpage is set), then every pair from +params+.
# All keys and values are form-encoded with URI.encode_www_form, so
# reserved characters such as "&", "=" or spaces in the term or in
# +params+ cannot corrupt the request URI.
#
# The raw response is also stored in @page.
#
# @param wd [#to_s] the search term
# @param params [Hash] extra query-string parameters; keys and values
#   are converted with #to_s
# @return [Query::Result::Baidu] result with baseuri set to the request
#   URI, pagenumber set to 1 and perpage set to @perpage
def self.query(wd, params = {})
  pairs = [['wd', wd.to_s]]
  pairs << ['rn', @perpage.to_i.to_s] if @perpage
  params.each { |key, value| pairs << [key.to_s, value.to_s] }

  uri = URI::HTTP.build(:host => Host, :path => '/s', :query => URI.encode_www_form(pairs))

  @page = HTTParty.get(uri, Options)
  result = Query::Result::Baidu.new(@page)
  result.baseuri = uri
  result.pagenumber = 1
  result.perpage = @perpage
  result
end
.query_within(host, query) ⇒ Object
site:xxx.yyy.com
84 85 86 |
# File 'lib/query/engine/baidu.rb', line 84
#
# Runs a search whose query string has the form <query> site:<host>,
# i.e. the given terms followed by a site:xxx.yyy.com filter.
#
# @param host [#to_s] value appended after the "site:" prefix
# @param query [#to_s] the search terms placed before the site filter
# @return [Object] whatever .query returns for that search string
def self.query_within(host, query)
  site_filter = "site:#{host}"
  # Explicit receiver: the +query+ parameter shadows the method name here.
  self.query("#{query} #{site_filter}")
end
.suggestions(query, options = {}) ⇒ Object
11 12 13 14 15 16 |
# File 'lib/query/engine/baidu.rb', line 11
#
# Fetches search suggestions for +query+ from the sp0.baidu.com "su"
# endpoint and returns the distinct values of every "q" field found in
# the response.
#
# The endpoint is called with a fixed JSONP callback name, so the body
# arrives wrapped as callback(...). The wrapper is left in place: the
# "q" values are extracted with a regular expression that does not
# depend on it. (String#delete must not be used to strip the wrapper;
# it removes every listed *character*, not the substring, which would
# mangle suggestions containing digits or the letters of "jQuery".)
#
# The response body is treated as GB18030 and transcoded to UTF-8.
#
# @param query [String] the text to get suggestions for
# @param options [Hash] passed through to HTTParty.get
# @return [Array<String>] unique "q" values in order of appearance;
#   empty when none are found
def self.suggestions(query, options = {})
  callback = 'jQuery1102036467162938788533_1437556180622'
  timestamp = (Time.now.to_f * 1000).to_i
  # URI.encode was removed in Ruby 3.0; form-component encoding also
  # escapes "&" and "=" so the term cannot break the query string.
  term = URI.encode_www_form_component(query)
  url = "https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=#{term}" \
        "&json=1&p=3&sid=&req=2&csor=0&cb=#{callback}&_=#{timestamp}"

  response = HTTParty.get(url, options)
  # dup so that force_encoding never mutates (or fails on) the original body.
  body = response.body.to_s.dup.force_encoding('GB18030').encode('UTF-8')
  body.scan(/"q":\s*"([^"]+)"/).flatten.uniq
end