Class: Query::Engine::Baidu
- Inherits:
-
Object
- Object
- Query::Engine::Baidu
- Includes:
- Query::Engine
- Defined in:
- lib/query/engine/baidu.rb
Constant Summary collapse
- BaseUri =
'http://www.baidu.com/s?'
- Options =
{ :headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'} }
Instance Attribute Summary
Attributes included from Query::Engine
Class Method Summary collapse
-
.popular?(wd) ⇒ Boolean
def extend(words,level=3,sleeptime=1) level = level.to_i - 1 words = [words] unless words.respond_to? 'each'.
- .query(wd) ⇒ Object
- .suggestions(wd) ⇒ Object
Instance Method Summary collapse
-
#links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html.
-
#pages(host) ⇒ Object
site:xxx.yyy.com.
-
#pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz.
Methods included from Query::Engine
Class Method Details
.popular?(wd) ⇒ Boolean
def extend(words,level=3,sleeptime=1)
level = level.to_i - 1
words = [words] unless words.respond_to? 'each'
extensions = Array.new
words.each do |word|
self.query(word)
extensions += related_keywords
extensions += suggestions(word)
sleep sleeptime
end
extensions.uniq!
return extensions if level < 1
return extensions + extend(extensions,level)
end
40 41 42 |
# File 'lib/query/engine/baidu.rb', line 40 def self.popular?(wd) return HTTParty.get("http://index.baidu.com/main/word.php?word=#{URI.encode(wd.encode("GBK"))}").include?"boxFlash" end |
.query(wd) ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/query/engine/baidu.rb', line 44 def self.query(wd) q = Array.new q << "wd=#{wd}" q << "rn=#{@perpage.to_i}" if @perpage queryStr = q.join("&") #uri = URI.encode((BaseUri + queryStr).encode('GBK')) uri = URI.encode((BaseUri + queryStr)) # begin # @page = @a.get uri @page = HTTParty.get(uri,Options) r = Query::Result::Baidu.new(@page) r.baseuri = uri r.pagenumber = 1 r.perpage = @perpage r # rescue Exception => e # warn e.to_s # return false # end =begin query = "#{query}" @uri = BaseUri+URI.encode(query.encode('GBK')) @page = @a.get @uri self.clean @number = self.how_many @maxpage = (@number / @perpage.to_f).round @maxpage =10 if @maxpage>10 @currpage =0 =end end |
.suggestions(wd) ⇒ Object
9 10 11 12 13 14 |
# File 'lib/query/engine/baidu.rb', line 9 def self.suggestions(wd) require 'json' json = HTTParty.get("http://suggestion.baidu.com/su?wd=#{URI.encode(wd)}&cb=callback").force_encoding('GBK').encode("UTF-8") m = /\[([^\]]*)\]/.match json return JSON.parse m[0] end |
Instance Method Details
#links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html
81 82 83 |
# File 'lib/query/engine/baidu.rb', line 81 def links(uri) query("domain:\"#{uri}\"") end |
#pages(host) ⇒ Object
site:xxx.yyy.com
76 77 78 |
# File 'lib/query/engine/baidu.rb', line 76 def pages(host) query("site:#{host}") end |
#pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz
86 87 88 |
# File 'lib/query/engine/baidu.rb', line 86 def pages_with(host,string) query("site:#{host} inurl:#{string}") end |