Class: Baidu
- Inherits:
-
SearchEngine
- Object
- SearchEngine
- Baidu
- Defined in:
- lib/baidu.rb
Constant Summary collapse
- BaseUri =
'http://www.baidu.com/s?'
- PerPage =
100
Instance Method Summary collapse
-
#how_many_links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html.
-
#how_many_pages(host) ⇒ Object
site:xxx.yyy.com.
-
#how_many_pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz.
-
#initialize ⇒ Baidu
constructor
A new instance of Baidu.
-
#popular?(wd) ⇒ Boolean
def extend(words,level=3,sleeptime=1) level = level.to_i - 1 words = [words] unless words.respond_to? ‘each’.
- #query(wd) ⇒ Object
- #suggestions(wd) ⇒ Object
-
#url(id) ⇒ Object
to find out the real url for something like ‘www.baidu.com/link?url=7yoYGJqjJ4zBBpC8yDF8xDhctimd_UkfF8AVaJRPKduy2ypxVG18aRB5L6D558y3MjT_Ko0nqFgkMoS’.
Methods inherited from SearchEngine
Constructor Details
#initialize ⇒ Baidu
Returns a new instance of Baidu.
270 271 272 273 274 275 |
# File 'lib/baidu.rb', line 270
# Builds a new Baidu instance with no page loaded yet.
#
# Disabled legacy Mechanize setup, kept for reference:
#   @a = Mechanize.new {|agent| agent.user_agent_alias = 'Linux Mozilla'}
#   @a.idle_timeout = 2
#   @a.max_history = 1
def initialize
  # Start with no page stored.
  @page = nil
end
Instance Method Details
#how_many_links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html
344 345 346 |
# File 'lib/baidu.rb', line 344
# Runs the search `domain:"<uri>"` (e.g. domain:"xxx.yyy.com/path/file.html")
# and returns whatever the result's #how_many reports.
#
# @param uri [String] value placed inside the quoted domain: operator
# @return [Object] the value of +how_many+ on the query result
def how_many_links(uri)
  result = query(%(domain:"#{uri}"))
  result.how_many
end
#how_many_pages(host) ⇒ Object
site:xxx.yyy.com
339 340 341 |
# File 'lib/baidu.rb', line 339
# Runs the search `site:<host>` (e.g. site:xxx.yyy.com) and returns
# whatever the result's #how_many reports.
#
# @param host [String] value placed after the site: operator
# @return [Object] the value of +how_many+ on the query result
def how_many_pages(host)
  result = query("site:#{host}")
  result.how_many
end
#how_many_pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz
349 350 351 |
# File 'lib/baidu.rb', line 349
# Runs the search `site:<host> inurl:<string>` (e.g. site:xxx.yyy.com inurl:zzz)
# and returns whatever the result's #how_many reports.
#
# @param host [String] value placed after the site: operator
# @param string [String] value placed after the inurl: operator
# @return [Object] the value of +how_many+ on the query result
def how_many_pages_with(host, string)
  result = query("site:#{host} inurl:#{string}")
  result.how_many
end
#popular?(wd) ⇒ Boolean
# NOTE(review): this page shows the text below as the description of #popular?,
# but it is the source of a separate helper named `extend`, not a description
# of what #popular? does.
#
# Expands a keyword (or a list of keywords) into further keywords by querying
# each word and gathering `related_keywords` plus `suggestions(word)`, then
# recursing on the gathered list.
#
# words     - a single keyword, or anything responding to `each`
# level     - number of expansion rounds (coerced with to_i, default 3)
# sleeptime - seconds to sleep after each word (default 1)
#
# Returns an Array of the gathered keywords.
#
# NOTE(review): the name `extend` would shadow Object#extend if this is ever
# re-enabled as an instance method — consider renaming.
def extend(words,level=3,sleeptime=1)
# Decrement first: `level` now holds the number of rounds left after this one.
level = level.to_i - 1
# Wrap a lone keyword so the loop below can treat both cases the same way.
words = [words] unless words.respond_to? 'each'
extensions = Array.new
words.each do |word|
self.query(word)
# `related_keywords` is not defined in the code visible here; presumably it
# reads from the query just issued — TODO confirm.
extensions += related_keywords
extensions += suggestions(word)
# Pause between words.
sleep sleeptime
end
# uniq! is used only for its in-place effect; its return value is ignored.
extensions.uniq!
return extensions if level < 1
# NOTE(review): the recursive call does not forward `sleeptime`, so deeper
# rounds use the default of 1; the concatenated result is not de-duplicated.
return extensions + extend(extensions,level)
end
307 308 309 |
# File 'lib/baidu.rb', line 307
# Fetches http://index.baidu.com/main/word.php for the keyword and reports
# whether the response body contains the marker "boxFlash".
#
# @param wd [String] keyword; it is converted to GBK before being sent
# @return [Boolean] true when the response body includes "boxFlash"
# @raise [Encoding::UndefinedConversionError] if +wd+ contains characters
#   that GBK cannot represent
def popular?(wd)
  # URI.encode was removed in Ruby 3.0 and never escaped reserved characters
  # such as '&', '=' or '#'. encode_www_form_component percent-encodes the
  # GBK bytes as they are and escapes every reserved character.
  word = URI.encode_www_form_component(wd.encode('GBK'))
  HTTParty.get("http://index.baidu.com/main/word.php?word=#{word}").body.include?('boxFlash')
end
#query(wd) ⇒ Object
311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 |
# File 'lib/baidu.rb', line 311
# Runs a search for +wd+ against BaseUri, asking for PerPage results, stores
# the raw response in @page and wraps it in a BaiduResult.
#
# @param wd [String] the search expression (e.g. "site:xxx.yyy.com")
# @return [BaiduResult, false] the wrapped response, or false when the
#   request timed out (a "[timeout] <uri>" warning is printed in that case)
#
# Disabled legacy implementation, kept for reference:
#   query = "#{query}"
#   @uri = BaseUri+URI.encode(query.encode('GBK'))
#   @page = @a.get @uri
#   self.clean
#   @number = self.how_many
#   @maxpage = (@number / @perpage.to_f).round
#   @maxpage =10 if @maxpage>10
#   @currpage =0
def query(wd)
  # encode_www_form escapes every value, so a keyword containing '&', '=',
  # '#' or spaces can no longer corrupt the query string. It also replaces
  # URI.encode, which was removed in Ruby 3.0.
  uri = BaseUri + URI.encode_www_form(wd: wd.to_s, rn: PerPage)
  begin
    @page = HTTParty.get(uri)
    BaiduResult.new(@page, uri)
  # Timeout::Error is the parent of Net::OpenTimeout and Net::ReadTimeout, the
  # timeouts Net::HTTP raises. It is listed first so the Mechanize-era
  # constant is only looked up when the error is not a plain timeout.
  rescue Timeout::Error, Net::HTTP::Persistent::Error
    warn "[timeout] #{uri}"
    false
  end
end
#suggestions(wd) ⇒ Object
277 278 279 280 281 |
# File 'lib/baidu.rb', line 277
# Fetches http://suggestion.baidu.com/su for +wd+, transcodes the GBK
# response to UTF-8 and parses the first bracketed list found in it.
#
# @param wd [String] the keyword to get suggestions for
# @return [Array] the parsed list, or an empty Array when the response
#   contains no "[...]" list
# @raise [JSON::ParserError] if the bracketed text is not valid JSON
def suggestions(wd)
  # URI.encode was removed in Ruby 3.0 and left '&', '=' and '#' unescaped,
  # which let a keyword break out of the wd= parameter.
  term = URI.encode_www_form_component(wd)
  raw = HTTParty.get("http://suggestion.baidu.com/su?wd=#{term}&cb=callback").body
  json = raw.force_encoding('GBK').encode('UTF-8')
  # The payload is a JavaScript callback, not JSON; only the first [...]
  # span is extracted and parsed.
  list = json[/\[[^\]]*\]/]
  # Without a list the original called [] on nil and raised NoMethodError.
  list ? JSON.parse(list) : []
end
#url(id) ⇒ Object
to find out the real url for something like ‘www.baidu.com/link?url=7yoYGJqjJ4zBBpC8yDF8xDhctimd_UkfF8AVaJRPKduy2ypxVG18aRB5L6D558y3MjT_Ko0nqFgkMoS’
283 284 285 286 287 |
# File 'lib/baidu.rb', line 283
# Finds the real URL behind a Baidu redirect link such as
# www.baidu.com/link?url=<id>, by issuing a HEAD request with redirects
# disabled and reading the Location header of the response.
#
# @param id [String] the value of the link's url= parameter
# @return [String, nil] the Location header, if the response carries one
def url(id)
  agent = Mechanize.new
  # Do not follow the redirect: the Location header itself is the answer.
  agent.redirect_ok = false
  response = agent.head("http://www.baidu.com/link?url=#{id}")
  response.header['location']
end