Class: Baidu

Inherits:
SearchEngine show all
Defined in:
lib/baidu.rb

Constant Summary collapse

BaseUri =
'http://www.baidu.com/s?'
PerPage =
100

Instance Method Summary collapse

Methods inherited from SearchEngine

#indexed?

Constructor Details

#initialize ⇒ Baidu

Returns a new instance of Baidu.



270
271
272
273
274
275
# File 'lib/baidu.rb', line 270

# Builds a client that has not fetched any page yet.
#
# A Mechanize agent (user agent alias 'Linux Mozilla', idle_timeout 2,
# max_history 1) used to be configured here; that setup is disabled.
def initialize
  @page = nil
end

Instance Method Details

#how_many_links(uri) ⇒ Object

domain:xxx.yyy.com/path/file.html



344
345
346
# File 'lib/baidu.rb', line 344

# Runs a `domain:"<uri>"` search and reports its result count.
#
# @param uri [String] address to look up, e.g. xxx.yyy.com/path/file.html
# @return [Object] whatever `how_many` returns on the object produced by #query
def how_many_links(uri)
  search = query("domain:\"#{uri}\"")
  search.how_many
end

#how_many_pages(host) ⇒ Object

site:xxx.yyy.com



339
340
341
# File 'lib/baidu.rb', line 339

# Runs a `site:<host>` search and reports its result count.
#
# @param host [String] host name, e.g. xxx.yyy.com
# @return [Object] whatever `how_many` returns on the object produced by #query
def how_many_pages(host)
  search = query("site:#{host}")
  search.how_many
end

#how_many_pages_with(host, string) ⇒ Object

site:xxx.yyy.com inurl:zzz



349
350
351
# File 'lib/baidu.rb', line 349

# Runs a `site:<host> inurl:<string>` search and reports its result count.
#
# @param host [String] host name, e.g. xxx.yyy.com
# @param string [String] fragment that must appear in the result URLs
# @return [Object] whatever `how_many` returns on the object produced by #query
def how_many_pages_with(host, string)
  search = query("site:#{host} inurl:#{string}")
  search.how_many
end

#popular?(wd) ⇒ Boolean

# Expands seed keywords round by round, collecting what `related_keywords`
# and `suggestions` return for each keyword.
#
# @param words [String, #each] one seed keyword or a collection of them
# @param level [#to_i] number of rounds; every round feeds its findings into
#   the next one, and at least one round is always run
# @param sleeptime [Numeric] seconds to pause after each keyword
# @return [Array] the unique keywords gathered over all rounds
def extend(words, level = 3, sleeptime = 1)
  level = level.to_i - 1
  words = [words] unless words.respond_to?(:each)

  extensions = []
  words.each do |word|
    query(word)
    extensions += related_keywords
    extensions += suggestions(word)
    sleep sleeptime
  end
  extensions.uniq!
  return extensions if level < 1

  # Hand sleeptime down explicitly; otherwise deeper rounds would silently
  # fall back to the default pause. The merged list is de-duplicated because
  # later rounds can rediscover keywords found earlier.
  (extensions + extend(extensions, level, sleeptime)).uniq
end

Returns:

  • (Boolean)


307
308
309
# File 'lib/baidu.rb', line 307

# Requests the Baidu Index page for a keyword and reports whether the
# response body contains the "boxFlash" marker.
# NOTE(review): "boxFlash" presumably identifies the trend chart - confirm.
#
# @param wd [String] keyword; converted to GBK before being sent
# @return [Boolean] true when the body includes "boxFlash"
def popular?(wd)
  # URI.encode no longer exists in Ruby 3.0+ and left "&", "=" and "+"
  # unescaped; form-component encoding escapes the GBK bytes safely.
  word = URI.encode_www_form_component(wd.encode('GBK'))
  HTTParty.get("http://index.baidu.com/main/word.php?word=#{word}").body.include?('boxFlash')
end

#query(wd) ⇒ Object



311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
# File 'lib/baidu.rb', line 311

# Sends a search to Baidu and wraps the response.
#
# The request URL is BaseUri followed by the form-encoded parameters
# `wd` (the search expression) and `rn` (PerPage). The raw response is
# kept in @page.
#
# @param wd [String] search expression, e.g. "site:xxx.yyy.com"
# @return [BaiduResult, false] the wrapped response, or false after a
#   timeout (a "[timeout]" warning is printed in that case)
def query(wd)
  # Encode each parameter on its own so "&", "=" or "+" inside wd cannot
  # corrupt the query string (URI.encode is also gone in Ruby 3.0+).
  uri = BaseUri + URI.encode_www_form([['wd', wd], ['rn', PerPage]])

  @page = HTTParty.get(uri)
  BaiduResult.new(@page, uri)
rescue Timeout::Error, Net::HTTP::Persistent::Error
  # Timeout::Error covers Net::OpenTimeout / Net::ReadTimeout raised by the
  # plain Net::HTTP request; the persistent-connection error is kept for
  # backward compatibility with the former Mechanize-based implementation.
  warn "[timeout] #{uri}"
  false
end

#suggestions(wd) ⇒ Object



277
278
279
280
281
# File 'lib/baidu.rb', line 277

# Fetches search suggestions for a keyword from suggestion.baidu.com.
#
# The response body is treated as GBK text and converted to UTF-8; the
# first bracketed list found in it is parsed as a JSON array.
#
# @param wd [String] keyword to get suggestions for
# @return [Array] parsed suggestion list; empty when the response contains
#   no bracketed list
def suggestions(wd)
  # URI.encode no longer exists in Ruby 3.0+ and did not escape "&" or "=".
  endpoint = "http://suggestion.baidu.com/su?wd=#{URI.encode_www_form_component(wd)}&cb=callback"
  json = HTTParty.get(endpoint).body.force_encoding('GBK').encode('UTF-8')

  list = json[/\[([^\]]*)\]/]
  list ? JSON.parse(list) : []
end

#url(id) ⇒ Object



283
284
285
286
287
# File 'lib/baidu.rb', line 283

def url(id)
  a = Mechanize.new
  a.redirect_ok=false
  return a.head("http://www.baidu.com/link?url=#{id}").header['location']
end