Class: Baidu

Inherits:
SearchEngine show all
Defined in:
lib/baidu.rb

Constant Summary collapse

BaseUri =
'http://www.baidu.com/s?'
PerPage =
100

Instance Method Summary collapse

Methods inherited from SearchEngine

#indexed?

Constructor Details

#initialize ⇒ Baidu

Returns a new instance of Baidu.



217
218
219
220
221
222
# File 'lib/baidu.rb', line 217

# Prepares the HTTP agent used for requests and clears the cached page.
#
# The Mechanize agent is created with the 'Linux Mozilla' user-agent alias,
# then given an idle timeout of 2 and a history limit of 1.
def initialize
  @a = Mechanize.new do |browser|
    browser.user_agent_alias = 'Linux Mozilla'
  end
  @a.idle_timeout = 2
  @a.max_history = 1
  # No page has been fetched yet.
  @page = nil
end

Instance Method Details

#how_many_links(uri) ⇒ Object

domain:xxx.yyy.com/path/file.html



290
291
292
# File 'lib/baidu.rb', line 290

# Runs a `domain:"<uri>"` search and returns the result's `how_many` value.
#
# @param uri [String] interpolated verbatim between double quotes after `domain:`
# @return [Object] whatever `how_many` returns on the object produced by `query`
def how_many_links(uri)
  search_terms = "domain:\"#{uri}\""
  result = query(search_terms)
  result.how_many
end

#how_many_pages(host) ⇒ Object

site:xxx.yyy.com



285
286
287
# File 'lib/baidu.rb', line 285

# Runs a `site:<host>` search and returns the result's `how_many` value.
#
# @param host [String] interpolated verbatim after `site:`
# @return [Object] whatever `how_many` returns on the object produced by `query`
def how_many_pages(host)
  search_terms = "site:#{host}"
  result = query(search_terms)
  result.how_many
end

#how_many_pages_with(host, string) ⇒ Object

site:xxx.yyy.com inurl:zzz



295
296
297
# File 'lib/baidu.rb', line 295

# Runs a `site:<host> inurl:<string>` search and returns the result's
# `how_many` value.
#
# @param host [String] interpolated verbatim after `site:`
# @param string [String] interpolated verbatim after `inurl:`
# @return [Object] whatever `how_many` returns on the object produced by `query`
def how_many_pages_with(host, string)
  search_terms = "site:#{host} inurl:#{string}"
  result = query(search_terms)
  result.how_many
end

#popular?(wd) ⇒ Boolean

# Expands one or more seed keywords into related keywords, recursing up to
# +level+ rounds.
#
# For every word the method runs `query(word)`, then collects
# `related_keywords` and `suggestions(word)`, pausing +sleeptime+ seconds
# between words. The keywords found in one round become the seeds of the
# next round until the level budget is exhausted.
#
# NOTE(review): `related_keywords` is defined outside this view; presumably
# it reads the page that `query` just fetched — confirm.
# NOTE(review): as an instance method this name shadows Object#extend for
# the receiver; kept unchanged for caller compatibility.
#
# @param words [String, #each] a single seed word or a collection of them
# @param level [#to_i] number of expansion rounds (values below 2 run one round)
# @param sleeptime [Numeric] seconds to pause after each word, in every round
# @return [Array] unique keywords gathered across all rounds
def extend(words, level = 3, sleeptime = 1)
  remaining = level.to_i - 1
  words = [words] unless words.respond_to?(:each)

  extensions = []
  words.each do |word|
    query(word)
    extensions.concat(related_keywords)
    extensions.concat(suggestions(word))
    sleep sleeptime
  end
  extensions.uniq!
  return extensions if remaining < 1

  # Pass sleeptime down: previously the recursive call fell back to the
  # default pause, ignoring the caller's value. Deduplicate the merged list
  # because deeper rounds routinely rediscover earlier keywords.
  (extensions + extend(extensions, remaining, sleeptime)).uniq
end

Returns:

  • (Boolean)


254
255
256
# File 'lib/baidu.rb', line 254

# Tells whether the Baidu Index page for a keyword contains the "boxFlash"
# marker.
#
# The keyword is transcoded to GBK, percent-escaped, and sent as the `word`
# query parameter to index.baidu.com through the shared agent.
#
# @param wd [String] keyword; must be convertible to GBK
# @return [Boolean] true when the response body includes "boxFlash"
# @raise [EncodingError] if +wd+ cannot be transcoded to GBK
def popular?(wd)
  # URI.encode no longer exists on Ruby 3.0+ and never escaped reserved
  # characters such as "&" or "#"; form-component encoding escapes the raw
  # GBK bytes and is safe inside a query value.
  word = URI.encode_www_form_component(wd.encode('GBK'))
  @a.get("http://index.baidu.com/main/word.php?word=#{word}").body.include?('boxFlash')
end

#query(wd) ⇒ Object



258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# File 'lib/baidu.rb', line 258

# Runs a Baidu web search and wraps the fetched page in a BaiduResult.
#
# The request URL is BaseUri followed by the form-encoded parameters
# `wd` (the search terms) and `rn` (PerPage). The fetched page is also
# stored in @page.
#
# @param wd [String] search terms, e.g. `site:example.com`
# @return [BaiduResult, false] the wrapped page, or false when the agent
#   raises Net::HTTP::Persistent::Error (a "[timeout]" warning is printed)
def query(wd)
  # URI.encode is gone on Ruby 3.0+ and, applied to a whole URL, left "&",
  # "=" and "#" inside the search terms unescaped, corrupting the query
  # string. encode_www_form escapes each value on its own.
  uri = BaseUri + URI.encode_www_form(wd: wd, rn: PerPage)
  begin
    @page = @a.get(uri)
    BaiduResult.new(@page)
  rescue Net::HTTP::Persistent::Error
    warn "[timeout] #{uri}"
    false
  end
end

#suggestions(wd) ⇒ Object



224
225
226
227
228
# File 'lib/baidu.rb', line 224

# Fetches search suggestions for a keyword from suggestion.baidu.com.
#
# The response body is relabelled as GBK, transcoded to UTF-8, and the first
# bracketed `[...]` list found in it is parsed as JSON.
#
# @param wd [String] keyword to get suggestions for
# @return [Array] parsed suggestion list; empty when the response contains
#   no bracketed list
# @raise [JSON::ParserError] if the bracketed list is not valid JSON
def suggestions(wd)
  # URI.encode is gone on Ruby 3.0+ and left "&"/"#" unescaped, which would
  # truncate the keyword or clobber the cb parameter.
  endpoint = "http://suggestion.baidu.com/su?wd=#{URI.encode_www_form_component(wd)}&cb=callback"
  json = HTTParty.get(endpoint).body.force_encoding('GBK').encode('UTF-8')
  list = /\[([^\]]*)\]/.match(json)
  # No list in the payload: report "no suggestions" instead of failing on nil.
  list ? JSON.parse(list[0]) : []
end

#url(id) ⇒ Object



230
231
232
233
234
# File 'lib/baidu.rb', line 230

# Looks up the `location` header Baidu returns for a result link id.
#
# A fresh Mechanize agent with redirects disabled sends a HEAD request to
# `http://www.baidu.com/link?url=<id>` and the response's `location` header
# is returned.
#
# @param id [String] link identifier, interpolated verbatim into the URL
# @return [Object] value of the response's 'location' header
def url(id)
  resolver = Mechanize.new
  resolver.redirect_ok = false
  response = resolver.head("http://www.baidu.com/link?url=#{id}")
  response.header['location']
end