Class: Query::Engine::Baidu

Inherits:
Base
  • Object
show all
Defined in:
lib/query/engine/baidu.rb

Constant Summary collapse

BaseUri =
'http://www.baidu.com/s?'

Instance Attribute Summary

Attributes inherited from Base

#perpage

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#indexed?

Class Method Details

.popular?(wd) ⇒ Boolean

# Expands one or more seed words into further keywords, recursing level by level.
#
# For every word it runs #query, then collects `related_keywords` and
# `suggestions(word)`, pausing `sleeptime` seconds after each word.
#
# @param words [#each, Object] a collection of words, or a single word
# @param level [#to_i] number of expansion rounds still to run (including this one)
# @param sleeptime [Numeric] seconds to sleep after each word is processed
# @return [Array] the keywords found at this level followed by those of deeper levels
def extend(words, level = 3, sleeptime = 1)
    level = level.to_i - 1
    words = [words] unless words.respond_to?(:each)

    extensions = []
    words.each do |word|
        query(word)
        extensions.concat(related_keywords)
        extensions.concat(suggestions(word))
        sleep sleeptime
    end
    extensions.uniq!
    return extensions if level < 1

    # Forward sleeptime so deeper levels keep the caller's delay instead of
    # silently reverting to the default.
    extensions + extend(extensions, level, sleeptime)
end

Returns:

  • (Boolean)


36
37
38
# File 'lib/query/engine/baidu.rb', line 36

# Checks whether the Baidu index page for a word contains the "boxFlash" marker.
#
# The word is transcoded to GBK and percent-encoded as a query-string value,
# so reserved characters such as `&` or spaces cannot break the URL.
# (`URI.encode` was removed in Ruby 3.0 and did not escape reserved characters.)
#
# @param wd [String] the word to look up
# @return [Boolean] true when the response includes "boxFlash"
#   (presumably the index chart widget -- confirm against the live page)
def self.popular?(wd)
    word = URI.encode_www_form_component(wd.encode("GBK"))
    HTTParty.get("http://index.baidu.com/main/word.php?word=#{word}").include?("boxFlash")
end

.suggestions(wd) ⇒ Object



5
6
7
8
9
10
# File 'lib/query/engine/baidu.rb', line 5

# Fetches the suggestion list for a word from Baidu's suggestion endpoint.
#
# The response body is treated as GBK, converted to UTF-8, and the first
# bracketed `[...]` segment is parsed as a JSON array.
#
# @param wd [String] the word to get suggestions for
# @return [Array] the parsed suggestions, or an empty array when the
#   response contains no bracketed list
def self.suggestions(wd)
    require 'json'
    # Escape wd as a query-string value; URI.encode no longer exists in Ruby 3.0+
    # and left characters such as `&` unescaped.
    url = "http://suggestion.baidu.com/su?wd=#{URI.encode_www_form_component(wd)}&cb=callback"
    json = HTTParty.get(url).force_encoding('GBK').encode("UTF-8")
    m = /\[([^\]]*)\]/.match(json)
    # No list in the response: return an empty result rather than failing on nil.
    return [] unless m

    JSON.parse(m[0])
end

Instance Method Details

Runs a search of the form `domain:"xxx.yyy.com/path/file.html"`.



77
78
79
# File 'lib/query/engine/baidu.rb', line 77

# Runs #query with the search term `domain:"<uri>"`.
#
# @param uri [#to_s] the address placed inside the quoted domain: term
# @return whatever #query returns for that term
def links(uri)
    keyword = %(domain:"#{uri}")
    query(keyword)
end

#pages(host) ⇒ Object

Runs a search of the form `site:xxx.yyy.com`.



72
73
74
# File 'lib/query/engine/baidu.rb', line 72

# Runs #query with the search term `site:<host>`.
#
# @param host [#to_s] the host placed after the site: operator
# @return whatever #query returns for that term
def pages(host)
    site_filter = "site:#{host}"
    query(site_filter)
end

#pages_with(host, string) ⇒ Object

Runs a search of the form `site:xxx.yyy.com inurl:zzz`.



82
83
84
# File 'lib/query/engine/baidu.rb', line 82

# Runs #query with the search term `site:<host> inurl:<string>`.
#
# @param host [#to_s] the host placed after the site: operator
# @param string [#to_s] the text placed after the inurl: operator
# @return whatever #query returns for that term
def pages_with(host, string)
    terms = ["site:#{host}", "inurl:#{string}"]
    query(terms.join(" "))
end

#query(wd) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/query/engine/baidu.rb', line 40

# Sends a search request to BaseUri and wraps the response in a result object.
#
# The request carries `wd` (the search term) and, when @perpage is set,
# `rn` (@perpage converted to an integer). Each value is form-encoded
# individually, so characters such as `&`, `=` or spaces inside the term
# cannot corrupt the query string.
#
# Side effect: stores the raw response in @page.
#
# @param wd [#to_s] the search term
# @return [Query::Result::Baidu, false] the result with baseuri, pagenumber (1)
#   and perpage assigned, or false when the request or result construction
#   raised a StandardError (the message is written via `warn`)
def query(wd)
    params = { 'wd' => wd }
    params['rn'] = @perpage.to_i if @perpage
    uri = BaseUri + URI.encode_www_form(params)
    begin
        @page = HTTParty.get(uri)
        result = Query::Result::Baidu.new(@page)
        result.baseuri = uri
        result.pagenumber = 1
        result.perpage = @perpage
        result
    rescue StandardError => e
        # Only StandardError: interrupts, SystemExit and similar must propagate.
        warn e.to_s
        false
    end
end