Class: Query::Engine::Baidu

Inherits:
Object
  • Object
show all
Includes:
Query::Engine
Defined in:
lib/query/engine/baidu.rb

Constant Summary collapse

Host =
'www.baidu.com'
BaseUri =
'http://www.baidu.com/s?'
Options =
{
    :headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'}
}

Instance Attribute Summary

Attributes included from Query::Engine

#perpage

Class Method Summary collapse

Methods included from Query::Engine

indexed?

Class Method Details

.links(uri) ⇒ Object

Query form: domain:"xxx.yyy.com/path/file.html"



90
91
92
# File 'lib/query/engine/baidu.rb', line 90

# Searches for a URI using the +domain:+ operator, with the URI wrapped
# in double quotes.
#
# @param uri [#to_s] URI to look up, e.g. xxx.yyy.com/path/file.html
# @return [Object] whatever +query+ returns for the built search string
def self.links(uri)
    search_term = "domain:\"#{uri}\""
    query(search_term)
end

.pages(host) ⇒ Object



85
86
87
# File 'lib/query/engine/baidu.rb', line 85

# Searches with the +site:+ operator restricted to the given host.
#
# @param host [#to_s] host name to restrict the search to
# @return [Object] whatever +query+ returns for the built search string
def self.pages(host)
    search_term = "site:#{host}"
    query(search_term)
end

.pages_with(host, string) ⇒ Object

Query form: site:xxx.yyy.com inurl:zzz



95
96
97
# File 'lib/query/engine/baidu.rb', line 95

# Searches a host for pages whose URL contains a given string, combining
# the +site:+ and +inurl:+ operators.
#
# @param host [#to_s] host name to restrict the search to
# @param string [#to_s] text passed to the +inurl:+ operator
# @return [Object] whatever +query+ returns for the built search string
def self.pages_with(host,string)
    search_term = "site:#{host} inurl:#{string}"
    query(search_term)
end

.popular?(wd) ⇒ Boolean

# Expands one or more seed words into a list of further terms.
#
# For every word it runs +query+, then appends whatever +related_keywords+
# and +suggestions(word)+ return, pausing +sleeptime+ seconds per word.
# The de-duplicated terms of a round become the seed words of the next
# round until +level+ rounds have run.
#
# NOTE(review): the result of +query+ is discarded here; presumably
# +related_keywords+ reads state that +query+ populates -- confirm.
#
# @param words [String, #each] a seed word, or a collection of seed words
# @param level [#to_i] number of expansion rounds (default 3)
# @param sleeptime [Numeric] seconds to pause after each word (default 1)
# @return [Array] terms of this round followed by the terms of deeper rounds
def extend(words,level=3,sleeptime=1)

    level = level.to_i - 1
    # Wrap a lone word (anything without #each) so the loop below works.
    words = [words] unless words.respond_to? 'each'

    extensions = []
    words.each do |word|
        self.query(word)
        extensions += related_keywords
        extensions += suggestions(word)
        sleep sleeptime
    end
    extensions.uniq!
    return extensions if level < 1
    # Forward sleeptime so deeper rounds keep the caller's delay instead of
    # silently falling back to the 1-second default.
    extensions + extend(extensions,level,sleeptime)
end

Returns:

  • (Boolean)


41
42
43
# File 'lib/query/engine/baidu.rb', line 41

# Reports whether the index.baidu.com word page for a keyword contains
# the marker text "boxFlash".
#
# NOTE(review): the marker presumably signals that Baidu Index has data
# for the keyword -- confirm against the live page.
#
# @param wd [String] keyword; transcoded to GBK before percent-encoding
# @return [Boolean] true when the fetched response includes "boxFlash"
def self.popular?(wd)
    # URI.encode was removed in Ruby 3.0 and never escaped reserved characters
    # such as "&"; encode_www_form_component escapes the GBK bytes safely.
    word = URI.encode_www_form_component(wd.encode("GBK"))
    HTTParty.get("http://index.baidu.com/main/word.php?word=#{word}").include?("boxFlash")
end

.query(wd, params = {}) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/query/engine/baidu.rb', line 45

# Runs a search against Host and wraps the response in a
# Query::Result::Baidu.
#
# The query string is built from +wd+, the optional +rn+ (results per
# page, taken from @perpage when set) and any extra +params+. The fetched
# response is also stored in @page.
#
# @param wd [#to_s] search words; percent-encoded before being sent
# @param params [Hash] extra query parameters, appended as "key=value"
#   exactly as given (they are NOT escaped here, so callers must pass
#   URL-safe values)
# @return [Query::Result::Baidu] result with baseuri, pagenumber (1) and
#   perpage set
def self.query(wd,params={})
    # URI.encode was removed in Ruby 3.0 and left reserved characters such as
    # "&" unescaped, which could corrupt the query string.
    q = ["wd=#{URI.encode_www_form_component(wd)}"]
    q << "rn=#{@perpage.to_i}" if @perpage
    params.each do |k,v|
        q << "#{k}=#{v}"
    end
    uri = URI::HTTP.build(:host=>Host,:path=>'/s',:query=>q.join('&'))
    @page = HTTParty.get(uri,Options)
    r = Query::Result::Baidu.new(@page)
    r.baseuri = uri
    r.pagenumber = 1
    r.perpage = @perpage
    r
end

.query_within(host, query) ⇒ Object

Query form: <query> site:xxx.yyy.com



81
82
83
# File 'lib/query/engine/baidu.rb', line 81

# Runs a search restricted to one host by appending a +site:+ operator
# to the caller's query text.
#
# @param host [#to_s] host name to restrict the search to
# @param query [#to_s] search words
# @return [Object] whatever +query+ (the class method) returns
def self.query_within(host,query)
    search_term = "#{query} site:#{host}"
    self.query(search_term)
end

.suggestions(wd) ⇒ Object



10
11
12
13
14
15
# File 'lib/query/engine/baidu.rb', line 10

# Fetches search suggestions for a word from suggestion.baidu.com.
#
# The response is treated as GBK, transcoded to UTF-8, and the first
# bracketed list found in it is parsed as JSON.
#
# @param wd [#to_s] word to get suggestions for; percent-encoded before sending
# @return [Array] the parsed list, or an empty array when the response
#   contains no bracketed list
def self.suggestions(wd)
    require 'json'
    # URI.encode was removed in Ruby 3.0 and left "&" unescaped, which would
    # let the word bleed into the cb parameter.
    term = URI.encode_www_form_component(wd)
    json = HTTParty.get("http://suggestion.baidu.com/su?wd=#{term}&cb=callback").force_encoding('GBK').encode("UTF-8")
    m = /\[([^\]]*)\]/.match(json)
    # Without a bracketed list there is nothing to parse; avoid calling [] on nil.
    return [] unless m
    JSON.parse(m[0])
end