Class: Query::Engine::Baidu

Inherits:
Object
  • Object
show all
Includes:
Query::Engine
Defined in:
lib/query/engine/baidu.rb

Constant Summary collapse

# Endpoint of the Baidu web search; the encoded query string is appended to it.
BaseUri = 'http://www.baidu.com/s?'

# Request options passed as the second argument to HTTParty.get when
# searching. Only a desktop Safari User-Agent header is set.
Options = {
    headers: {
        'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'
    }
}

Instance Attribute Summary

Attributes included from Query::Engine

#perpage

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Query::Engine

indexed?

Class Method Details

.popular?(wd) ⇒ Boolean

# Expands one or more seed keywords into further search terms, round by round.
#
# For each word this runs +query+, then appends +related_keywords+ and
# +suggestions(word)+ to the result, sleeping +sleeptime+ seconds after every
# word. The de-duplicated results of a round become the seeds of the next
# one. At least one round always runs; +level+ bounds the total number of
# rounds.
#
# NOTE(review): +related_keywords+ takes no argument, so it presumably reads
# state left behind by the preceding +query+ call - confirm against its
# definition.
#
# @param words [Object, #each] a single keyword or a collection of keywords
# @param level [#to_i] maximum number of expansion rounds (default 3)
# @param sleeptime [Numeric] seconds to sleep after each word (default 1)
# @return [Array] the unique extensions gathered across all rounds
def extend(words, level = 3, sleeptime = 1)
    level = level.to_i - 1
    words = [words] unless words.respond_to?(:each)

    extensions = []
    words.each do |word|
        query(word)
        extensions += related_keywords
        extensions += suggestions(word)
        sleep sleeptime
    end
    extensions.uniq!
    return extensions if level < 1

    # Forward sleeptime so deeper rounds honour the caller's delay instead of
    # silently falling back to the default, and de-duplicate again because a
    # later round can rediscover terms already found in this one.
    (extensions + extend(extensions, level, sleeptime)).uniq
end

Returns:

  • (Boolean)


40
41
42
# File 'lib/query/engine/baidu.rb', line 40

# Reports whether the Baidu Index page for +wd+ contains the text "boxFlash".
#
# The keyword is transcoded to GBK and percent-encoded before being placed
# in the +word+ query parameter.
#
# NOTE(review): "boxFlash" is presumably the id of the trend chart that only
# appears for indexed keywords - confirm against a live response.
#
# @param wd [String] the keyword to look up
# @return [Boolean] true when the fetched page includes "boxFlash"
# @raise [Encoding::UndefinedConversionError] if +wd+ has characters GBK cannot represent
def self.popular?(wd)
    # URI.encode was removed in Ruby 3.0 and left reserved characters such as
    # "&" and "=" unescaped; encode_www_form_component escapes the value
    # byte-wise (spaces become "+"), which is what a query parameter needs.
    word = URI.encode_www_form_component(wd.encode('GBK'))
    HTTParty.get("http://index.baidu.com/main/word.php?word=#{word}").include?('boxFlash')
end

.query(wd) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/query/engine/baidu.rb', line 44

# Runs a Baidu web search for +wd+ and wraps the response in a result object.
#
# The request URL is BaseUri followed by the form-encoded parameters +wd+
# and, when @perpage is set, +rn+ (taken from @perpage.to_i). The raw
# response is also kept in @page.
#
# @param wd [#to_s] the search expression
# @return [Query::Result::Baidu] result for the fetched page, with +baseuri+
#   set to the request URL, +pagenumber+ set to 1 and +perpage+ set to @perpage
def self.query(wd)
    params = [['wd', wd.to_s]]
    params << ['rn', @perpage.to_i] if @perpage

    # URI.encode was removed in Ruby 3.0, and running it over the whole URL
    # left "&" and "=" inside the search expression unescaped, corrupting the
    # query string. Encoding each parameter keeps the expression intact.
    uri = BaseUri + URI.encode_www_form(params)

    @page = HTTParty.get(uri, Options)
    result = Query::Result::Baidu.new(@page)
    result.baseuri = uri
    result.pagenumber = 1
    result.perpage = @perpage
    result
end

.suggestions(wd) ⇒ Object



9
10
11
12
13
14
# File 'lib/query/engine/baidu.rb', line 9

# Fetches Baidu's search-box suggestions for +wd+.
#
# The suggestion endpoint answers with a JSONP-style payload; the response
# is treated as GBK, converted to UTF-8, and the first bracketed list found
# in it is parsed as a JSON array.
#
# @param wd [#to_s] the partial keyword to complete
# @return [Array] the suggestions, or an empty array when the response
#   contains no bracketed list
# @raise [JSON::ParserError] if the bracketed text is not valid JSON
def self.suggestions(wd)
    # Loaded here rather than at file level so this method stays self-contained.
    require 'json'

    # URI.encode was removed in Ruby 3.0 and did not escape "&" or "=", which
    # would let the keyword leak into the other query parameters.
    keyword = URI.encode_www_form_component(wd)
    payload = HTTParty.get("http://suggestion.baidu.com/su?wd=#{keyword}&cb=callback").force_encoding('GBK').encode('UTF-8')

    list = /\[([^\]]*)\]/.match(payload)
    # Without a match the original crashed with NoMethodError on nil; an empty
    # list is the natural "no suggestions" answer.
    return [] unless list

    JSON.parse(list[0])
end

Instance Method Details

#links(uri) ⇒ Object

domain:xxx.yyy.com/path/file.html



81
82
83
# File 'lib/query/engine/baidu.rb', line 81

# Searches for +uri+ using the +domain:+ operator, with the URI wrapped in
# double quotes (e.g. domain:"xxx.yyy.com/path/file.html").
#
# @param uri [#to_s] the URI to place inside the quoted domain: expression
# @return [Object] whatever +query+ returns for that expression
def links(uri)
    expression = %(domain:"#{uri}")
    query(expression)
end

#pages(host) ⇒ Object

site:xxx.yyy.com



76
77
78
# File 'lib/query/engine/baidu.rb', line 76

# Searches for +host+ using the +site:+ operator (e.g. site:xxx.yyy.com).
#
# @param host [#to_s] the host name to place after site:
# @return [Object] whatever +query+ returns for that expression
def pages(host)
    expression = 'site:' + host.to_s
    query(expression)
end

#pages_with(host, string) ⇒ Object

site:xxx.yyy.com inurl:zzz



86
87
88
# File 'lib/query/engine/baidu.rb', line 86

# Searches for +host+ with the +site:+ operator, combined with an +inurl:+
# filter on +string+ (e.g. site:xxx.yyy.com inurl:zzz).
#
# @param host [#to_s] the host name to place after site:
# @param string [#to_s] the text to place after inurl:
# @return [Object] whatever +query+ returns for that expression
def pages_with(host,string)
    site_term = "site:#{host}"
    inurl_term = "inurl:#{string}"
    query([site_term, inurl_term].join(' '))
end