Class: Query::Engine::Baidu

Inherits:
Object
  • Object
show all
Includes:
Query::Engine
Defined in:
lib/query/engine/baidu.rb

Constant Summary collapse

Host =
'www.baidu.com'
BaseUri =
'http://www.baidu.com/s?'
Options =
{
  :headers => {"User-Agent" => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1 Safari/537.73.11'}
}

Instance Attribute Summary

Attributes included from Query::Engine

#perpage

Class Method Summary collapse

Methods included from Query::Engine

indexed?

Class Method Details

.links(uri) ⇒ Object

Searches with the domain: operator, e.g. domain:"xxx.yyy.com/path/file.html"



93
94
95
# File 'lib/query/engine/baidu.rb', line 93

# Searches for the given URI with the `domain:` operator; the URI is
# wrapped in double quotes inside the search string.
#
# @param uri [#to_s] URI to look up, e.g. xxx.yyy.com/path/file.html
# @return [Object] whatever .query returns for the composed search string
def self.links(uri)
  search_terms = 'domain:"' + uri.to_s + '"'
  query(search_terms)
end

.pages(host) ⇒ Object



88
89
90
# File 'lib/query/engine/baidu.rb', line 88

# Searches for pages of one host with the `site:` operator.
#
# @param host [#to_s] host name, e.g. xxx.yyy.com
# @return [Object] whatever .query returns for the composed search string
def self.pages(host)
  site_filter = 'site:' + host.to_s
  query(site_filter)
end

.pages_with(host, string) ⇒ Object

Searches one host for URLs containing a string, e.g. site:xxx.yyy.com inurl:zzz



98
99
100
# File 'lib/query/engine/baidu.rb', line 98

# Searches one host for pages whose URL contains a string, combining the
# `site:` and `inurl:` operators.
#
# @param host [#to_s] host name, e.g. xxx.yyy.com
# @param string [#to_s] text that must appear in the URL
# @return [Object] whatever .query returns for the composed search string
def self.pages_with(host, string)
  search_terms = format('site:%s inurl:%s', host, string)
  query(search_terms)
end

.popular?(wd) ⇒ Boolean

# Expands a seed list of keywords level by level.
#
# For each word this runs a search, appends whatever `related_keywords` and
# `suggestions(word)` return, then pauses for +sleeptime+ seconds. The
# de-duplicated results of one level become the input words of the next.
#
# @param words [String, #each] a single keyword or a collection of keywords
# @param level [#to_i] number of expansion rounds; values below 2 run one round
# @param sleeptime [Numeric] seconds to pause after each word, at every level
# @return [Array] keywords of each level, appended in order; each level is
#   de-duplicated on its own, so a keyword found at several levels repeats
def extend(words, level = 3, sleeptime = 1)
  level = level.to_i - 1
  words = [words] unless words.respond_to?(:each)

  extensions = []
  words.each do |word|
    self.query(word)
    extensions += related_keywords
    extensions += suggestions(word)
    sleep sleeptime
  end
  extensions.uniq!
  return extensions if level < 1

  # Hand sleeptime down explicitly; without it deeper levels would silently
  # fall back to the 1 second default instead of the caller's value.
  extensions + extend(extensions, level, sleeptime)
end

Returns:

  • (Boolean)


42
43
44
# File 'lib/query/engine/baidu.rb', line 42

# Reports whether the Baidu index page fetched for a word contains the
# marker text "boxFlash".
#
# The word is transcoded to GBK and percent-encoded byte by byte.
# URI.encode_www_form_component is used because URI.encode no longer
# exists on Ruby 3.0 and later.
#
# @param wd [String] the word to look up; must be convertible to GBK
# @return [Boolean] true when the response includes "boxFlash"
# @raise [Encoding::UndefinedConversionError] if wd has characters GBK lacks
def self.popular?(wd)
  word = URI.encode_www_form_component(wd.encode('GBK'))
  HTTParty.get("http://index.baidu.com/main/word.php?word=#{word}").include?('boxFlash')
end

.query(wd, params = {}) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/query/engine/baidu.rb', line 46

# Runs a search for +wd+ and wraps the fetched page in a
# Query::Result::Baidu.
#
# The query string is assembled with URI.encode_www_form, so the keyword and
# every extra parameter are percent-encoded (spaces become "+"). When
# @perpage is set it is sent as the `rn` parameter.
#
# @param wd [#to_s] search keywords, sent as the `wd` parameter
# @param params [Hash] extra query-string parameters; keys and values are
#   converted with to_s and then encoded
# @return [Query::Result::Baidu] result with baseuri set to the request URI,
#   pagenumber set to 1 and perpage set to @perpage
def self.query(wd, params = {})
  pairs = [['wd', wd.to_s]]
  pairs << ['rn', @perpage.to_i] if @perpage
  params.each { |key, value| pairs << [key.to_s, value.to_s] }

  uri = URI::HTTP.build(:host => Host, :path => '/s', :query => URI.encode_www_form(pairs))
  @page = HTTParty.get(uri, Options)

  result = Query::Result::Baidu.new(@page)
  result.baseuri = uri
  result.pagenumber = 1
  result.perpage = @perpage
  result
end

.query_within(host, query) ⇒ Object

Searches for the query restricted to one host, e.g. <query> site:xxx.yyy.com



84
85
86
# File 'lib/query/engine/baidu.rb', line 84

# Searches for +query+ restricted to one host by appending a `site:`
# operator to the keywords.
#
# @param host [#to_s] host name, e.g. xxx.yyy.com
# @param query [#to_s] search keywords
# @return [Object] whatever .query returns for the composed search string
def self.query_within(host, query)
  scoped_terms = format('%s site:%s', query, host)
  self.query(scoped_terms)
end

.suggestions(query, options = {}) ⇒ Object



11
12
13
14
15
16
# File 'lib/query/engine/baidu.rb', line 11

# Fetches search suggestions for +query+ from the Baidu suggestion endpoint.
#
# The response is re-labelled as GB18030, transcoded to UTF-8, and every
# value matching `"q": "..."` is collected.
#
# @param query [#to_s] the keyword to get suggestions for
# @param options [Hash] passed through to HTTParty.get unchanged
# @return [Array<String>] unique suggestion strings in order of appearance
def self.suggestions(query, options = {})
  callback = 'jQuery1102036467162938788533_1437556180622'
  # URI.encode no longer exists on Ruby 3.0+; form-component encoding is the
  # stdlib replacement for a single query-string value.
  keyword = URI.encode_www_form_component(query)
  timestamp = (Time.now.to_f * 1000).to_i
  response = HTTParty.get("https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=#{keyword}&json=1&p=3&sid=&req=2&csor=0&cb=#{callback}&_=#{timestamp}", options)

  # The JSONP wrapper is left in place: the scan only captures quoted "q"
  # values, so wrapper text never matches. Stripping it with String#delete
  # would be wrong, because delete removes every character of its argument
  # (letters, digits, parentheses) from the suggestions themselves.
  response.force_encoding('GB18030').encode('UTF-8').scan(/"q": "([^"]+)"/).flatten.uniq
end