Class: Query::Result::Baidu
- Inherits:
-
Base
- Object
- Base
- Query::Result::Baidu
show all
- Defined in:
- lib/query/result/baidu.rb
Instance Attribute Summary
Attributes inherited from Base
#baseuri, #pagenumber, #perpage
Instance Method Summary
collapse
Methods inherited from Base
#initialize, #rank, #ranks_for, #whole
Instance Method Details
#ads_bottom ⇒ Object
32
33
34
35
36
|
# File 'lib/query/result/baidu.rb', line 32
# Ads shown at the bottom of the result page.
#
# Returns an empty Hash when the page has no table whose bgcolor attribute
# is exactly 'f5f5f5'; otherwise delegates to #ads_top and returns its result.
#
# NOTE(review): this XPath matches 'f5f5f5' without the leading '#', while
# #ads_top matches '#f5f5f5' -- confirm the difference is intentional.
#
# @return [Hash] ads as produced by #ads_top, or {} when no such table exists
def ads_bottom
  bottom_tables = @page.search("//table[@bgcolor='f5f5f5']")
  bottom_tables.empty? ? {} : ads_top
end
|
#ads_right ⇒ Object
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
# File 'lib/query/result/baidu.rb', line 62
# Ads shown in the right-hand column (inside div#ec_im_container).
#
# Every descendant div carrying an id attribute is inspected; divs lacking
# either an <a> element or a font[color='#008000'] element are skipped.
# The Hash key is the last character of the div id, converted to an Integer,
# plus one, as a String.
#
# NOTE(review): only the final character of the id is used, so two ids ending
# in the same digit would map to the same key -- confirm against real markup.
#
# @return [Hash{String => Hash}] position => {'title', 'href', 'host'}
def ads_right
  @page.search("//div[@id='ec_im_container']").each_with_object({}) do |container, ads|
    container.search("div[@id]").each do |div|
      position = div['id'][-1, 1].to_i + 1
      anchor = div.search("a").first
      next if anchor.nil?
      caption = anchor.text
      display = div.search("font[@color='#008000']").first
      next if display.nil?
      shown_url = display.text
      # The displayed URL text is stored as both 'href' and 'host'.
      ads[position.to_s] = {'title' => caption, 'href' => shown_url, 'host' => shown_url}
    end
  end
end
|
#ads_top ⇒ Object
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
# File 'lib/query/result/baidu.rb', line 37
# Ads shown above the organic results.
#
# First pass: every table with bgcolor '#f5f5f5' that has an id attribute.
# If that pass finds nothing, a second pass scans all tables and keeps those
# whose id converts (String#to_i) to 3000 or more.
# In both passes the Hash key is the three characters of the id starting at
# index 2, converted to an Integer and back to a String; the value is
# whatever #parse_ad returns for the table.
#
# @return [Hash{String => Object}] ad key => parsed ad
def ads_top
  ads = {}
  @page.search("//table[@bgcolor='#f5f5f5']").each do |table|
    raw_id = table['id']
    ads[raw_id[2, 3].to_i.to_s] = parse_ad(table) unless raw_id.nil?
  end
  return ads unless ads.empty?

  # Fallback: no coloured ad tables matched, so scan every table by id.
  @page.search("//table").each do |table|
    raw_id = table['id']
    next unless !raw_id.nil? && raw_id.to_i >= 3000
    ads[raw_id[2, 3].to_i.to_s] = parse_ad(table)
  end
  ads
end
|
#count ⇒ Object
Returns the top rank number from @ranks for the given host.
def rank(host) # based on ranks
ranks.each do |id,line|
id = id.to_i
if host.class == Regexp
return id if line['host'] =~ host
elsif host.class == String
return id if line['host'] == host
end
end
return nil
end
92
93
94
|
# File 'lib/query/result/baidu.rb', line 92
# Number read from the first span.nums element on the page.
#
# All non-digit characters are removed from the span's content and the
# remainder is converted with String#to_i. The value is memoized in @count
# once it is truthy; when no such span exists the result is nil.
#
# @return [Integer, nil]
def count
  return @count if @count

  first_span = @page.search("//span[@class='nums']").first
  @count = first_span.nil? ? nil : first_span.content.gsub(/\D/, '').to_i
end
|
#has_result? ⇒ Boolean
114
115
116
117
118
|
# File 'lib/query/result/baidu.rb', line 114
# Whether the page is considered to contain results.
#
# Looks for the first anchor whose text is exactly "提交网址"; the page is
# treated as having no result only when that anchor exists and its href
# contains 'sitesubmit'.
#
# @return [Boolean]
def has_result?
  submit_link = @page.search('//a[text()="提交网址"]').first
  !(submit_link && submit_link['href'].include?('sitesubmit'))
end
|
#next ⇒ Object
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
# File 'lib/query/result/baidu.rb', line 100
# Fetches the following result page.
#
# Finds the first anchor whose text is "下一页>", resolves its href against
# @baseuri, downloads it with HTTParty and wraps the response in a new
# Query::Result::Baidu carrying the resolved URL as baseuri, the current
# page number plus one, and the same perpage value.
#
# @return [Query::Result::Baidu, nil] nil when the page has no such anchor
def next
  link = @page.xpath('//a[text()="下一页>"]').first
  return if link.nil?

  next_url = URI.join(@baseuri, link['href']).to_s
  Query::Result::Baidu.new(HTTParty.get(next_url)).tap do |result|
    result.baseuri = next_url
    result.pagenumber = @pagenumber + 1
    result.perpage = @perpage
  end
end
|
#parse_ad(table) ⇒ Object
57
58
59
60
61
|
# File 'lib/query/result/baidu.rb', line 57
# Extracts one ad from an ad table node.
#
# 'href' and 'host' are both the first whitespace-delimited token of the
# text inside font[color='#008000']; 'title' is the stripped text of the
# first <a> element.
#
# The display text is stripped before splitting so leading whitespace does
# not produce an empty first token, and a missing font or anchor element
# yields nil for the corresponding fields instead of raising NoMethodError.
#
# @param table [#search] node to search; expected to answer #search with a
#   collection responding to #text and #first
# @return [Hash] {'title' => String or nil, 'href' => String or nil, 'host' => String or nil}
def parse_ad(table)
  display_text = table.search("font[@color='#008000']").text
  href = display_text.strip.split(/\s+/).first
  anchor = table.search("a").first
  title = anchor.nil? ? nil : anchor.text.strip
  {'title' => title, 'href' => href, 'host' => href}
end
|
#ranks ⇒ Object
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
# File 'lib/query/result/baidu.rb', line 4
# Organic results keyed by the id attribute of each result table.
#
# Scans tables with class "result" or "result-op". Each table gets an entry
# (an empty Hash) as soon as it is seen; entries are only filled in when the
# table contains an <h3>:
#   'text' - text of the first <h3>
#   'href' - raw text of the first element with class "g" (nil if absent)
#   'host' - host parsed from "http://" + the stripped 'href' text (nil if absent)
# The result is memoized in @ranks.
#
# NOTE(review): URI.encode was removed in Ruby 3.0 -- confirm the Ruby
# version this runs on before upgrading.
#
# @return [Hash{String => Hash}]
def ranks
  return @ranks unless @ranks.nil?

  @ranks = {}
  @page.search("//table[@class=\"result\"]|//table[@class=\"result-op\"]").each do |table|
    key = table['id']
    entry = (@ranks[key] = {})
    display = table.search("[@class=\"g\"]").first
    display_text = display.nil? ? nil : display.text
    heading = table.search("h3").first
    next if heading.nil?

    entry['text'] = heading.text
    entry['href'] = display_text
    entry['host'] =
      if display_text.nil?
        nil
      else
        Addressable::URI.parse(URI.encode("http://#{display_text.strip}")).host
      end
  end
  @ranks
end
|
96
97
98
|
# File 'lib/query/result/baidu.rb', line 96
# Texts of the links found inside table rows of div#rs.
#
# The list is memoized in @related_keywords after the first call.
#
# @return [Array<String>]
def related_keywords
  return @related_keywords if @related_keywords

  keyword_links = @page.search("//div[@id=\"rs\"]//tr//a")
  @related_keywords = keyword_links.map(&:text)
end
|