Class: BaiduResult
- Inherits:
-
SearchResult
- Object
- SearchResult
- BaiduResult
- Defined in:
- lib/baidu.rb
Instance Method Summary collapse
- #ads_bottom ⇒ Object
- #ads_right ⇒ Object
- #ads_top ⇒ Object
- #has_result? ⇒ Boolean
- #how_many ⇒ Object
-
#initialize(page, baseuri, pagenumber = 1) ⇒ BaiduResult
constructor
A new instance of BaiduResult.
- #next ⇒ Object
- #ranks ⇒ Object
- #related_keywords ⇒ Object
Methods inherited from SearchResult
Constructor Details
#initialize(page, baseuri, pagenumber = 1) ⇒ BaiduResult
Returns a new instance of BaiduResult.
312 313 314 315 316 317 318 319 |
# File 'lib/baidu.rb', line 312
# Builds a result wrapper around one fetched search page.
#
# The previous implementation also dumped every page to the fixed path
# '/tmp/file'. That was a debugging leftover: it clobbered a shared,
# predictable file on every construction and made construction fail wherever
# that path is not writable, so it has been removed.
#
# @param page anything Nokogiri::HTML accepts (presumably the raw HTML of the
#   result page; `next` passes the HTTParty response here)
# @param baseuri the URL the page was fetched from; `next` resolves the
#   "next page" link against it
# @param pagenumber [Integer] page index of this result, defaults to 1
def initialize(page, baseuri, pagenumber = 1)
  @page = Nokogiri::HTML(page)
  @baseuri = baseuri
  @pagenumber = pagenumber
end
Instance Method Details
#ads_bottom ⇒ Object
356 357 358 359 360 361 362 363 364 365 366 367 |
# File 'lib/baidu.rb', line 356
# Collects the ads found in tables with class 'EC_mr15' or 'ec_pp_f'.
#
# Each ad is keyed by the last character of its table's id attribute. The
# value is a hash with 'title' (stripped text of the first anchor) and
# 'href'/'host' (both the first whitespace-delimited token of the green
# font text).
#
# Tables without an id, without any green-font text, or without an anchor are
# skipped. The original guard was inverted (`next unless id.nil?`): it skipped
# every table that had an id and then raised NoMethodError on `nil[-1, 1]`
# for tables that had none.
#
# NOTE(review): this scans the same tables as ads_top — confirm how top and
# bottom ads are meant to be told apart.
#
# @return [Hash{String => Hash}] ads keyed by the trailing id character
def ads_bottom
  ads = {}
  @page.search("//table[@class='EC_mr15']|//table[@class='ec_pp_f']").each do |table|
    id = table['id']
    next if id.nil? || id.empty?

    # String#split without arguments ignores leading whitespace, so the first
    # token is the displayed URL even when the text starts with a newline.
    href = table.search("font[@color='#008000']").text.split.first
    anchor = table.search("a").first
    next if href.nil? || anchor.nil?

    ads[id[-1, 1]] = { 'title' => anchor.text.strip, 'href' => href, 'host' => href }
  end
  ads
end
#ads_right ⇒ Object
368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 |
# File 'lib/baidu.rb', line 368
# Collects the ads listed inside the div whose id is 'ec_im_container'.
#
# Every nested div carrying an id attribute is treated as one ad slot. The
# slot's key is the last character of its id converted to an integer, plus
# one, as a string. Slots lacking an anchor or a green-font element are
# skipped. Title and URL texts are stored exactly as found (not stripped).
#
# @return [Hash{String => Hash}] 'title', 'href' and 'host' per ad slot;
#   'host' is the same text as 'href'
def ads_right
  @page.search("//div[@id='ec_im_container']").each_with_object({}) do |container, ads|
    container.search("div[@id]").each do |slot|
      position = slot['id'][-1, 1].to_i + 1

      anchor = slot.search("a").first
      next if anchor.nil?
      title = anchor.text

      shown_url = slot.search("font[@color='#008000']").first
      next if shown_url.nil?
      url = shown_url.text

      ads[position.to_s] = { 'title' => title, 'href' => url, 'host' => url }
    end
  end
end
#ads_top ⇒ Object
343 344 345 346 347 348 349 350 351 352 353 354 355 |
# File 'lib/baidu.rb', line 343
# Collects the ads found in tables with class 'EC_mr15' or 'ec_pp_f' that
# carry an id attribute, numbering them "1", "2", ... in document order.
#
# 'href' and 'host' are both the first whitespace-delimited token of the
# table's green-font text; 'title' is the stripped text of the first anchor.
#
# Tables with no green-font text or no anchor are skipped and do not consume
# a position. Previously they raised NoMethodError (`nil.strip` / `nil.text`),
# and text with leading whitespace produced an empty href.
#
# @return [Hash{String => Hash}] ads keyed by their 1-based position
def ads_top
  ads = {}
  position = 0
  @page.search("//table[@class='EC_mr15']|//table[@class='ec_pp_f']").each do |table|
    next if table['id'].nil?

    # String#split without arguments ignores leading whitespace, so the first
    # token is the displayed URL even when the text starts with a newline.
    href = table.search("font[@color='#008000']").text.split.first
    anchor = table.search("a").first
    next if href.nil? || anchor.nil?

    position += 1
    ads[position.to_s] = { 'title' => anchor.text.strip, 'href' => href, 'host' => href }
  end
  ads
end
#has_result? ⇒ Boolean
416 417 418 |
# File 'lib/baidu.rb', line 416
# Reports whether the page is free of a div with class "nors".
# NOTE(review): presumably "nors" is Baidu's "no results" banner — confirm.
#
# @return [Boolean] true when no such div is present on the page
def has_result?
  no_result_notice = @page.search('//div[@class="nors"]')
  no_result_notice.empty?
end
#how_many ⇒ Object
# Looks up the position of the first entry in `ranks` whose 'host' matches.
#
# @param host [String, Regexp] a String is compared for equality with the
#   entry's 'host'; a Regexp is matched against it. Any other class never
#   matches.
# @return [Integer, nil] the matching entry's key converted with to_i, or
#   nil when nothing matches
def rank(host) # based on ranks
  ranks.each do |key, entry|
    position = key.to_i
    matched =
      if host.instance_of?(Regexp)
        entry['host'] =~ host
      elsif host.instance_of?(String)
        entry['host'] == host
      end
    return position if matched
  end
  nil
end
398 399 400 |
# File 'lib/baidu.rb', line 398
# Reads the number shown in the first span with class 'nums'.
#
# Every non-digit character of the span's content is removed and the rest is
# converted with to_i. A truthy result is cached in @how_many; nil (no such
# span) is not cached, so the lookup is repeated on the next call.
#
# @return [Integer, nil] the parsed number, or nil when the span is absent
def how_many
  return @how_many if @how_many

  counts = @page.search("//span[@class='nums']").map do |num|
    num.nil? ? nil : num.content.gsub(/\D/, '').to_i
  end
  @how_many = counts.first
end
#next ⇒ Object
406 407 408 409 410 411 412 413 414 |
# File 'lib/baidu.rb', line 406
# Fetches the following result page.
#
# Finds the first anchor whose text is exactly "下一页>", resolves its href
# against @baseuri, downloads that URL with HTTParty and wraps the response
# in a new BaiduResult whose page number is one higher than this one's.
#
# @return [BaiduResult, nil] the next page, or nil when no such link exists
def next
  link = @page.xpath('//a[text()="下一页>"]').first
  return if link.nil?

  next_url = URI.join(@baseuri, link['href']).to_s
  BaiduResult.new(HTTParty.get(next_url), next_url, @pagenumber + 1)
end
#ranks ⇒ Object
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 |
# File 'lib/baidu.rb', line 321
# Extracts the organic results from tables with class "result" or
# "result-op", keyed by each table's id attribute.
#
# Each value holds:
#   'text' - text of the table's first anchor (nil when there is no anchor)
#   'href' - raw text of the first element with class "g" (nil when absent)
#   'host' - host parsed from "http://<stripped href text>" (nil when absent)
#
# Changes from the previous implementation:
# * The hash is built locally and memoized only once complete, so an
#   exception part-way through no longer leaves a partial @ranks cached.
# * A result table without an anchor no longer raises NoMethodError.
# * URI.encode (removed from the standard library in Ruby 3.0) is replaced by
#   Addressable::URI.encode, from the library already used for parsing.
#
# @return [Hash{String => Hash}] memoized after the first successful call
def ranks
  return @ranks unless @ranks.nil?

  results = {}
  @page.search("//table[@class=\"result\"]|//table[@class=\"result-op\"]").each do |table|
    shown = table.search("[@class=\"g\"]").first
    url = shown && shown.text
    anchor = table.search("a").first

    results[table['id']] = {
      'text' => anchor && anchor.text,
      'href' => url,
      # The displayed URL has no scheme, so one is prepended before parsing.
      'host' => url && Addressable::URI.parse(Addressable::URI.encode("http://#{url.strip}")).host
    }
  end
  @ranks = results
end
#related_keywords ⇒ Object
402 403 404 |
# File 'lib/baidu.rb', line 402
# Lists the text of every anchor inside a table row of the div with id "rs".
# NOTE(review): presumably this is Baidu's "related searches" box — confirm.
#
# The listing had lost the method name after `def`, which made it a syntax
# error; the name is restored here.
#
# @return [Array<String>] anchor texts in document order, memoized after the
#   first call (an empty array is cached too)
def related_keywords
  @related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map(&:text)
end