Class: BaiduResult

Inherits:
SearchResult show all
Defined in:
lib/baidu.rb

Instance Method Summary collapse

Methods inherited from SearchResult

#rank, #ranks_for

Constructor Details

#initialize(page, baseuri, pagenumber = 1) ⇒ BaiduResult

Returns a new instance of BaiduResult.



312
313
314
315
316
317
318
319
# File 'lib/baidu.rb', line 312

# Builds a result object for one Baidu result page.
#
# page       - HTML source of the result page; it is handed to Nokogiri::HTML for parsing.
# baseuri    - URI the page was fetched from; #next resolves the "next page" link against it.
# pagenumber - number of this page within the result set (defaults to 1); #next passes
#              @pagenumber + 1 to the following page.
def initialize(page, baseuri, pagenumber = 1)
    # The page is no longer dumped to the fixed path /tmp/file: that leftover debugging
    # write hit the disk on every construction and raised wherever /tmp is not writable.
    @page = Nokogiri::HTML(page)
    @baseuri = baseuri
    @pagenumber = pagenumber
end

Instance Method Details

#ads_bottom ⇒ Object



356
357
358
359
360
361
362
363
364
365
366
367
# File 'lib/baidu.rb', line 356

# Collects the ads from the ad tables (class EC_mr15 or ec_pp_f) that carry an id attribute.
#
# Returns a hash keyed by the last character of each table's id. Every value is a hash
# with 'title' (stripped text of the table's first link) and 'href'/'host' (the first
# whitespace-separated token of the table's green font text).
def ads_bottom
    ads = {}
    @page.search("//table[@class='EC_mr15']|//table[@class='ec_pp_f']").each do |table|
        id = table['id']
        # The guard used to be inverted (`next unless id.nil?`): tables with an id were
        # skipped and a table without one raised NoMethodError on the slice below.
        next if id.nil?
        id = id[-1,1]
        href = table.search("font[@color='#008000']").text.split(/\s/).first
        link = table.search("a").first
        # Skip tables that lack the green URL text or a link, as ads_right does,
        # instead of calling a method on nil.
        next if href.nil? || link.nil?
        href = href.strip
        ads[id] = {'title' => link.text.strip, 'href' => href, 'host' => href}
    end
    ads
end

#ads_right ⇒ Object



368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
# File 'lib/baidu.rb', line 368

# Collects the ads found inside the div with id "ec_im_container".
#
# Every div[@id] match inside that container contributes one entry. Its key is the
# last character of the div's id read as an integer, plus one, as a string (only that
# single character is used). Each entry holds 'title' (text of the div's first link)
# and 'href'/'host' (text of its first green font element). Divs missing either
# element are skipped.
def ads_right
    @page.search("//div[@id='ec_im_container']").each_with_object({}) do |container, found|
        container.search("div[@id]").each do |entry|
            key = (entry['id'][-1,1].to_i + 1).to_s
            anchor = entry.search("a").first
            next if anchor.nil?
            caption = anchor.text
            green = entry.search("font[@color='#008000']").first
            next if green.nil?
            address = green.text
            found[key] = {'title' => caption, 'href' => address, 'host' => address}
        end
    end
end

#ads_top ⇒ Object



343
344
345
346
347
348
349
350
351
352
353
354
355
# File 'lib/baidu.rb', line 343

# Collects the ads from the ad tables (class EC_mr15 or ec_pp_f) that carry an id attribute.
#
# Returns a hash keyed "1", "2", ... in the order the tables are returned by the search.
# Every value is a hash with 'title' (stripped text of the table's first link) and
# 'href'/'host' (the first whitespace-separated token of the table's green font text).
def ads_top
    ads = {}
    position = 0
    @page.search("//table[@class='EC_mr15']|//table[@class='ec_pp_f']").each do |table|
        next if table['id'].nil?
        href = table.search("font[@color='#008000']").text.split(/\s/).first
        link = table.search("a").first
        # A table without the green URL text or without a link used to raise
        # NoMethodError on nil; skip it instead, as ads_right does.
        next if href.nil? || link.nil?
        # Count only the ads that are actually emitted so the keys stay contiguous.
        position += 1
        href = href.strip
        ads[position.to_s] = {'title' => link.text.strip, 'href' => href, 'host' => href}
    end
    ads
end

#has_result? ⇒ Boolean

Returns:

  • (Boolean)


416
417
418
# File 'lib/baidu.rb', line 416

# Tells whether the page contains results: true when the page has no div with class
# "nors", false when at least one such div is present.
def has_result?
    no_result_markers = @page.search('//div[@class="nors"]')
    no_result_markers.empty?
end

#how_many ⇒ Object

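Note: the text below does not describe #how_many. It appears to be a commented-out #rank(host) method that sits directly above #how_many in lib/baidu.rb and was picked up as its docstring.

Returns the top rank number from @ranks for the given host.

def rank(host) # on base of ranks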

ranks.each do |id,line|
    id = id.to_i
    if host.class == Regexp
        return id if line['host'] =~ host
    elsif host.class == String
        return id if line['host'] == host
    end
end
return nil

end



398
399
400
# File 'lib/baidu.rb', line 398

# Reads the result count shown on the page.
#
# Every span with class "nums" is reduced to the integer formed by the digits in its
# content; the value for the first span is returned and memoized in @how_many.
# Returns nil (and caches nothing) when the page has no such span.
def how_many
    return @how_many if @how_many
    counts = @page.search("//span[@class='nums']").map do |span|
        span.content.gsub(/\D/,'').to_i unless span.nil?
    end
    @how_many = counts.first
end

#next ⇒ Object



406
407
408
409
410
411
412
413
414
# File 'lib/baidu.rb', line 406

# Fetches the following result page.
#
# Looks for the first link whose text is "下一页>", resolves its href against @baseuri,
# downloads it with HTTParty.get and wraps the response in a new BaiduResult whose page
# number is @pagenumber + 1. Returns nil when the page has no such link.
def next
    link = @page.xpath('//a[text()="下一页>"]').first
    return unless link
    target = URI.join(@baseuri, link['href']).to_s
    BaiduResult.new(HTTParty.get(target), target, @pagenumber + 1)
end

#ranks ⇒ Object



321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/baidu.rb', line 321

# Parses the result tables (class "result" or "result-op") of the page.
#
# Returns a hash keyed by each table's id attribute. Every value is a hash with:
#   'text' - text of the table's first link (nil when the table has no link)
#   'href' - raw text of the table's first element with class "g" (nil when absent)
#   'host' - host parsed from that text after stripping it and prefixing "http://"
#            (nil when the element is absent)
#
# The hash is memoized in @ranks once the whole page has been parsed.
def ranks
    return @ranks unless @ranks.nil?
    # Build into a local and assign at the end, so an exception part-way through
    # cannot leave a half-filled hash cached in @ranks.
    found = {}
    @page.search("//table[@class=\"result\"]|//table[@class=\"result-op\"]").each do |table|
        shown_url = table.search("[@class=\"g\"]").first
        shown_url = shown_url.text unless shown_url.nil?
        link = table.search("a").first
        host = nil
        unless shown_url.nil?
            # URI.encode was removed in Ruby 3.0; it was a thin wrapper around the
            # default parser's escape, which is called directly here.
            escaped = URI::DEFAULT_PARSER.escape("http://#{shown_url.strip}")
            host = Addressable::URI.parse(escaped).host
        end
        found[table['id']] = {
            # A result table without any link used to raise NoMethodError on nil.
            'text' => link.nil? ? nil : link.text,
            'href' => shown_url,
            'host' => host
        }
    end
    @ranks = found
end


402
403
404
# File 'lib/baidu.rb', line 402

# Lists the related search keywords: the text of every link found in a table row
# inside the div with id "rs". The array is memoized in @related_keywords.
def related_keywords
    return @related_keywords if @related_keywords
    suggestion_links = @page.search("//div[@id=\"rs\"]//tr//a")
    @related_keywords = suggestion_links.map(&:text)
end