Class: MbaiduResult

Inherits:
SearchResult show all
Defined in:
lib/baidu.rb

Instance Method Summary collapse

Methods inherited from SearchResult

#rank, #ranks_for

Constructor Details

#initialize(body, baseuri, pagenumber = nil) ⇒ MbaiduResult

Returns a new instance of MbaiduResult.



125
126
127
128
129
130
131
132
133
# File 'lib/baidu.rb', line 125

# Wraps one page of search results.
#
# @param body the raw HTML of the page; it is handed to Nokogiri::HTML and
#   the parsed document is kept in @body
# @param baseuri the URL the page was fetched from, kept in @baseuri
# @param pagenumber the number of this page; nil (the default) is stored as 1
def initialize(body, baseuri, pagenumber = nil)
    @body = Nokogiri::HTML(body)
    @baseuri = baseuri
    # Only nil falls back to the first page; any other value is stored as given.
    @pagenumber = pagenumber.nil? ? 1 : pagenumber
end

Instance Method Details

#next ⇒ Object

下一页



205
206
207
208
209
210
# File 'lib/baidu.rb', line 205

# Fetches the page that follows this one.
#
# Looks up the anchor whose text is "下一页" ("next page") in the current
# document, resolves its href against @baseuri, downloads that URL with
# HTTParty and wraps the response in a new MbaiduResult whose page number is
# one higher than this page's.
#
# @return [MbaiduResult, nil] the following page, or nil when the current
#   page has no "下一页" link (or the link carries no href)
def next
    link = @body.xpath('//a[text()="下一页"]').first
    # Without a link there is nothing to follow; report that with nil rather
    # than failing with NoMethodError on nil.
    return nil if link.nil? || link['href'].nil?

    url = URI.join(@baseuri, link['href']).to_s
    MbaiduResult.new(HTTParty.get(url), url, @pagenumber + 1)
end

#ranks ⇒ Object

返回当前页所有查询结果



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/baidu.rb', line 136

# Returns every search result found on the current page.
#
# The page is parsed once; the resulting hash is memoized in @ranks and
# returned as-is on later calls.
#
# Each value is a hash with the string keys 'href', 'text', 'is_mobile' and
# 'host'. Each key is the result's position as a string: the number taken
# from the "&order=N&" parameter of the result link, plus ten for every page
# before this one. Results whose link has no such parameter are all stored
# under the empty-string key, so a later one replaces an earlier one.
#
# @return [Hash{String => Hash}] the results of this page keyed by position
def ranks
    # Already parsed: hand back the cached result.
    return @ranks unless @ranks.nil?

    # Build into a local so a failure part-way through never leaves a
    # half-filled hash memoized in @ranks.
    parsed = {}
    @body.xpath('//div[@class="result"]').each do |result|
        anchor = result.search("a").first
        # A result block without a link cannot be ranked; skip it instead of
        # raising on nil.
        next if anchor.nil?

        href = anchor['href'].to_s
        order = href[/&order=(\d+)&/, 1]
        id = order.nil? ? nil : (@pagenumber - 1) * 10 + order.to_i

        # The site element is optional; fall back to an empty host name.
        site = result.search('[@class="site"]').first
        host = site.nil? ? '' : site.text

        parsed[id.to_s] = {
            'href' => href,
            'text' => anchor.text,
            # An <img> inside the link is what marks the result as mobile.
            'is_mobile' => !anchor.search("img").empty?,
            # Drops the first non-breaking space in the host text.
            'host' => host.sub(/\u00A0/, '')
        }
    end
    @ranks = parsed
end