Class: MbaiduResult
- Inherits: SearchResult
- Ancestor chain: Object → SearchResult → MbaiduResult
- Defined in:
- lib/baidu.rb
Instance Method Summary
- #ads_bottom ⇒ Object
- #ads_right ⇒ Object
- #ads_top ⇒ Object
-
#initialize(body, baseuri, pagenumber = nil) ⇒ MbaiduResult
constructor
A new instance of MbaiduResult.
-
#next ⇒ Object
下一页.
-
#ranks ⇒ Object
返回当前页所有查询结果.
- #related_keywords ⇒ Object
Methods inherited from SearchResult
Constructor Details
#initialize(body, baseuri, pagenumber = nil) ⇒ MbaiduResult
Returns a new instance of MbaiduResult.
154 155 156 157 158 159 160 161 162 |
# File 'lib/baidu.rb', line 154
#
# Wraps one fetched result page.
#
# @param body the HTML of the page; it is handed to Nokogiri::HTML and the
#   parsed document is kept in @body
# @param baseuri the address the page was fetched from, stored unchanged in
#   @baseuri
# @param pagenumber [Integer, nil] number of this page; nil (the default)
#   is stored as 1
def initialize(body, baseuri, pagenumber = nil)
  @body = Nokogiri::HTML(body)
  @baseuri = baseuri
  # Only nil falls back to 1; any other value is stored as given.
  @pagenumber = pagenumber.nil? ? 1 : pagenumber
end
Instance Method Details
#ads_bottom ⇒ Object
225 226 227 |
# File 'lib/baidu.rb', line 225
#
# Ads at the bottom of the page. This implementation extracts none.
#
# @return [Array] always an empty Array
def ads_bottom
  []
end
#ads_right ⇒ Object
222 223 224 |
# File 'lib/baidu.rb', line 222
#
# Ads in the right-hand column. This implementation extracts none.
#
# @return [Array] always an empty Array
def ads_right
  []
end
#ads_top ⇒ Object
209 210 211 212 213 214 215 216 217 218 219 220 221 |
# File 'lib/baidu.rb', line 209
#
# Collects the ads found under div.ec_wise_ad.
#
# The returned Array is filled starting at index 1: index 0 is never
# assigned, so it is nil as soon as one ad is present, and the n-th ad is at
# index n. When no ad is found the Array is empty.
#
# @return [Array<Hash, nil>] each ad is a Hash with the String keys
#   'title', 'href' and 'host'
def ads_top
  id = 0
  result = []
  @body.search("div[@class='ec_wise_ad']/div").each do |div|
    site = div.search("span[@class='ec_site']").first
    # The link is built from the site span's text; a block without that span
    # is skipped instead of failing on nil.text.
    next if site.nil?

    id += 1
    href = "http://#{site.text}"
    title = div.search("a/text()").text.strip
    # URI.encode no longer exists on Ruby 3.0+, so the percent-encoding is
    # done by Addressable, which this method already uses for parsing.
    host = Addressable::URI.parse(Addressable::URI.encode(href)).host
    result[id] = { 'title' => title, 'href' => href, 'host' => host }
  end
  result
end
#next ⇒ Object
下一页
258 259 260 261 262 263 |
# File 'lib/baidu.rb', line 258
#
# Follows the page's "next page" link (the anchor whose text is "下一页"),
# downloads that page and wraps it in a new MbaiduResult.
#
# @return [MbaiduResult, nil] the following page, numbered @pagenumber + 1,
#   or nil when the current page has no such anchor
def next
  link = @body.xpath('//a[text()="下一页"]').first
  # No anchor (presumably the last page): report nil rather than failing on
  # nil['href'].
  return nil if link.nil?

  # The href may be relative, so resolve it against the current page's URI.
  url = URI.join(@baseuri, link['href']).to_s
  body = HTTParty.get(url)
  MbaiduResult.new(body, url, @pagenumber + 1)
end
#ranks ⇒ Object
返回当前页所有查询结果
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
# File 'lib/baidu.rb', line 165
#
# Returns all results found in the page's div.result elements. The page is
# parsed on the first call only; the Hash is cached in @ranks and returned
# as-is afterwards.
#
# Each key is the result's rank as a String: the number taken from the
# "&order=N&" part of the result link plus (@pagenumber - 1) * 10.
# NOTE(review): a link without "&order=N&" gets the key "" (nil.to_s), so
# several such results overwrite each other -- confirm whether callers rely
# on that before changing it.
#
# Each value is a Hash with the String keys:
#   'href'      - href attribute of the result's first link
#   'text'      - text of that link
#   'is_mobile' - true when that link contains an <img>
#   'host'      - text of the element with class "site", with the first
#                 non-breaking space removed
#
# @return [Hash{String => Hash}]
def ranks
  # Already parsed: return the cached result.
  return @ranks unless @ranks.nil?

  found = {}
  @body.xpath('//div[@class="result"]').each do |result|
    a = result.search("a").first
    # A result needs both a link and a site label; anything else is skipped.
    next if a.nil?

    site = result.search('[@class="site"]').first
    next if site.nil?

    href = a['href']
    # to_s keeps a link without an href from failing the pattern match.
    order = href.to_s[/&order=(\d+)&/, 1]
    id = order && (@pagenumber - 1) * 10 + order.to_i

    found[id.to_s] = {
      'href' => href,
      'text' => a.text,
      'is_mobile' => !a.search("img").empty?,
      'host' => site.text.sub(/\u00A0/, '')
    }
  end
  # Cache only after the whole page parsed, so a failure midway is not
  # memoised as a partial result.
  @ranks = found
end
#related_keywords ⇒ Object
228 229 230 |
# File 'lib/baidu.rb', line 228
#
# Related search keywords. This implementation extracts none.
#
# @return [Array] always an empty Array
def related_keywords
  []
end