Class: WebPageParser::HTTP::Session
- Inherits: Object
- Inheritance hierarchy: Object < WebPageParser::HTTP::Session
- Defined in:
- lib/web-page-parser/http.rb
Defined Under Namespace
Classes: CurlError
Instance Method Summary collapse
Instance Method Details
#curl ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/web-page-parser/http.rb', line 19
#
# Returns the Curl::Easy handle used by this session. The handle is built and
# configured on first use and memoized in @curl, so later calls return the
# same object.
#
# NOTE(review): the extracted listing read `c. = true` here (the identifier
# was lost and the line was not valid Ruby). It is restored as
# `enable_cookies`, the curb setting that fits this spot -- confirm against
# the upstream source.
def curl
  @curl ||= Curl::Easy.new do |c|
    # Timeouts (presumably seconds -- see the curb documentation).
    c.timeout = 8
    c.connect_timeout = 8
    c.dns_cache_timeout = 600

    c.enable_cookies = true

    # Redirect handling.
    c.follow_location = true
    c.max_redirects = 6
    c.autoreferer = true

    c.headers["User-Agent"] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4'
    # Compressed bodies are requested here; #get tries to decompress them.
    c.headers["Accept-encoding"] = 'gzip, deflate'
  end
end
#get(url) ⇒ Object
33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/web-page-parser/http.rb', line 33
#
# Fetches +url+ with the session's curl handle and wraps the body in a
# Response.
#
# The body is passed to #gunzip first and, if that yields nil, to #inflate;
# when neither produces a result the body is used as received.
#
# Raises CurlError when curl.perform returns false.
def get(url)
  curl.url = url
  raise CurlError, "curl.perform returned false" if curl.perform == false

  page = gunzip(curl.body_str) || inflate(curl.body_str) || curl.body_str

  # Not sure if this is right. works for BBC/Guardian/New York Times anyway
  page.force_encoding("utf-8") if page.respond_to?(:force_encoding)

  Response.new(page, curl)
end
#gunzip(s) ⇒ Object
54 55 56 57 58 59 60 |
# File 'lib/web-page-parser/http.rb', line 54
#
# Decompresses the gzip-formatted string +s+.
#
# Returns the decompressed String, or nil when +s+ is not valid gzip data
# (Zlib::DataError or Zlib::GzipFile::Error is raised while reading it).
def gunzip(s)
  # The block form of wrap closes the reader once the block finishes, even if
  # reading raises, so the zlib stream is released promptly.
  Zlib::GzipReader.wrap(StringIO.new(s)) { |reader| reader.read }
rescue Zlib::DataError, Zlib::GzipFile::Error
  nil
end
#inflate(s) ⇒ Object
48 49 50 51 52 |
# File 'lib/web-page-parser/http.rb', line 48
#
# Inflates the zlib-compressed string +s+.
#
# Returns the inflated String, or nil when +s+ cannot be inflated:
# * Zlib::DataError - the data is not a valid zlib stream
# * Zlib::BufError  - the stream ends early (this includes an empty string)
# * Zlib::NeedDict  - the header asks for a preset dictionary
#
# Other Zlib errors are left to propagate.
def inflate(s)
  Zlib::Inflate.inflate(s)
rescue Zlib::DataError, Zlib::BufError, Zlib::NeedDict
  nil
end