Class: WebCrawler::Response

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/web_crawler/response.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, response) ⇒ Response

Returns a new instance of Response.

Raises:

  • (ArgumentError)


11
12
13
14
15
16
# File 'lib/web_crawler/response.rb', line 11

# Wraps a raw HTTP response together with the URL it was fetched from.
#
# @param url [Object] the requested URL; stored as given
# @param response [Net::HTTPResponse] the response to wrap
# @raise [ArgumentError] if +response+ is not a Net::HTTPResponse
def initialize(url, response)
  unless response.is_a?(Net::HTTPResponse)
    raise ArgumentError, "response must be a Net::HTTPResponse, but #{response.class} given"
  end

  @url = url
  @response = response

  # Either header may be absent or unparsable: any StandardError raised while
  # reading or parsing it falls back to the current time.
  @date = begin
    Time.parse(self['Date'])
  rescue StandardError
    Time.now
  end
  @expire ||= begin
    Time.parse(self['Expires'])
  rescue StandardError
    Time.now
  end
end

Instance Attribute Details

#cached ⇒ Object (readonly)

Returns the value of attribute cached.



9
10
11
# File 'lib/web_crawler/response.rb', line 9

# Marker text that #inspect appends after the object id.
#
# @return [String, nil] ' CACHED' after #set_cached_flag has been called;
#   nil until something assigns it (the constructor does not)
def cached
  @cached
end

#date ⇒ Object (readonly)

Returns the value of attribute date.



9
10
11
# File 'lib/web_crawler/response.rb', line 9

# Timestamp taken from the response's 'Date' header.
#
# @return [Time] the parsed 'Date' header, or the time the object was
#   constructed when the header could not be read or parsed
def date
  @date
end

#expire ⇒ Object (readonly)

Returns the value of attribute expire.



9
10
11
# File 'lib/web_crawler/response.rb', line 9

# Timestamp taken from the response's 'Expires' header.
#
# @return [Time] the parsed 'Expires' header, or the time the object was
#   constructed when the header could not be read or parsed
def expire
  @expire
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



9
10
11
# File 'lib/web_crawler/response.rb', line 9

# The URL this response belongs to.
#
# @return [Object] the +url+ argument exactly as it was passed to the constructor
def url
  @url
end

Instance Method Details

#body ⇒ Object Also known as: to_s



57
58
59
60
61
62
63
64
# File 'lib/web_crawler/response.rb', line 57

# Returns the response body as UTF-8 text; the result is memoized.
#
# The source charset is read from the +charset+ parameter of the Content-Type
# header. Quotes around the value and any parameters that follow it are
# ignored. When the header or the parameter is missing, UTF-8 is assumed and
# the raw body is returned untouched; any other charset is converted through
# #encode_body.
#
# @return [String] the (possibly re-encoded) body
def body
  @body ||= begin
    content_type = self['Content-Type'].to_s
    # Pick out only the charset value instead of "whatever follows the last =",
    # so "text/html" alone is not mistaken for an encoding name.
    charset = (content_type[/charset\s*=\s*["']?([^\s;"']+)/i, 1] || 'UTF-8').upcase
    charset == 'UTF-8' ? @response.body : encode_body(charset)
  end
end

#encode_body(from) ⇒ Object



68
69
70
71
72
73
74
75
# File 'lib/web_crawler/response.rb', line 68

# Converts the raw response body from the +from+ charset to UTF-8.
#
# Uses String#encode where the String class provides it. Iconv is only loaded
# as a fallback for interpreters without String#encode, because Iconv is no
# longer part of the standard library (removed in Ruby 2.0) and requiring it
# unconditionally raises LoadError there.
#
# For XML responses the encoding named in the XML declaration is rewritten to
# utf-8 so the declaration matches the converted bytes.
#
# @param from [String] name of the source encoding (e.g. "WINDOWS-1251")
# @return [String] the body converted to UTF-8
def encode_body(from)
  raw = @response.body
  encoded =
    if raw.respond_to?(:encode)
      # dup first: force_encoding retags its receiver in place, and the
      # response's own body string must stay as it was.
      raw.dup.force_encoding(from).encode('UTF-8')
    else
      require "iconv" unless defined?(Iconv)
      Iconv.iconv('UTF-8', from, raw).first
    end
  if xml?
    encoded = encoded.gsub(/<\?xml version="(.*?)" encoding=".*?"\?>/, "<?xml version=\"1.0\" encoding=\"utf-8\"?>")
  end
  encoded
end

#failure? ⇒ Boolean

Returns:

  • (Boolean)


38
39
40
# File 'lib/web_crawler/response.rb', line 38

# Logical opposite of #success?.
#
# @return [Boolean] false when #success? is truthy, true otherwise
def failure?
  success? ? false : true
end

#foul? ⇒ Boolean

Returns:

  • (Boolean)


30
31
32
# File 'lib/web_crawler/response.rb', line 30

# Tells whether the response's date has reached its expiry time.
#
# @return [Boolean] true when #date is at or after #expire
def foul?
  served_at = date
  expires_at = expire
  served_at >= expires_at
end

#header ⇒ Object



53
54
55
# File 'lib/web_crawler/response.rb', line 53

# Returns the response headers as a flat Hash of name => value; memoized.
#
# The wrapped response's #to_hash maps each header name to an Array of values.
# Each Array is collapsed into one String; a header that occurs several times
# (e.g. Set-Cookie) has its values joined with ", " instead of producing an
# entry with more than two elements, which Hash[] rejects with ArgumentError.
#
# @return [Hash{String => String}] header names mapped to their joined values
def header
  @header ||= Hash[@response.to_hash.map { |name, values| [name, Array(values).join(', ')] }]
end

#inspect ⇒ Object



42
43
44
45
46
47
# File 'lib/web_crawler/response.rb', line 42

def inspect
  redirected = redirect? ? " redirect path: \"" + redirect_path.join(', ') + "\"" : ""
  "#<#{self.class}::0x#{self.object_id.to_s(16).rjust(14, '0')}#{@cached} " <<
      "#{type} #{code} #{message} #{@url}" <<
      "#{redirected}>"
end

#mime_type ⇒ Object



49
50
51
# File 'lib/web_crawler/response.rb', line 49

# Looks up the response's content type in the MIME::Types registry.
#
# Falls back to "text/html; charset=utf-8" when the response carries no
# content-type header.
#
# @return [Object] the first entry MIME::Types returns for the content type
def mime_type
  content_type = header['content-type'] || "text/html; charset=utf-8"
  MIME::Types[content_type].first
end

#set_cached_flag ⇒ Object



26
27
28
# File 'lib/web_crawler/response.rb', line 26

# Flags this response as cached by storing the ' CACHED' marker, which
# #inspect then prints right after the object id.
#
# @return [String] the marker string ' CACHED'
def set_cached_flag
  # The leading space is part of the value: #inspect interpolates it directly
  # after the object id with no separator of its own.
  @cached = ' CACHED'
end

#success? ⇒ Boolean

Returns:

  • (Boolean)


34
35
36
# File 'lib/web_crawler/response.rb', line 34

# Tells whether the wrapped response belongs to the Net::HTTPSuccess family.
#
# @return [Boolean] true when the wrapped response is a Net::HTTPSuccess
def success?
  case @response
  when Net::HTTPSuccess then true
  else false
  end
end

#type ⇒ Object



77
78
79
# File 'lib/web_crawler/response.rb', line 77

# Class of the wrapped response object; #inspect prints it before the status code.
#
# @return [Class] the wrapped response's class (the constructor only accepts
#   Net::HTTPResponse instances)
def type
  @response.class
end