Class: SpiderBot::Http::Response

Inherits:
Object
  • Object
show all
Defined in:
lib/spider_bot/http/response.rb

Constant Summary collapse

# Maps a response media type (as returned by #content_type) to the symbolic
# format used to pick an entry of PARSERS. Media types that are not listed
# here yield nil on lookup; #parser then falls back to :html.
CONTENT_TYPE =
{
  'application/json' => :json,
  # NOTE(review): form-encoded bodies are routed to the HTML parser here;
  # presumably intentional, confirm against callers.
  'application/x-www-form-urlencoded' => :html,
  'text/html' => :html,
  'text/javascript' => :json,
  'text/xml' => :xml
}
# Body parsers keyed by the symbolic format found in CONTENT_TYPE.
# Every parser is a one-argument lambda that receives the body string.
PARSERS =
{
  # JSON parsing is best-effort: any StandardError raised while parsing
  # makes the parser hand back the body it was given.
  :json => lambda do |body|
    begin
      # Uses MultiJson.load when the module responds to :adapter and
      # MultiJson.decode otherwise (presumably to support older MultiJson
      # releases; verify against the bundled version).
      if MultiJson.respond_to?(:adapter)
        MultiJson.load(body)
      else
        MultiJson.decode(body)
      end
    rescue
      body
    end
  end,
  :html => lambda { |body| Nokogiri::HTML(body) },
  :xml => lambda { |body| MultiXml.parse(body) }
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(response) ⇒ Response

Returns a new instance of Response.



20
21
22
# File 'lib/spider_bot/http/response.rb', line 20

# Wraps a raw response object.
#
# @param response [Object] the object to wrap; the other methods of this
#   class call #body, #headers and #status on it
def initialize(response)
  @response = response
end

Instance Attribute Details

#response ⇒ Object (readonly)

Returns the value of attribute response.



4
5
6
# File 'lib/spider_bot/http/response.rb', line 4

# Returns the raw response object that was handed to the constructor.
#
# @return [Object] the wrapped response
def response
  @response
end

Instance Method Details

#body(options = {}) ⇒ Object



28
29
30
31
# File 'lib/spider_bot/http/response.rb', line 28

# Returns the wrapped response's body after passing it through #decode.
#
# @param options [Hash, nil] forwarded to #decode; nil is treated as an
#   empty Hash
# @return [Object] whatever #decode returns for the raw body
def body(options = {})
  decode(response.body, options || {})
end

#charset_covert(charset) ⇒ Object



73
74
75
76
77
78
79
80
# File 'lib/spider_bot/http/response.rb', line 73

# Normalizes a charset label before it is used as a source encoding.
#
# GB2312 and GBK labels both map to "gbk". The comparison is
# case-insensitive, so mixed-case labels such as "Gb2312" are normalized
# too. Any other value is returned untouched.
#
# @param charset [String, nil] charset label
# @return [String, nil] "gbk" for GB2312/GBK labels, otherwise +charset+
def charset_covert(charset)
  case charset
  when /\A(?:gb2312|gbk)\z/i
    "gbk"
  else
    charset
  end
end

#content_type ⇒ Object

Attempts to determine the content type of the response.



54
55
56
# File 'lib/spider_bot/http/response.rb', line 54

# Extracts the media type from the response's Content-Type header.
#
# Looks the header up under 'content-type' first and 'Content-Type' second,
# drops everything from the first ';' onwards and strips surrounding
# whitespace.
#
# @return [String] the media type, or '' when the header is absent or empty
def content_type
  header = response.headers.values_at('content-type', 'Content-Type').compact.first
  media_type = (header || '').split(';').first
  (media_type || '').strip
end

#decode(body, options = {}) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/spider_bot/http/response.rb', line 33

# Converts a raw response body to UTF-8.
#
# The source charset is taken from the first `charset=...` declaration found
# in the body text. When the body declares UTF-8 it is returned as is.
# Otherwise +options[:encode]+, when given, is used as the source encoding
# in place of the declared charset.
#
# @param body [String, nil] raw response body
# @param options [Hash, nil] optional settings; +:encode+ names the source
#   encoding to transcode from
# @return [String] '' for a nil body; +body+ itself when the response is
#   treated as JSON, when the body declares UTF-8, when no source encoding
#   can be determined, or when transcoding fails; otherwise a new UTF-8
#   string (the argument is not modified)
def decode(body, options = {})
  return '' unless body
  return body if json?

  options ||= {}
  # Scan a binary-tagged copy so that bytes which are invalid for the
  # body's tagged encoding cannot make the regexp match raise.
  declared = body.dup.force_encoding(Encoding::BINARY).match(/charset\s*=[\s|\W]*([\w-]+)/)
  return body if declared && declared[1].downcase == "utf-8"

  # A body without any charset declaration leaves +declared+ nil; only
  # an explicit :encode option can supply the source encoding then.
  source = options[:encode] || (declared && charset_covert(declared[1]))
  return body unless source

  begin
    # The transcoding options must be keyword arguments: a positional Hash
    # is rejected by String#encode on Ruby 3, and the rescue below would
    # silently turn that into "no conversion". The non-bang form keeps the
    # caller's string intact so repeated calls do not re-transcode it.
    body.encode("utf-8", source, :invalid => :replace)
  rescue EncodingError, ArgumentError
    # Unknown or unconvertible encodings: hand the body back unchanged.
    body
  end
end

#headers ⇒ Object



24
25
26
# File 'lib/spider_bot/http/response.rb', line 24

# Returns the headers of the wrapped response.
#
# @return [Object] whatever the wrapped response's #headers returns
#   (#content_type reads it with Hash-style #values_at)
def headers
  response.headers
end

#json? ⇒ Boolean

Returns:

  • (Boolean)


58
59
60
# File 'lib/spider_bot/http/response.rb', line 58

# Tells whether the response should be treated as JSON.
#
# True when the declared content type maps to :json in CONTENT_TYPE, and
# otherwise true whenever the raw body contains no "<html" (the match is
# case-sensitive).
#
# @return [Boolean]
def json?
  return true if CONTENT_TYPE[content_type] == :json

  response.body.match(/\<html/).nil?
end

#parsed ⇒ Object



69
70
71
# File 'lib/spider_bot/http/response.rb', line 69

# Returns the decoded body run through the parser selected by #parser.
#
# The result is cached in @parsed. A nil or false result is not cached,
# so the body is parsed again on the next call.
#
# @return [Object] the output of the selected PARSERS entry
def parsed
  return @parsed if @parsed

  handler = PARSERS[parser]
  @parsed = handler.call(body)
end

#parser ⇒ Object



62
63
64
65
66
67
# File 'lib/spider_bot/http/response.rb', line 62

# Chooses which entry of PARSERS should handle this response.
#
# Starts from the format CONTENT_TYPE assigns to the declared content type.
# A response declared as HTML whose body contains no html tag is handled as
# :json instead; an unknown content type is handled as :html.
#
# @return [Symbol] one of CONTENT_TYPE's values, or :html as the fallback
def parser
  type = CONTENT_TYPE[content_type]
  payload = response.body
  # The previous pattern, /\<.*html|/, ended in an empty alternative that
  # matches every string, so this fallback could never fire. The match is
  # case-insensitive so that pages using <HTML> or <!DOCTYPE HTML ...> keep
  # going to the HTML parser. A nil body keeps the declared type.
  type = :json if type == :html && payload && payload !~ /<.*html/i
  type || :html
end

#status ⇒ Object



49
50
51
# File 'lib/spider_bot/http/response.rb', line 49

# Returns the status of the wrapped response.
#
# @return [Object] whatever the wrapped response's #status returns
#   (presumably the numeric HTTP status code; confirm against the HTTP client)
def status
  response.status
end