Class: Net::HTTPResponse

Inherits:
Object show all
Defined in:
lib/rbot/core/utils/httputil.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#no_cache ⇒ Object

Returns the value of attribute no_cache.



27
28
29
# File 'lib/rbot/core/utils/httputil.rb', line 27

# Reader for the +no_cache+ attribute.
#
# Returns whatever is stored in @no_cache (nil when it was never assigned).
# partial_body sets the attribute to true before streaming the body in chunks.
def no_cache
  return @no_cache
end

Instance Method Details

#body_charset(str = self.raw_body) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/rbot/core/utils/httputil.rb', line 32

# Collects the candidate character sets for a response body.
#
# Candidates are gathered, in this order, from:
# 1. a hard-coded 'latin1' fallback,
# 2. the charset parameter of the Content-Type header,
# 3. a declaration inside the body itself: an XML processing instruction,
#    an HTML <meta http-equiv="Content-Type" ...> tag, or an HTML5
#    <meta charset="..."> tag (only the first of these that matches is used).
#
# str:: text to scan for in-document declarations; defaults to +raw_body+
#
# Returns nil when the Content-Type (assumed to be text/html if the header is
# missing) is neither text/* nor an XML/XHTML type. Otherwise returns an Array
# of charset names with duplicates removed; body_to_utf walks it back to
# front, so later (more specific) entries are tried first.
def body_charset(str=self.raw_body)
  ctype = self['content-type'] || 'text/html'
  return nil unless ctype =~ /^text/i || ctype =~ /x(ht)?ml/i

  charsets = ['latin1'] # should be in config

  # ';' is excluded from the value so that a following header parameter
  # ("charset=utf-8; format=flowed") does not leave a stray ';' on the name.
  if ctype.match(/charset=["']?([^\s"';]+)["']?/i)
    charsets << $1
    debug "charset #{charsets.last} added from header"
  end

  case str
  when /<\?xml\s[^>]*encoding=['"]([^\s"'>]+)["'][^>]*\?>/i
    charsets << $1
    debug "xml charset #{charsets.last} added from xml pi"
  when /<(meta\s[^>]*http-equiv=["']?Content-Type["']?[^>]*)>/i
    meta = $1
    # Case-insensitive, like the header match: attribute text such as
    # CONTENT="text/html; CHARSET=UTF-8" is common in older documents.
    if meta =~ /charset=['"]?([^\s'";]+)['"]?/i
      charsets << $1
      debug "html charset #{charsets.last} added from meta"
    end
  when /<meta\s+charset\s*=\s*["']?([^\s"'\/>]+)/i
    # HTML5 short form. Only matched when charset is the first attribute, so
    # a "charset=" inside some other attribute's value is not picked up.
    charsets << $1
    debug "html charset #{charsets.last} added from meta charset"
  end
  return charsets.uniq
end

#body_to_utf(str) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/rbot/core/utils/httputil.rb', line 57

# Converts +str+ to UTF-8 using the charsets suggested by body_charset.
#
# The candidate list is walked from its last entry to its first. For each
# candidate the conversion is attempted on the whole string and then on the
# string with 1 to 5 trailing bytes cut off; the first attempt that does not
# raise wins. Presumably the trimming is meant to cope with a body that was
# cut in the middle of a multibyte character -- confirm with callers.
#
# Returns +str+ unchanged when body_charset gives no candidates or when every
# attempt fails.
def body_to_utf(str)
  candidates = self.body_charset(str)
  return str unless candidates

  candidates.reverse_each do |encoding|
    # XXX: this one is really ugly, but i don't know how to make it better
    #  -jsn
    (0..5).each do |trim|
      begin
        debug "trying #{encoding} / offset #{trim}"
        shortened = str.slice(0 .. (-1 - trim))
        converted = Iconv.iconv('utf-8//ignore', encoding, shortened)
        return converted.first
      rescue
        debug "conversion failed for #{encoding} / offset #{trim}"
      end
    end
  end

  str
end

#cooked_body ⇒ Object



126
127
128
# File 'lib/rbot/core/utils/httputil.rb', line 126

# Returns the raw body after it has been run through decompress_body and
# then through body_to_utf.
def cooked_body
  unpacked = self.decompress_body(self.raw_body)
  self.body_to_utf(unpacked)
end

#decompress_body(str) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/rbot/core/utils/httputil.rb', line 78

# Undoes the content coding named by the Content-Encoding header.
#
# str:: the body, or a leading part of it, as received
#
# Returns
# * +str+ itself when there is no Content-Encoding header;
# * the decompressed data for gzip bodies -- if the stream cannot be read to
#   its end, as many leading bytes as could be recovered (binary string);
# * the inflated data for deflate bodies, accepting both raw deflate streams
#   and zlib-wrapped ones;
# * +str+ itself when the header actually holds a charset name, in which case
#   the charset is appended to the Content-Type header instead.
#
# Raises Zlib::Error when a deflate body can be inflated neither as a raw nor
# as a zlib-wrapped stream, and RuntimeError for any other content encoding.
def decompress_body(str)
  method = self['content-encoding']
  case method
  when nil
    return str
  when /gzip/ # Matches gzip, x-gzip, and the non-rfc-compliant gzip;q=\d sent by some servers
    debug "gunzipping body"
    begin
      return Zlib::GzipReader.new(StringIO.new(str)).read
    rescue Zlib::Error => e
      # We can't unpack the whole stream (e.g. because we're doing a
      # partial read), so collect bytes until the reader gives up.
      debug "full gunzipping failed (#{e}), trying to recover as much as possible"
      ret = String.new
      begin
        Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte|
          # byte is an Integer; appending it directly to a text string would
          # add it as a code point and mangle values above 127.
          ret << byte.chr
        }
      rescue
        # Expected once the truncated stream runs out; keep what we have.
      end
      return ret
    end
  when 'deflate'
    debug "inflating body"
    # Servers disagree on what "deflate" means: some send a bare deflate
    # stream, others the zlib-wrapped form. Negative window bits stop zlib
    # from looking for a zlib header, so try that first and fall back to the
    # wrapped form.
    # TODO recover as much as possible when both attempts fail
    first_error = nil
    [-Zlib::MAX_WBITS, Zlib::MAX_WBITS].each do |wbits|
      inflater = Zlib::Inflate.new(wbits)
      begin
        return inflater.inflate(str)
      rescue Zlib::Error => e
        debug "inflation with window bits #{wbits} failed (#{e})"
        first_error ||= e
      ensure
        inflater.close
      end
    end
    raise first_error
  when /^(?:iso-8859-\d+|windows-\d+|utf-8|utf8)$/i
    # B0rked servers (Freshmeat being one of them) sometimes return the charset
    # in the content-encoding; in this case we assume that the document has
    # a standard content-encoding
    old_hsh = self.to_hash
    # Same default as body_charset, so a missing header does not blow up here.
    ctype = self['content-type'] || 'text/html'
    self['content-type']= ctype+"; charset="+method.downcase
    warning "Charset vs content-encoding confusion, trying to recover: from\n#{old_hsh.pretty_inspect}to\n#{self.to_hash.pretty_inspect}"
    return str
  else
    debug self.to_hash
    raise "Unhandled content encoding #{method}"
  end
end

#partial_body(size = 0, &block) ⇒ Object

Read chunks from the body until we have at least size bytes, yielding the partial text at each chunk. Return the partial body.



132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/rbot/core/utils/httputil.rb', line 132

# Read chunks from the body until at least +size+ bytes have been collected,
# yielding the decompressed, UTF-8 converted text gathered so far after every
# chunk. A +size+ of 0 (the default) or nil reads all chunks.
#
# When @read is already set, the complete body is used instead and is
# yielded once. Otherwise +no_cache+ is set to true before the chunks are
# read.
#
# Returns the decompressed, converted text of everything collected.
def partial_body(size=0, &block)
  buffer = String.new

  if @read
    debug "using body() as partial"
    buffer = self.body
    if block_given?
      yield self.body_to_utf(self.decompress_body(buffer))
    end
  else
    debug "disabling cache"
    self.no_cache = true
    self.read_body do |piece|
      buffer << piece
      if block_given?
        # The whole buffer is re-processed each time, not just the new piece.
        yield self.body_to_utf(self.decompress_body(buffer))
      end
      enough = size && size > 0 && buffer.length >= size
      break if enough
    end
  end

  self.body_to_utf(self.decompress_body(buffer))
end