Class: Knj::Http2

Inherits:
Object show all
Defined in:
lib/knj/http2.rb

Overview

This class emulates a browser in Ruby without any visual parts: it remembers cookies, keeps sessions alive, resets connections according to keep-alive rules, and more.

Examples

Knj::Http2.new(:host => "www.somedomain.com", :port => 80, :ssl => false, :debug => false) do |http|

res = http.get("index.rhtml?show=some_page")
html = res.body
print html

res = http.post("index.rhtml?choice=login", {"username" => "John Doe", "password" => 123})
print res.body
print "#{res.headers}"

end

Defined Under Namespace

Classes: Response

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(args = {}) ⇒ Http2

Returns a new instance of Http2.



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/knj/http2.rb', line 15

# Builds a new client and connects to the host right away (via reconnect).
#
# args is either a Hash of options or a String, which is used as the host.
# Options read here: :host (required), :port (defaults to 443 when :ssl is
# set, otherwise 80), :ssl, :debug, :nl (line terminator, default "\r\n")
# and :user_agent. Missing defaults are written into the given hash.
#
# When a block is given the object is yielded to it and destroyed afterwards,
# also when the block raises.
def initialize(args = {})
  require "#{$knjpath}web"

  args = {:host => args} if args.is_a?(String)
  raise "Arguments wasnt a hash." unless args.is_a?(Hash)

  @args = args
  @cookies = {}
  @debug = @args[:debug]

  require "monitor"
  @mutex = Monitor.new

  # Fill in defaults for everything the caller left out.
  @args[:port] ||= (@args[:ssl] ? 443 : 80)
  @nl = @args[:nl] || "\r\n"
  @uagent = @args[:user_agent] || "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)"

  raise "No host was given." unless @args[:host]
  self.reconnect

  return unless block_given?

  begin
    yield(self)
  ensure
    self.destroy
  end
end

Instance Attribute Details

#argsObject (readonly)

Returns the value of attribute args.



13
14
15
# File 'lib/knj/http2.rb', line 13

# Returns the @args instance variable (the options hash stored by the constructor).
def args
  return @args
end

#cookiesObject (readonly)

Returns the value of attribute cookies.



13
14
15
# File 'lib/knj/http2.rb', line 13

# Returns the @cookies instance variable (the hash the constructor initialises to {}).
def cookies
  return @cookies
end

Class Method Details

.post_convert_data(pdata, args = nil) ⇒ Object

This is used to convert a hash to valid post-data recursively.



223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/knj/http2.rb', line 223

# Converts the given data to a URL-encoded post-string, recursively.
#
# pdata - a Hash or an Array. Nested Hashes/Arrays are flattened into
#         "parent[child]" keys (Array elements are keyed by their index).
#         Any other object is returned as pdata.to_s.
# args  - used by the recursion only: {:orig_key => "parent"} carries the key
#         of the enclosing container. Callers normally leave it out.
#
# Returns a String of "key=value" pairs joined by "&".
def self.post_convert_data(pdata, args = nil)
  return pdata.to_s unless pdata.is_a?(Hash) or pdata.is_a?(Array)

  parent_key = args ? args[:orig_key] : nil

  # Handle an Array like a Hash keyed by element index, so both kinds of
  # container share one code path (and one separator rule).
  pairs = []
  if pdata.is_a?(Hash)
    pdata.each { |key, val| pairs << [key, val] }
  else
    pdata.each_with_index { |val, index| pairs << [index, val] }
  end

  parts = []
  pairs.each do |key, val|
    key = "#{parent_key}[#{key}]" if parent_key

    if val.is_a?(Hash) or val.is_a?(Array)
      nested = self.post_convert_data(val, {:orig_key => key})
      # An empty nested container contributes nothing; skipping it avoids
      # stray "&" separators.
      parts << nested if nested != ""
    else
      parts << "#{Knj::Web.urlenc(key)}=#{Knj::Web.urlenc(val.to_s)}"
    end
  end

  return parts.join("&")
end

Instance Method Details

#default_headers(args = {}) ⇒ Object

Returns the default headers for a request.

Examples

headers_hash = http.default_headers; print "#{headers_hash}"



200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/knj/http2.rb', line 200

# Returns the default headers for a request as a Hash.
#
# args - request options. If args[:default_headers] is set, that object is
#        returned untouched and nothing else is generated.
#
# Otherwise the hash holds "Host", "Connection" and "User-Agent", plus
# "Accept-Encoding: gzip" unless the connection was created with a false
# :encoding_gzip, plus a Basic "Authorization" header when the connection has
# :basic_auth ({:user => ..., :passwd => ...}).
def default_headers(args = {})
  return args[:default_headers] if args[:default_headers]

  headers = {
    "Host" => @args[:host],
    "Connection" => "Keep-Alive",
    "User-Agent" => @uagent
  }

  # gzip is on by default; only an explicit false/nil :encoding_gzip disables it.
  if !@args.key?(:encoding_gzip) or @args[:encoding_gzip]
    headers["Accept-Encoding"] = "gzip"
  end

  if @args[:basic_auth]
    # pack("m") inserts line feeds into its base64 output (at least a trailing
    # one); they must be removed or they would terminate the header early.
    credential = ["#{@args[:basic_auth][:user]}:#{@args[:basic_auth][:passwd]}"].pack("m").delete("\r\n")
    headers["Authorization"] = "Basic #{credential}"
  end

  return headers
end

#destroyObject

Destroys the object unsetting all variables and closing all sockets.

Examples

http.destroy



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/knj/http2.rb', line 80

# Tears the object down: clears the connection state and closes every socket
# that is still open. Returns nil.
def destroy
  # Clear the plain state first, exactly as many variables as were set up.
  [:@args, :@cookies, :@debug, :@mutex, :@uagent, :@keepalive_timeout, :@request_last].each do |ivar|
    instance_variable_set(ivar, nil)
  end

  # Then close and forget each socket handle in turn.
  [:@sock, :@sock_plain, :@sock_ssl].each do |ivar|
    socket = instance_variable_get(ivar)
    socket.close if socket and !socket.closed?
    instance_variable_set(ivar, nil)
  end

  nil
end

#get(addr, args = {}) ⇒ Object

Returns a result-object based on the arguments.

Examples

res = http.get("somepage.html"); print res.body #=> <String>-object containing the HTML gotten.



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/knj/http2.rb', line 159

# Sends a GET request for the given address and returns whatever
# read_response returns for it.
#
# addr - path and query relative to the host root, without the leading "/".
# args - request options; :addr is stored into this hash and the hash is
#        handed on to default_headers, header_str and read_response.
def get(addr, args = {})
  @mutex.synchronize do
    args[:addr] = addr

    request = "GET /#{addr} HTTP/1.1#{@nl}"
    request << header_str(default_headers(args), args)
    request << "#{@nl}"

    if @debug
      print "Http2: Writing headers.\n"
      print "Header str: #{request}\n"
    end
    write(request)

    print "Http2: Reading response.\n" if @debug
    response = read_response(args)

    print "Http2: Done with get request.\n" if @debug
    return response
  end
end

#header_str(headers_hash, args = {}) ⇒ Object

Returns a header-string which normally would be used for a request in the given state.



381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
# File 'lib/knj/http2.rb', line 381

# Serialises the given headers to the "Key: value" lines of a request.
#
# headers_hash - Hash of header names to values. When cookies are stored on
#                the object and args does not carry a false :cookies, a
#                "Cookie" entry is written into this hash first.
# args         - request options; only :cookies is read here.
#
# Returns the header lines as one String, each terminated by @nl.
def header_str(headers_hash, args = {})
  if @cookies.length > 0 and (args[:cookies] or !args.key?(:cookies))
    pairs = @cookies.map do |cookie_name, cookie_data|
      # A stored cookie is either a hash with "name"/"value" or a plain value.
      if cookie_data.is_a?(Hash)
        name, value = cookie_data["name"], cookie_data["value"]
      else
        name, value = cookie_name, cookie_data
      end

      "#{Knj::Web.urlenc(name)}=#{Knj::Web.urlenc(value)}"
    end

    headers_hash["Cookie"] = pairs.join("; ")
  end

  lines = headers_hash.inject("") do |out, (key, val)|
    out << "#{key}: #{val}#{@nl}"
  end

  return lines
end

#on_content_call(args, line) ⇒ Object



408
409
410
# File 'lib/knj/http2.rb', line 408

# Hands a piece of received content to the :on_content callback, if the
# request options contain one. Returns the callback's result, or nil when
# there is no :on_content key.
def on_content_call(args, line)
  return nil unless args.key?(:on_content)

  args[:on_content].call(line)
end

#parse_body(line, args) ⇒ Object

Parses the body based on given headers and saves it to the result-object. http.parse_body(str)



563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
# File 'lib/knj/http2.rb', line 563

# Consumes one piece of the response body and appends it to the result-object.
#
# line - what read_response just read from the socket: the chunk-size line
#        for chunked responses, otherwise a piece of the body itself.
# args - request options; passed on to on_content_call.
#
# Returns "break" when the body is complete, otherwise nil.
# Raises RuntimeError for an HTTP version other than 1.0/1.1 and when the
# chunked framing is not what was expected.
def parse_body(line, args)
  # Compare, do not assign: the parsed version must stay untouched. HTTP/1.0
  # bodies are read by the non-chunked branch below. A missing version is let
  # through here; read_response reports a response without status line.
  version = @resp.args[:http_version]
  if version and version != "1.1" and version != "1.0"
    raise "Dont know how to read HTTP version: '#{version}'."
  end

  return "break" if @length == 0

  if @resp.header("transfer-encoding").to_s.downcase == "chunked"
    len = line.strip.hex

    if len > 0
      read = @sock.read(len)
      # nil means the socket hit EOF in the middle of the chunk.
      # NOTE(review): a chunk whose data is exactly @nl also ends the body
      # here; kept as it was - confirm whether that is intended.
      return "break" if read.nil? or read == "" or read == @nl
      @resp.args[:body] << read
      self.on_content_call(args, read)
    end

    # Every chunk (and the terminating zero-size chunk) is followed by a newline.
    nl = @sock.gets
    if len == 0
      return "break" if nl == @nl
      raise "Dont know what to do :'-("
    end

    raise "Should have read newline but didnt: '#{nl}'." if nl != @nl
  else
    @resp.args[:body] << line.to_s
    self.on_content_call(args, line)

    # Content-Length counts bytes, so compare against the byte size.
    if @resp.header?("content-length") and @resp.args[:body].bytesize >= @resp.header("content-length").to_i
      return "break"
    end
  end

  return nil
end

#parse_header(line, args = {}) ⇒ Object

Parse a header-line and saves it on the object.

Examples

http.parse_header("Content-Type: text/html\r\n")



511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
# File 'lib/knj/http2.rb', line 511

# Interprets one line of the response head and records it on the object.
#
# line - a raw line as read from the socket, either "Name: value" followed by
#        @nl or the "HTTP/x.y code text" status line.
# args - request options; passed on to on_content_call.
#
# Header lines update cookies, keep-alive limits, connection state, encoding,
# length and content type as appropriate and are appended to the response
# headers under their downcased name. The status line sets :code and
# :http_version on the response. Anything else raises a RuntimeError.
def parse_header(line, args = {})
  header = line.match(/^(.+?):\s*(.+)#{@nl}$/)
  status = header ? nil : line.match(/^HTTP\/([\d\.]+)\s+(\d+)\s+(.+)$/)

  if header
    key = header[1].to_s.downcase

    # header[2] is fetched anew for each use on purpose: the content-type
    # branch edits its copy in place and must not affect the stored header.
    case key
    when "set-cookie"
      Knj::Web.parse_set_cookies(header[2]).each do |cookie_data|
        @cookies[cookie_data["name"]] = cookie_data
      end
    when "keep-alive"
      if ka_max = header[2].to_s.match(/max=(\d+)/)
        @keepalive_max = ka_max[1].to_i
        print "Http2: Keepalive-max set to: '#{@keepalive_max}'.\n" if @debug
      end

      if ka_timeout = header[2].to_s.match(/timeout=(\d+)/)
        @keepalive_timeout = ka_timeout[1].to_i
        print "Http2: Keepalive-timeout set to: '#{@keepalive_timeout}'.\n" if @debug
      end
    when "connection"
      @connection = header[2].to_s.downcase
    when "content-encoding"
      @encoding = header[2].to_s.downcase
    when "content-length"
      @length = header[2].to_i
    when "content-type"
      ctype = header[2].to_s
      if match_charset = ctype.match(/\s*;\s*charset=(.+)/i)
        @charset = match_charset[1].downcase
        @resp.args[:charset] = @charset
        ctype.gsub!(match_charset[0], "")
      end

      @ctype = ctype
      @resp.args[:contenttype] = @ctype
    end

    # Connection-management headers are not passed to the content callback.
    unless ["transfer-encoding", "content-length", "connection", "keep-alive"].include?(key)
      on_content_call(args, line)
    end

    @resp.headers[key] = [] unless @resp.headers.key?(key)
    @resp.headers[key] << header[2]
  elsif status
    @resp.args[:code] = status[2]
    @resp.args[:http_version] = status[1]
  else
    raise "Could not understand header string: '#{line}'.\n\n#{@sock.read(409600)}"
  end
end

#post(addr, pdata = {}, args = {}) ⇒ Object

Posts to a certain page.

Examples

res = http.post("login.php", {"username" => "John Doe", "password" => 123})



267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# File 'lib/knj/http2.rb', line 267

# Posts url-encoded form data to the given address and returns whatever
# read_response returns for it.
#
# addr  - path and query relative to the host root, without the leading "/".
# pdata - the data to post; converted with Knj::Http2.post_convert_data.
# args  - request options; :addr is stored into this hash (as get does, so
#         error messages can name the address) and the hash is handed on to
#         default_headers, header_str and read_response.
def post(addr, pdata = {}, args = {})
  @mutex.synchronize do
    print "Doing post.\n" if @debug

    args[:addr] = addr
    praw = Knj::Http2.post_convert_data(pdata)

    headers = self.default_headers(args).merge(
      "Content-Type" => "application/x-www-form-urlencoded",
      # Content-Length is a byte count; characters and bytes differ as soon
      # as the data contains multibyte characters.
      "Content-Length" => praw.bytesize
    )

    request = "POST /#{addr} HTTP/1.1#{@nl}"
    request << self.header_str(headers, args)
    request << "#{@nl}"
    request << praw

    print "Header str: #{request}\n" if @debug

    self.write(request)
    return self.read_response(args)
  end
end

#post_multipart(addr, pdata, args = {}) ⇒ Object

Posts to a certain page using the multipart-method.

Examples

res = http.post_multipart("upload.php", {"id" => 123, "file" => Tempfile.new("upload")})



288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
# File 'lib/knj/http2.rb', line 288

# Posts to the given address as multipart/form-data and returns whatever
# read_response returns for it.
#
# addr  - path and query relative to the host root, without the leading "/".
# pdata - Hash of field names to values. A value may be:
#         * a Tempfile responding to original_filename - sent as a file
#           upload with the content of the file;
#         * a Hash with :content (a string) or :fpath (a file to read), and
#           optionally :filename to mark it as a file upload;
#         * a StringIO - its remaining content is sent;
#         * anything else - sent as val.to_s.
# args  - request options; handed on to default_headers, header_str and
#         read_response.
#
# The body is first assembled in a temporary file (which is always removed
# again) so its total size is known before the headers are sent.
#
# Raises RuntimeError when a Hash value has :filename but neither :content
# nor :fpath.
def post_multipart(addr, pdata, args = {})
  require "digest"
  require "stringio"

  @mutex.synchronize do
    boundary = Digest::MD5.hexdigest(Time.now.to_f.to_s)
    tmp_path = "#{Knj::Os.tmpdir}/knj_http2_post_multiepart_tmp_#{boundary}"

    begin
      # Binary mode throughout: uploaded data must reach the server byte for byte.
      File.open(tmp_path, "wb") do |praw|
        pdata.each do |key, val|
          # Work out where this part's data comes from before writing anything,
          # so the announced Content-Length always matches what is written.
          filename = nil
          content = nil
          fpath = nil

          if val.class.name == "Tempfile" and val.respond_to?("original_filename")
            filename = val.original_filename
            val.flush # make sure buffered writes are on disk before reading the path
            fpath = val.path
          elsif val.is_a?(Hash) and (val[:filename] or val[:content] or val[:fpath])
            filename = val[:filename]

            if val[:content]
              content = val[:content].to_s
            elsif val[:fpath]
              fpath = val[:fpath]
            else
              raise "Could not figure out where to get content from."
            end
          elsif val.is_a?(StringIO)
            content = val.read.to_s
          else
            content = val.to_s
          end

          praw << "--#{boundary}#{@nl}"

          if filename
            praw << "Content-Disposition: form-data; name=\"#{key}\"; filename=\"#{filename}\";#{@nl}"
          else
            praw << "Content-Disposition: form-data; name=\"#{key}\";#{@nl}"
          end

          praw << "Content-Length: #{fpath ? File.size(fpath) : content.bytesize}#{@nl}"
          praw << "Content-Type: text/plain#{@nl}"
          praw << @nl

          if fpath
            File.open(fpath, "rb") do |fp|
              while chunk = fp.read(4096)
                praw << chunk
              end
            end
          else
            praw << content
          end

          praw << @nl
        end

        praw << "--#{boundary}--"
      end

      #Generate the request head; the body is streamed from the temp file below.
      request = "POST /#{addr} HTTP/1.1#{@nl}"
      request << self.header_str(self.default_headers(args).merge(
        "Content-Type" => "multipart/form-data; boundary=#{boundary}",
        "Content-Length" => File.size(tmp_path)
      ), args)
      request << @nl

      #Debug.
      print "Headerstr: #{request}\n" if @debug

      #Write and return.
      self.write(request)
      File.open(tmp_path, "rb") do |fp|
        while chunk = fp.read(4096)
          @sock.write(chunk)
        end
      end

      return self.read_response(args)
    ensure
      File.unlink(tmp_path) if File.exist?(tmp_path)
    end
  end
end

#read_response(args = {}) ⇒ Object

Reads the response after posting headers and data.

Examples

res = http.read_response



415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
# File 'lib/knj/http2.rb', line 415

# Reads the response to the request that was just written and returns the
# result-object.
#
# args - request options; passed on to parse_header, parse_body and
#        on_content_call, and args[:addr] is used in the error message for
#        a 500 response.
#
# The head is read line by line through parse_header, the body through
# parse_body. Afterwards the socket is closed when the server asked for it,
# a gzip-encoded body is decoded, and a 302 with a Location header is
# followed with a GET unless the connection was created with a false
# :follow_redirects.
#
# Raises RuntimeError when no status line was received and on a 500 response,
# and Knj::Errors::NoAccess on a 403 response.
def read_response(args = {})
  @mode = "headers"
  @resp = Knj::Http2::Response.new

  loop do
    begin
      if @length and @length > 0 and @mode == "body"
        line = @sock.read(@length)
      else
        line = @sock.gets
      end

      print "<#{@mode}>: '#{line}'\n" if @debug
    rescue Errno::ECONNRESET
      print "Http2: The connection was reset while reading - breaking gently...\n" if @debug
      @sock = nil
      break
    end

    break if line.to_s == ""

    if @mode == "headers" and line == @nl
      print "Changing mode to body!\n" if @debug
      break if @length == 0
      @mode = "body"
      next
    end

    if @mode == "headers"
      self.parse_header(line, args)
    elsif @mode == "body"
      self.on_content_call(args, "\r\n")
      stat = self.parse_body(line, args)
      break if stat == "break"
      next if stat == "next"
    end
  end

  #Check if we should reconnect based on keep-alive-max.
  if @keepalive_max == 1 or @connection == "close"
    # @sock is already nil when the connection was reset while reading.
    @sock.close if @sock and !@sock.closed?
    @sock = nil
  end

  #Check if the content is gzip-encoded - if so: decode it!
  # An empty body (e.g. a redirect) is not a gzip stream and is left alone.
  # Assumes the result-object starts out with a String body - TODO confirm.
  if @encoding == "gzip" and !@resp.args[:body].to_s.empty?
    require "zlib"
    require "stringio"

    gz = Zlib::GzipReader.new(StringIO.new(@resp.args[:body]))
    begin
      untrusted_str = gz.read
    ensure
      gz.close
    end

    # Drop byte sequences that are not valid UTF-8. iconv is no longer part
    # of the standard library, so it is only the fallback for old rubies
    # without String#scrub.
    if untrusted_str.respond_to?(:scrub)
      valid_string = untrusted_str.force_encoding("UTF-8").scrub("")
    else
      require "iconv"
      ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
      valid_string = ic.iconv(untrusted_str + " ")[0..-2]
    end

    @resp.args[:body] = valid_string
  end

  #Release variables.
  resp = @resp
  @resp = nil
  @mode = nil

  raise "No status-code was received from the server.\n\nHeaders:\n#{Knj::Php.print_r(resp.headers, true)}\n\nBody:\n#{resp.args[:body]}" if !resp.args[:code]

  if resp.args[:code].to_s == "302" and resp.header?("location") and (!@args.key?(:follow_redirects) or @args[:follow_redirects])
    require "uri"
    uri = URI.parse(resp.header("location"))

    # get() adds the leading slash itself; keeping the one from the path
    # would request "//path".
    url = uri.path.to_s.sub(/\A\//, "")
    url << "?#{uri.query}" if uri.query.to_s.length > 0

    # Kept separate from the args parameter, which belongs to this request.
    redirect_args = {:host => uri.host}
    redirect_args[:ssl] = true if uri.scheme == "https"
    redirect_args[:port] = uri.port if uri.port

    print "Redirecting from location-header to '#{url}'.\n" if @debug

    # This connection can only serve the redirect when the target is relative
    # or has the same host, port and scheme. A redirect from http to https on
    # the same host, for example, needs a new connection.
    same_origin = !uri.host || (
      uri.host == @args[:host] &&
      uri.port.to_i == @args[:port].to_i &&
      (uri.scheme == "https") == !!@args[:ssl]
    )

    if same_origin
      return self.get(url)
    else
      # Block form: the temporary connection is destroyed (sockets closed)
      # once the redirected response has been read.
      Knj::Http2.new(redirect_args) do |http|
        return http.get(url)
      end
    end
  elsif resp.args[:code].to_s == "500"
    raise "500 - Internal server error: '#{args[:addr]}':\n\n#{resp.body}"
  elsif resp.args[:code].to_s == "403"
    raise Knj::Errors::NoAccess
  else
    return resp
  end
end

#reconnectObject

Reconnects to the host.



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/knj/http2.rb', line 100

# (Re)connects to the host: closes what is left of a previous connection,
# resets the per-connection state and opens a new socket - through the
# configured proxy (HTTP CONNECT) when @args[:proxy] is set, and wrapped in
# SSL when @args[:ssl] is set.
#
# Raises RuntimeError when the proxy does not accept the CONNECT request.
def reconnect
  require "socket"
  print "Http2: Reconnect.\n" if @debug

  # Close the sockets of the previous connection, otherwise every reconnect
  # leaks their descriptors. Closing is best effort - a socket that is
  # already dead may raise, and that must not prevent the new connection.
  [@sock_ssl, @sock_plain].each do |old_sock|
    begin
      old_sock.close if old_sock and !old_sock.closed?
    rescue StandardError => e
      print "Http2: Ignoring error while closing old socket: #{e.message}\n" if @debug
    end
  end

  @sock = nil
  @sock_plain = nil
  @sock_ssl = nil

  #Reset variables.
  @keepalive_max = nil
  @keepalive_timeout = nil
  @connection = nil
  @contenttype = nil
  @charset = nil

  #Open connection.
  if @args[:proxy]
    print "Http2: Initializing proxy stuff.\n" if @debug
    @sock_plain = TCPSocket.new(@args[:proxy][:host], @args[:proxy][:port])
    @sock = @sock_plain

    @sock.write("CONNECT #{@args[:host]}:#{@args[:port]} HTTP/1.0#{@nl}")
    @sock.write("User-Agent: #{@uagent}#{@nl}")

    if @args[:proxy][:user] and @args[:proxy][:passwd]
      credential = ["#{@args[:proxy][:user]}:#{@args[:proxy][:passwd]}"].pack("m")
      credential.delete!("\r\n")
      @sock.write("Proxy-Authorization: Basic #{credential}#{@nl}")
    end

    @sock.write(@nl)

    # Accept any 200 status line; proxies differ in HTTP version and reason phrase.
    res = @sock.gets
    raise "No response was received from the proxy." if res.nil?
    raise res if res !~ /\AHTTP\/1\.[01]\s+200\b/i

    # Skip any response headers up to the blank line that ends the head.
    loop do
      proxy_line = @sock.gets
      raise "The proxy closed the connection before finishing its response." if proxy_line.nil?
      break if proxy_line.strip == ""
    end
  else
    print "Http2: Opening socket connection to '#{@args[:host]}:#{@args[:port]}'.\n" if @debug
    @sock_plain = TCPSocket.new(@args[:host], @args[:port].to_i)
  end

  if @args[:ssl]
    print "Http2: Initializing SSL.\n" if @debug
    require "openssl"

    # NOTE(review): the peer certificate is not verified (verify_mode is left
    # at its default) - consider enabling VERIFY_PEER.
    ssl_context = OpenSSL::SSL::SSLContext.new
    #ssl_context.verify_mode = OpenSSL::SSL::VERIFY_PEER

    @sock_ssl = OpenSSL::SSL::SSLSocket.new(@sock_plain, ssl_context)
    @sock_ssl.sync_close = true
    # Send the host name (SNI) so servers with several certificates can pick
    # the right one; must be set before connect.
    @sock_ssl.hostname = @args[:host] if @sock_ssl.respond_to?(:hostname=)
    @sock_ssl.connect

    @sock = @sock_ssl
  else
    @sock = @sock_plain
  end
end

#socket_working?Boolean

Returns a boolean indicating whether the object is connected and the socket is working.

Examples

print “Socket is working.” if http.socket_working?

Returns:

  • (Boolean)


63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/knj/http2.rb', line 63

# Tells whether the current socket can be used for another request: it must
# exist, be open, and - when both a keep-alive timeout and the time of the
# last request are known - not have been idle for that timeout or longer.
def socket_working?
  return false unless @sock and !@sock.closed?

  expired = (@keepalive_timeout and @request_last and (Time.now.to_i - @request_last.to_i) >= @keepalive_timeout)
  return true unless expired

  print "Http2: We are over the keepalive-wait - returning false for socket_working?.\n" if @debug
  return false
end

#write(str) ⇒ Object

Tries to write a string to the socket. If it fails it reconnects and tries again.



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/knj/http2.rb', line 179

# Writes a request to the socket, reconnecting first when the socket is not
# usable any more. If the write fails because the connection is gone, it
# reconnects and tries once more.
#
# str - the complete request text; it is sent exactly as given.
#
# Also resets the state kept from the previous response (@length, @encoding)
# and records the time of this request.
def write(str)
  #Reset variables.
  @length = nil
  @encoding = nil
  self.reconnect if !self.socket_working?

  begin
    raise Errno::EPIPE, "The socket is closed." if !@sock or @sock.closed?
    # write, not puts: puts appends "\n" when the text does not end in a
    # newline, which would add a byte after a POST body.
    @sock.write(str)
  rescue Errno::EPIPE, Errno::ECONNRESET #these can also be thrown by the write itself.
    self.reconnect
    @sock.write(str)
  end

  @request_last = Time.now
end