Class: Unicorn::HttpParser

Inherits:
Object
  • Object
show all
Defined in:
lib/unicorn/http_request.rb,
ext/unicorn_http/unicorn_http.c

Constant Summary collapse

DEFAULTS =

default parameters we merge into the request env for Rack handlers

{
  "rack.errors" => $stderr,
  "rack.multiprocess" => true,
  "rack.multithread" => false,
  "rack.run_once" => false,
  "rack.version" => [1, 1],
  "SCRIPT_NAME" => "",

  # this is not in the Rack spec, but some apps may rely on it
  "SERVER_SOFTWARE" => "Unicorn #{Unicorn::Const::UNICORN_VERSION}"
}
NULL_IO =
StringIO.new("")
REMOTE_ADDR =

:stopdoc: A frozen format for this is about 15% faster

'REMOTE_ADDR'.freeze
RACK_INPUT =
'rack.input'.freeze
TeeInput =
Unicorn::TeeInput
CHUNK_MAX =

The maximum size of a single chunk when using chunked transfer encoding. This is only a theoretical maximum used to detect errors in clients; it is highly unlikely to encounter clients that send more than several kilobytes at once.

OFFT2NUM(UH_OFF_T_MAX)
LENGTH_MAX =

The maximum size of the body as specified by Content-Length. This is only a theoretical maximum; the actual limit is subject to the limits of the file system used for Dir.tmpdir.

OFFT2NUM(UH_OFF_T_MAX)

Instance Method Summary collapse

Constructor Details

#newObject

Creates a new parser.



3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
# File 'ext/unicorn_http/unicorn_http.c', line 3252

/*
 * Constructor body for a new parser object.
 *
 * Puts the underlying struct http_parser into its initial state via
 * http_parser_init() and then attaches a fresh, empty String as the
 * parse buffer and a fresh Hash as the request env.
 *
 * Returns self.
 */
static VALUE HttpParser_init(VALUE self)
{
  struct http_parser *parser = data_get(self);

  /* state must be initialized before the Ruby objects are attached */
  http_parser_init(parser);

  parser->buf = rb_str_new(NULL, 0);
  parser->env = rb_hash_new();

  return self;
}

Instance Method Details

#body_eof?Boolean

Detects if we’re done filtering the body or not. This can be used to detect when to stop calling HttpParser#filter_body.

Returns:

  • (Boolean)


3382
3383
3384
3385
3386
3387
3388
3389
3390
# File 'ext/unicorn_http/unicorn_http.c', line 3382

/*
 * Tells the caller whether body filtering is finished.
 *
 * When the CHUNKED flag is set the answer comes from chunked_eof();
 * otherwise the body is finished once the remaining content length
 * (hp->len.content) has reached zero.
 *
 * Returns Qtrue or Qfalse.
 */
static VALUE HttpParser_body_eof(VALUE self)
{
  struct http_parser *parser = data_get(self);
  int finished;

  if (HP_FL_TEST(parser, CHUNKED))
    finished = chunked_eof(parser) ? 1 : 0;
  else
    finished = (parser->len.content == 0);

  return finished ? Qtrue : Qfalse;
}

#bufObject



3425
3426
3427
3428
# File 'ext/unicorn_http/unicorn_http.c', line 3425

/* Accessor: returns the buffer object currently stored in the parser. */
static VALUE HttpParser_buf(VALUE self)
{
  struct http_parser *parser = data_get(self);

  return parser->buf;
}

#content_lengthnil, Integer

Returns the number of bytes left to run through HttpParser#filter_body. This will initially be the value of the “Content-Length” HTTP header after header parsing is complete and will decrease in value as HttpParser#filter_body is called for each chunk. This should return zero for requests with no body.

This will return nil on “Transfer-Encoding: chunked” requests.

Returns:

  • (nil, Integer)


3308
3309
3310
3311
3312
3313
# File 'ext/unicorn_http/unicorn_http.c', line 3308

/*
 * Reports how many body bytes remain according to hp->len.content.
 *
 * Returns Qnil when the CHUNKED flag is set (no fixed length applies),
 * otherwise the remaining length converted with OFFT2NUM().
 */
static VALUE HttpParser_content_length(VALUE self)
{
  struct http_parser *parser = data_get(self);

  if (HP_FL_TEST(parser, CHUNKED))
    return Qnil;

  return OFFT2NUM(parser->len.content);
}

#envObject



3430
3431
3432
3433
# File 'ext/unicorn_http/unicorn_http.c', line 3430

/* Accessor: returns the env object currently stored in the parser. */
static VALUE HttpParser_env(VALUE self)
{
  struct http_parser *parser = data_get(self);

  return parser->env;
}

#filter_body(buf, data) ⇒ Object

Takes a String of data, will modify data if dechunking is done. Returns nil if there is more data left to process. Returns data if body processing is complete. When returning data, it may modify data so the start of the string points to where the body ended so that trailer processing can begin.

Raises HttpParserError if there are dechunking errors. Basically this is a glorified memcpy(3) that copies data into buf while filtering it through the dechunker.



3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
# File 'ext/unicorn_http/unicorn_http.c', line 3449

/*
 * Copies request body bytes from +data+ into +buf+, passing them through
 * the dechunker when the CHUNKED flag is set.
 *
 * buf  - destination String; resized to at most the length of +data+
 *        and then truncated to the number of bytes actually produced.
 * data - source String; advanced past the bytes that were consumed.
 *
 * Returns Qnil when body bytes were produced or more input is needed,
 * and returns +data+ when there was nothing (left) to filter, so the
 * caller can continue with whatever remains in +data+.
 *
 * Raises via parser_error() if the parser enters its error state while
 * dechunking.  Both arguments are checked with StringValue(), so a
 * non-String argument raises TypeError instead of being dereferenced
 * as a String.
 */
static VALUE HttpParser_filter_body(VALUE self, VALUE buf, VALUE data)
{
  struct http_parser *hp = data_get(self);
  char *dptr;
  long dlen;

  /*
   * RSTRING_PTR/RSTRING_LEN perform no type checking, so both arguments
   * are validated first and the pointer is only read afterwards.
   */
  StringValue(data);
  StringValue(buf);

  dptr = RSTRING_PTR(data);
  dlen = RSTRING_LEN(data);

  rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
  OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */

  if (HP_FL_TEST(hp, CHUNKED)) {
    if (!chunked_eof(hp)) {
      /* the state machine writes dechunked bytes into hp->cont */
      hp->s.dest_offset = 0;
      hp->cont = buf;
      hp->buf = data;
      http_parser_execute(hp, dptr, dlen);
      if (hp->cs == http_parser_error)
        parser_error("Invalid HTTP format, parsing fails.");

      assert(hp->s.dest_offset <= hp->offset &&
             "destination buffer overflow");
      advance_str(data, hp->offset);
      rb_str_set_len(buf, hp->s.dest_offset);

      if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) {
        /* nothing produced and the chunked body is over: hand data back */
        assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
      } else {
        data = Qnil;
      }
    }
  } else {
    /* no need to enter the Ragel machine for unchunked transfers */
    assert(hp->len.content >= 0 && "negative Content-Length");
    if (hp->len.content > 0) {
      long nr = MIN(dlen, hp->len.content);

      memcpy(RSTRING_PTR(buf), dptr, nr);
      hp->len.content -= nr;
      if (hp->len.content == 0)
        hp->cs = http_parser_first_final;
      advance_str(data, nr);
      rb_str_set_len(buf, nr);
      data = Qnil;
    }
  }
  hp->offset = 0; /* for trailer parsing */
  return data;
}

#headers(env, buf) ⇒ Object



3360
3361
3362
3363
3364
3365
3366
3367
3368
# File 'ext/unicorn_http/unicorn_http.c', line 3360

/*
 * Installs +env+ and +buf+ as the parser's current env and buffer,
 * then delegates to HttpParser_parse() and returns its result.
 */
static VALUE HttpParser_headers(VALUE self, VALUE env, VALUE buf)
{
  struct http_parser *parser = data_get(self);

  parser->buf = buf;
  parser->env = env;

  return HttpParser_parse(self);
}

#headers?Boolean

This should be used to detect if a request has headers (and if the response will have headers as well). HTTP/0.9 requests should return false; all subsequent HTTP versions will return true.

Returns:

  • (Boolean)


3418
3419
3420
3421
3422
3423
# File 'ext/unicorn_http/unicorn_http.c', line 3418

/*
 * Returns Qtrue when the HASHEADER flag is set on the parser,
 * Qfalse otherwise.
 */
static VALUE HttpParser_has_headers(VALUE self)
{
  struct http_parser *parser = data_get(self);

  if (HP_FL_TEST(parser, HASHEADER))
    return Qtrue;

  return Qfalse;
}

#keepalive?Boolean

This should be used to detect if a request can really handle keepalives and pipelining. Currently, the rules are:

  1. MUST be a GET or HEAD request

  2. MUST be HTTP/1.1 or HTTP/1.0 with “Connection: keep-alive”

  3. MUST NOT have “Connection: close” set

Returns:

  • (Boolean)


3403
3404
3405
3406
3407
3408
# File 'ext/unicorn_http/unicorn_http.c', line 3403

/*
 * Returns Qtrue when HP_FL_ALL(hp, KEEPALIVE) holds for the parser,
 * Qfalse otherwise.
 */
static VALUE HttpParser_keepalive(VALUE self)
{
  struct http_parser *parser = data_get(self);

  if (HP_FL_ALL(parser, KEEPALIVE))
    return Qtrue;

  return Qfalse;
}

#parsenil

Takes a Hash and a String of data, and parses the String of data, filling in the Hash. Returns the Hash if parsing is finished, nil otherwise. When returning the env Hash, it may modify data to point to where body processing should begin.

Raises HttpParserError if there are parsing errors.

Returns:

  • (nil)


3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
# File 'ext/unicorn_http/unicorn_http.c', line 3327

/*
 * Runs the parser over the String currently stored in hp->buf.
 *
 * Returns hp->env once the state machine reaches http_parser_first_final
 * or http_parser_en_ChunkedBody; in that case the buffer is advanced by
 * hp->offset + 1 bytes and hp->offset is reset to zero.
 * Returns Qnil when parsing is not yet finished.
 *
 * Raises via parser_error() when the machine is in its error state;
 * VALIDATE_MAX_LENGTH() is applied to the offset after every run.
 */
static VALUE HttpParser_parse(VALUE self)
{
  struct http_parser *hp = data_get(self);
  VALUE src = hp->buf;
  int finished;

  http_parser_execute(hp, RSTRING_PTR(src), RSTRING_LEN(src));
  VALIDATE_MAX_LENGTH(hp->offset, HEADER);

  finished = (hp->cs == http_parser_first_final ||
              hp->cs == http_parser_en_ChunkedBody);

  if (!finished) {
    if (hp->cs == http_parser_error)
      parser_error("Invalid HTTP format, parsing fails.");

    return Qnil;
  }

  /* drop the consumed bytes so the buffer starts after what was parsed */
  advance_str(src, hp->offset + 1);
  hp->offset = 0;

  return hp->env;
}

#read(socket) ⇒ Object

Does the majority of the IO processing. It has been written in Ruby using about 8 different IO processing strategies.

It is currently carefully constructed to make sure that it gets the best possible performance for the common case: GET requests that are fully complete after a single read(2)

Anyone who thinks they can make it faster is more than welcome to take a crack at it.

Returns an environment hash suitable for Rack if successful. This does minimal exception trapping, and it is up to the caller to handle any socket errors (e.g. user aborted upload).



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/unicorn/http_request.rb', line 44

# Reads one request from +socket+ and returns the populated env hash.
#
# The parser is reset, REMOTE_ADDR is recorded from the socket, and data
# is read in 16384-byte requests until +parse+ stops returning nil.  Any
# exception raised by the parser or the socket propagates to the caller.
def read(socket)
  reset
  rack_env = env

  # From http://www.ietf.org/rfc/rfc3875:
  # "Script authors should be aware that the REMOTE_ADDR and
  #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
  #  may not identify the ultimate source of the request.  They
  #  identify the client for the immediate request to the server;
  #  that client may be a proxy, gateway, or other intermediary
  #  acting on behalf of the actual source client."
  rack_env[REMOTE_ADDR] = socket.kgio_addr

  # the first read goes straight into the parser's buffer, which covers
  # the common case of a small GET request arriving in one piece
  socket.kgio_read!(16384, buf)

  # keep appending data until the parser reports completion; an exception
  # raised by the parser breaks us out of the loop
  while parse.nil?
    buf << socket.kgio_read!(16384)
  end

  rack_env[RACK_INPUT] =
    if 0 == content_length
      NULL_IO
    else
      TeeInput.new(socket, self)
    end
  rack_env.merge!(DEFAULTS)
end

#resetnil

Resets the parser to its initial state so that you can reuse it rather than making new ones.

Returns:

  • (nil)


3270
3271
3272
3273
3274
3275
3276
3277
3278
# File 'ext/unicorn_http/unicorn_http.c', line 3270

/*
 * Returns the parser to its initial state so the object can be reused:
 * re-runs http_parser_init() on the struct and calls the method named
 * by id_clear on the stored env object.
 *
 * Always returns Qnil.
 */
static VALUE HttpParser_reset(VALUE self)
{
  struct http_parser *parser = data_get(self);

  http_parser_init(parser);
  rb_funcall(parser->env, id_clear, 0);

  return Qnil;
}

#trailers(req, data) ⇒ nil

This is an alias for HttpParser#headers

Returns:

  • (nil)


3360
3361
3362
3363
3364
3365
3366
3367
3368
# File 'ext/unicorn_http/unicorn_http.c', line 3360

/*
 * Installs +env+ and +buf+ as the parser's current env and buffer,
 * then delegates to HttpParser_parse() and returns its result.
 */
static VALUE HttpParser_headers(VALUE self, VALUE env, VALUE buf)
{
  struct http_parser *parser = data_get(self);

  parser->buf = buf;
  parser->env = env;

  return HttpParser_parse(self);
}