Class: Pitchfork::HttpParser

Inherits:
Object
  • Object
show all
Defined in:
lib/pitchfork/http_parser.rb,
ext/pitchfork_http/pitchfork_http.c

Constant Summary collapse

DEFAULTS =

default parameters we merge into the request env for Rack handlers

{
  # Static Rack environment entries; #read merges this hash into the
  # per-request env as its final step.
  "rack.errors" => $stderr,
  "rack.multiprocess" => true,
  "rack.multithread" => false,
  "rack.run_once" => false,
  "rack.version" => [1, 2],
  # hijacking is advertised; #read stores the parser under "rack.hijack"
  "rack.hijack?" => true,
  "SCRIPT_NAME" => "",

  # this is not in the Rack spec, but some apps may rely on it
  "SERVER_SOFTWARE" => "Pitchfork #{Pitchfork::Const::UNICORN_VERSION}"
}
NULL_IO =
StringIO.new("")
HTTP_RESPONSE_START =

:stopdoc:

[ 'HTTP'.freeze, '/1.1 '.freeze ]
EMPTY_ARRAY =
[].freeze
TCPI =
Raindrops::TCP_Info.allocate
CHUNK_MAX =

The maximum size of a single chunk when using chunked transfer encoding. This is only a theoretical maximum used to detect errors in clients; it is highly unlikely to encounter clients that send more than several kilobytes at once.

OFFT2NUM(UH_OFF_T_MAX)
LENGTH_MAX =

The maximum size of the body as specified by Content-Length. This is only a theoretical maximum, the actual limit is subject to the limits of the file system used for Dir.tmpdir.

OFFT2NUM(UH_OFF_T_MAX)
@@input_class =
Pitchfork::TeeInput
@@check_client_connection =
false
@@tcpi_inspect_ok =
Socket.const_defined?(:TCP_INFO)

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#newObject

Creates a new parser.



3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
# File 'ext/pitchfork_http/pitchfork_http.c', line 3914

/*
 * Initializer for a parser object.
 *
 * Runs http_parser_init() on the underlying struct, then installs a new
 * empty String as the parse buffer and a new Hash as the env.
 * Returns self.
 */
static VALUE HttpParser_init(VALUE self)
{
  struct http_parser *parser = data_get(self);
  VALUE fresh;

  http_parser_init(parser);

  /* allocate and store one object at a time, in the original order */
  fresh = rb_str_new(NULL, 0);
  RB_OBJ_WRITE(self, &parser->buf, fresh);

  fresh = rb_hash_new();
  RB_OBJ_WRITE(self, &parser->env, fresh);

  return self;
}

Class Method Details

.check_client_connectionObject



39
40
41
# File 'lib/pitchfork/http_parser.rb', line 39

# Returns the class-wide @@check_client_connection flag (initialised to
# false). When the flag is truthy, #read calls
# #check_client_connection(socket) after the request has been parsed.
def self.check_client_connection
  @@check_client_connection
end

.check_client_connection=(bool) ⇒ Object



43
44
45
# File 'lib/pitchfork/http_parser.rb', line 43

# Sets the class-wide @@check_client_connection flag. +bool+ is stored
# as-is; #read only tests it for truthiness.
def self.check_client_connection=(bool)
  @@check_client_connection = bool
end

.input_classObject



31
32
33
# File 'lib/pitchfork/http_parser.rb', line 31

# Returns the class stored in @@input_class (initialised to
# Pitchfork::TeeInput). #read instantiates it as
# <tt>@@input_class.new(socket, self)</tt> to build "rack.input" unless
# content_length is 0.
def self.input_class
  @@input_class
end

.input_class=(klass) ⇒ Object



35
36
37
# File 'lib/pitchfork/http_parser.rb', line 35

# Replaces the class stored in @@input_class. #read calls
# <tt>klass.new(socket, parser)</tt> on it, so +klass+ must accept those
# two arguments.
def self.input_class=(klass)
  @@input_class = klass
end

.is_chunked?(v) ⇒ Boolean

called by ext/pitchfork_http/pitchfork_http.rl via rb_funcall

Returns:

  • (Boolean)

Raises:



195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/pitchfork/http_parser.rb', line 195

# Tells whether the comma-separated transfer-coding list +v+ ends in
# "chunked" (compared case-insensitively, surrounding whitespace ignored).
#
# Returns true when "chunked" is the final coding and appears only once,
# false when it does not appear at all.
#
# Raises Pitchfork::HttpParserError (with an empty backtrace) when
# "chunked" is repeated, or when it appears but is not the last coding.
def self.is_chunked?(v) # :nodoc:
  codings = v.split(',').each do |coding|
    coding.strip!
    coding.downcase!
  end

  final_coding = codings.pop
  chunked_earlier = codings.include?('chunked'.freeze)

  if final_coding == 'chunked'.freeze
    raise Pitchfork::HttpParserError, 'double chunked', [] if chunked_earlier
    true
  else
    raise Pitchfork::HttpParserError, 'chunked not last', [] if chunked_earlier
    false
  end
end

.max_header_len=(len) ⇒ Object

this is only intended for use with Rainbows!



41
42
43
44
# File 'ext/pitchfork_http/pitchfork_http.c', line 41

/*
 * Backs HttpParser.max_header_len=.
 *
 * Converts len to an unsigned int (NUM2UINT raises for values it cannot
 * convert), stores it in MAX_HEADER_LEN and returns the stored value as a
 * Ruby Integer.
 */
static VALUE set_maxhdrlen(VALUE self, VALUE len)
{
  MAX_HEADER_LEN = NUM2UINT(len);

  return UINT2NUM(MAX_HEADER_LEN);
}

Instance Method Details

#add_parse(buffer) ⇒ nil

adds the contents of buffer to the internal buffer and attempts to continue parsing. Returns the env Hash on success or nil if more data is needed.

Raises HttpParserError if there are parsing errors.

Returns:

  • (nil)


4032
4033
4034
4035
4036
4037
4038
4039
4040
# File 'ext/pitchfork_http/pitchfork_http.c', line 4032

/*
 * Appends buffer to the parser's internal buffer and resumes parsing.
 *
 * buffer must be a String (TypeError otherwise). The return value is
 * whatever HttpParser_parse() returns: the env Hash once parsing has
 * finished, nil while more data is needed.
 */
static VALUE HttpParser_add_parse(VALUE self, VALUE buffer)
{
  struct http_parser *parser = data_get(self);
  VALUE outcome;

  Check_Type(buffer, T_STRING);
  rb_str_buf_append(parser->buf, buffer);

  outcome = HttpParser_parse(self);
  return outcome;
}

#body_eof?Boolean

Detects if we’re done filtering the body or not. This can be used to detect when to stop calling HttpParser#filter_body.

Returns:

  • (Boolean)


4075
4076
4077
4078
4079
4080
4081
4082
4083
# File 'ext/pitchfork_http/pitchfork_http.c', line 4075

/*
 * Reports whether the request body has been fully consumed.
 *
 * For requests flagged CHUNKED the answer comes from chunked_eof();
 * otherwise the body is finished once the remaining content length
 * has dropped to zero.
 */
static VALUE HttpParser_body_eof(VALUE self)
{
  struct http_parser *parser = data_get(self);
  int finished;

  if (HP_FL_TEST(parser, CHUNKED))
    finished = chunked_eof(parser) ? 1 : 0;
  else
    finished = (parser->len.content == 0) ? 1 : 0;

  return finished ? Qtrue : Qfalse;
}

#bufObject



4136
4137
4138
4139
# File 'ext/pitchfork_http/pitchfork_http.c', line 4136

/* Returns the String object currently used as the parser's buffer. */
static VALUE HttpParser_buf(VALUE self)
{
  struct http_parser *parser = data_get(self);

  return parser->buf;
}

#callObject

for rack.hijack, we respond to this method so no extra allocation of a proc object



101
102
103
104
# File 'lib/pitchfork/http_parser.rb', line 101

# Rack hijack entry point: the parser itself is stored under
# "rack.hijack", so responding to #call avoids allocating a Proc.
#
# Marks the parser as hijacked, then exposes the client socket (kept
# under "pitchfork.socket") as "rack.hijack_io". Returns that socket.
def call
  hijacked!
  rack_env = env
  rack_env['rack.hijack_io'] = rack_env['pitchfork.socket']
end

#check_client_connection(socket) ⇒ Object

Ruby 2.2+ can show struct tcp_info as a string via Socket::Option#inspect. Not that efficient, but probably still better than doing unnecessary work after a client gives up.



113
114
115
116
117
118
119
120
121
# File 'lib/pitchfork/http_parser.rb', line 113

# Detects a client that has already gone away before the request is
# handled.
#
# For TCP sockets the kernel's tcp_info state is read through
# Raindrops::TCP_Info#get! and Errno::EPIPE is raised (with an empty
# backtrace) when the state counts as closed. For every other socket type
# the start of the HTTP response is written instead via write_http_header.
def check_client_connection(socket) # :nodoc:
  return write_http_header(socket) unless TCPSocket === socket

  # reads struct tcp_info#tcpi_state
  tcp_state = TCPI.get!(socket).state
  raise Errno::EPIPE, "client closed connection".freeze, EMPTY_ARRAY if closed_state?(tcp_state)
end

#clearObject

Resets the parser to its initial state so that you can reuse it rather than making new ones.



3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
# File 'ext/pitchfork_http/pitchfork_http.c', line 3932

/*
 * Resets the parser so it can be reused for another request.
 *
 * Normal case: the struct is re-initialised and the existing env Hash is
 * emptied in place. If the HIJACK flag is set, .buf and .env cannot be
 * safely reused, so the full initializer runs and replaces both.
 * Returns self.
 */
static VALUE HttpParser_clear(VALUE self)
{
  struct http_parser *parser = data_get(self);

  if (!(HP_FL_TEST(parser, HIJACK))) {
    http_parser_init(parser);
    rb_hash_clear(parser->env);

    return self;
  }

  /* we can't safely reuse .buf and .env if hijacked */
  return HttpParser_init(self);
}

#closed_state?(state) ⇒ Boolean

raindrops before 0.18 only supported TCP_INFO under Linux

Returns:

  • (Boolean)


142
143
144
145
146
147
148
149
150
151
# File 'lib/pitchfork/http_parser.rb', line 142

# Maps a numeric tcpi_state value to "has the peer closed the connection?".
# Only the states listed below count as closed; ESTABLISHED and every
# unlisted value yield false.
def closed_state?(state) # :nodoc:
  case state
  when 1 then false # ESTABLISHED
  when 6, 7, 8, 9, 11 then true # TIME_WAIT, CLOSE, CLOSE_WAIT, LAST_ACK, CLOSING
  else false
  end
end

#closed_state_str?(state) ⇒ Boolean

Returns:

  • (Boolean)


174
175
176
177
178
179
180
181
182
183
184
# File 'lib/pitchfork/http_parser.rb', line 174

# String counterpart of closed_state?: +state+ is a TCP state name.
# Returns true only for the closed-ish names listed below, false for
# 'ESTABLISHED' and anything else.
def closed_state_str?(state)
  case state
  when 'ESTABLISHED' then false
  # not a typo, ruby maps TCP_CLOSE (no 'D') to state=CLOSED (w/ 'D')
  when 'CLOSE_WAIT', 'TIME_WAIT', 'CLOSED', 'LAST_ACK', 'CLOSING' then true
  else false
  end
end

#content_lengthnil, Integer

Returns the number of bytes left to run through HttpParser#filter_body. This will initially be the value of the “Content-Length” HTTP header after header parsing is complete and will decrease in value as HttpParser#filter_body is called for each chunk. This should return zero for requests with no body.

This will return nil on “Transfer-Encoding: chunked” requests.

Returns:

  • (nil, Integer)


3974
3975
3976
3977
3978
3979
# File 'ext/pitchfork_http/pitchfork_http.c', line 3974

/*
 * Returns the number of body bytes still to be run through filter_body
 * as a Ruby Integer, or nil when the request is flagged CHUNKED.
 */
static VALUE HttpParser_content_length(VALUE self)
{
  struct http_parser *parser = data_get(self);

  if (HP_FL_TEST(parser, CHUNKED))
    return Qnil;

  return OFFT2NUM(parser->len.content);
}

#envObject



4141
4142
4143
4144
# File 'ext/pitchfork_http/pitchfork_http.c', line 4141

/* Returns the Hash object the parser currently fills in as the env. */
static VALUE HttpParser_env(VALUE self)
{
  struct http_parser *parser = data_get(self);

  return parser->env;
}

#filter_body(dst, src) ⇒ Object

Takes a String of src, will modify data if dechunking is done. Returns nil if there is more data left to process. Returns src if body processing is complete. When returning src, it may modify src so the start of the string points to where the body ended so that trailer processing can begin.

Raises HttpParserError if there are dechunking errors. Basically this is a glorified memcpy(3) that copies src into dst while filtering it through the dechunker.



4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
# File 'ext/pitchfork_http/pitchfork_http.c', line 4169

/*
 * Copies body data from src into dst, running it through the dechunker
 * when the request is flagged CHUNKED.
 *
 * dst - String (or object convertible via to_str) that receives the
 *       filtered bytes; it is resized to the number of bytes copied.
 * src - String holding raw bytes read from the client; consumed bytes
 *       are dropped from its front via advance_str().
 *
 * Returns nil while more body data is expected, or src once body
 * processing is complete (src may then hold the start of the trailers).
 *
 * Raises TypeError if src is not a String, and HttpParserError when the
 * chunked encoding is malformed.
 */
static VALUE HttpParser_filter_body(VALUE self, VALUE dst, VALUE src)
{
  struct http_parser *hp = data_get(self);
  char *srcptr;
  long srclen;

  /*
   * Validate both arguments before looking inside src: RSTRING_PTR() on a
   * non-String is undefined, and StringValue(dst) may invoke an arbitrary
   * to_str, so the raw pointer/length are only read afterwards.
   */
  Check_Type(src, T_STRING);
  StringValue(dst);

  srcptr = RSTRING_PTR(src);
  srclen = RSTRING_LEN(src);

  if (HP_FL_TEST(hp, CHUNKED)) {
    if (!chunked_eof(hp)) {
      rb_str_modify(dst);
      rb_str_resize(dst, srclen); /* we can never copy more than srclen bytes */

      hp->s.dest_offset = 0;
      RB_OBJ_WRITE(self, &hp->cont, dst);
      RB_OBJ_WRITE(self, &hp->buf, src);
      http_parser_execute(self, hp, srcptr, srclen);
      if (hp->cs == http_parser_error)
        parser_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");

      assert(hp->s.dest_offset <= hp->offset &&
             "destination buffer overflow");
      advance_str(src, hp->offset);
      /* shrink dst to the number of bytes the dechunker actually wrote */
      rb_str_set_len(dst, hp->s.dest_offset);

      if (RSTRING_LEN(dst) == 0 && chunked_eof(hp)) {
        /* nothing copied and the chunked body is over: hand src back */
        assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
      } else {
        src = Qnil;
      }
    }
  } else {
    /* no need to enter the Ragel machine for unchunked transfers */
    assert(hp->len.content >= 0 && "negative Content-Length");
    if (hp->len.content > 0) {
      long nr = MIN(srclen, hp->len.content);

      rb_str_modify(dst);
      rb_str_resize(dst, nr);
      /*
       * using rb_str_replace() to avoid memcpy() doesn't help in
       * most cases because a GC-aware programmer will pass an explicit
       * buffer to env["rack.input"].read and reuse the buffer in a loop.
       * This causes copy-on-write behavior to be triggered anyways
       * when the +src+ buffer is modified (when reading off the socket).
       */
      RB_OBJ_WRITE(self, &hp->buf, src);
      memcpy(RSTRING_PTR(dst), srcptr, nr);
      hp->len.content -= nr;
      if (hp->len.content == 0) {
        /* whole Content-Length consumed: the request is complete */
        HP_FL_SET(hp, REQEOF);
        hp->cs = http_parser_first_final;
      }
      advance_str(src, nr);
      src = Qnil;
    }
  }
  hp->offset = 0; /* for trailer parsing */
  return src;
}

#headers(env, buf) ⇒ Object



4053
4054
4055
4056
4057
4058
4059
4060
4061
# File 'ext/pitchfork_http/pitchfork_http.c', line 4053

/*
 * Installs buf as the parser's buffer and env as its env Hash, then
 * parses. Returns whatever HttpParser_parse() returns: the env once
 * parsing has finished, nil while more data is needed.
 */
static VALUE HttpParser_headers(VALUE self, VALUE env, VALUE buf)
{
  struct http_parser *parser = data_get(self);

  /* go through the write barrier macro when storing object references */
  RB_OBJ_WRITE(self, &parser->buf, buf);
  RB_OBJ_WRITE(self, &parser->env, env);

  return HttpParser_parse(self);
}

#headers?Boolean

This should be used to detect if a request has headers (and if the response will have headers as well). HTTP/0.9 requests should return false, all subsequent HTTP versions will return true.

Returns:

  • (Boolean)


4129
4130
4131
4132
4133
4134
# File 'ext/pitchfork_http/pitchfork_http.c', line 4129

/* Returns true when the HASHEADER flag is set on the parser, else false. */
static VALUE HttpParser_has_headers(VALUE self)
{
  struct http_parser *parser = data_get(self);

  if (HP_FL_TEST(parser, HASHEADER))
    return Qtrue;

  return Qfalse;
}

#hijacked!Object



4146
4147
4148
4149
4150
4151
4152
4153
# File 'ext/pitchfork_http/pitchfork_http.c', line 4146

/*
 * Sets the HIJACK flag on the parser and returns self. HttpParser_clear()
 * checks this flag to decide whether .buf and .env may be reused.
 */
static VALUE HttpParser_hijacked_bang(VALUE self)
{
  struct http_parser *parser = data_get(self);

  HP_FL_SET(parser, HIJACK);
  return self;
}

#hijacked?Boolean

Returns:

  • (Boolean)


106
107
108
# File 'lib/pitchfork/http_parser.rb', line 106

# True once the env carries a "rack.hijack_io" entry (set by #call),
# regardless of the value stored under that key.
def hijacked?
  env.key?('rack.hijack_io'.freeze)
end

#keepalive?Boolean

This should be used to detect if a request can really handle keepalives and pipelining. Currently, the rules are:

  1. MUST be a GET or HEAD request

  2. MUST be HTTP/1.1 or HTTP/1.0 with “Connection: keep-alive”

  3. MUST NOT have “Connection: close” set

Returns:

  • (Boolean)


4096
4097
4098
4099
4100
4101
# File 'ext/pitchfork_http/pitchfork_http.c', line 4096

/*
 * Returns true when HP_FL_ALL() reports the KEEPALIVE flag set on the
 * parser, false otherwise.
 */
static VALUE HttpParser_keepalive(VALUE self)
{
  struct http_parser *parser = data_get(self);

  if (HP_FL_ALL(parser, KEEPALIVE))
    return Qtrue;

  return Qfalse;
}

#next?Boolean

Exactly like HttpParser#keepalive?, except it will reset the internal parser state on next parse if it returns true.

Returns:

  • (Boolean)


4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
# File 'ext/pitchfork_http/pitchfork_http.c', line 4110

/*
 * Same check as HttpParser_keepalive(), but a true answer also sets the
 * TO_CLEAR flag so that the next HttpParser_parse() call resets the
 * parser first.
 */
static VALUE HttpParser_next(VALUE self)
{
  struct http_parser *parser = data_get(self);
  VALUE answer = Qfalse;

  if (HP_FL_ALL(parser, KEEPALIVE)) {
    HP_FL_SET(parser, TO_CLEAR);
    answer = Qtrue;
  }

  return answer;
}

#parsenil

Takes a Hash and a String of data, parses the String of data filling in the Hash, returning the Hash if parsing is finished, nil otherwise. When returning the env Hash, it may modify data to point to where body processing should begin.

Raises HttpParserError if there are parsing errors.

Returns:

  • (nil)


3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
# File 'ext/pitchfork_http/pitchfork_http.c', line 3993

/*
 * Runs the parser over the internal buffer.
 *
 * Returns the env Hash once the state machine reaches its final state or
 * the start of a chunked body, nil when more data is needed.
 *
 * Raises the e413 error when the parse offset exceeds MAX_HEADER_LEN, and
 * HttpParserError when the state machine ends in the error state.
 */
static VALUE HttpParser_parse(VALUE self)
{
  struct http_parser *hp = data_get(self);
  /*
   * NOTE(review): the buffer is captured before the TO_CLEAR reset below;
   * HttpParser_clear() installs a new buf when the HIJACK flag is set, so
   * `data` would then be the previous buffer -- confirm this is intended.
   */
  VALUE data = hp->buf;

  /* deferred reset requested by HttpParser_next() */
  if (HP_FL_TEST(hp, TO_CLEAR))
    HttpParser_clear(self);

  http_parser_execute(self, hp, RSTRING_PTR(data), RSTRING_LEN(data));
  if (hp->offset > MAX_HEADER_LEN)
    parser_raise(e413, "HTTP header is too large");

  if (hp->cs == http_parser_first_final ||
      hp->cs == http_parser_en_ChunkedBody) {
    /* presumably drops the consumed header bytes from the front of data */
    advance_str(data, hp->offset + 1);
    hp->offset = 0;
    /* finishing while in trailers means the whole request has been read */
    if (HP_FL_TEST(hp, INTRAILER))
      HP_FL_SET(hp, REQEOF);

    return hp->env;
  }

  if (hp->cs == http_parser_error)
    parser_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");

  /* neither finished nor failed: caller must supply more data */
  return Qnil;
}

#read(socket) ⇒ Object

Does the majority of the IO processing. It has been written in Ruby using about 8 different IO processing strategies.

It is currently carefully constructed to make sure that it gets the best possible performance for the common case: GET requests that are fully complete after a single read(2)

Anyone who thinks they can make it faster is more than welcome to take a crack at it.

Returns an environment hash suitable for Rack if successful. This does minimal exception trapping and it is up to the caller to handle any socket errors (e.g. user aborted upload).



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/pitchfork/http_parser.rb', line 62

# Reads and parses one HTTP request from +socket+ and returns the Rack
# environment hash for it.
#
# The first readpartial goes straight into the parser's own buffer so that
# a small request completing in one read needs no extra String. If the
# parser wants more data, further reads are fed through add_parse until it
# returns the env. Parser and socket exceptions propagate to the caller.
def read(socket)
  rack_env = env

  # From https://www.ietf.org/rfc/rfc3875:
  # "Script authors should be aware that the REMOTE_ADDR and
  #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
  #  may not identify the ultimate source of the request.  They
  #  identify the client for the immediate request to the server;
  #  that client may be a proxy, gateway, or other intermediary
  #  acting on behalf of the actual source client."
  peer = socket.remote_address
  rack_env['REMOTE_ADDR'] = peer.unix? ? "127.0.0.1" : peer.ip_address

  # short circuit the common case with small GET requests first
  socket.readpartial(16384, buf)
  if parse.nil?
    # Parser is not done, queue up more data to read and continue parsing
    # an Exception thrown from the parser will throw us out of the loop
    until add_parse(socket.readpartial(16384))
      # nothing to do here: the condition itself reads and parses
    end
  end

  check_client_connection(socket) if @@check_client_connection

  # bodiless requests share the empty NULL_IO instead of a new input object
  rack_env['rack.input'] =
    if 0 == content_length
      NULL_IO
    else
      @@input_class.new(socket, self)
    end

  # for Rack hijacking in Rack 1.5 and later
  rack_env['pitchfork.socket'] = socket
  rack_env['rack.hijack'] = self

  rack_env.merge!(DEFAULTS)
end

#response_start_sentObject

ignored by Ruby anyways



4245
4246
4247
4248
4249
4250
# File 'ext/pitchfork_http/pitchfork_http.c', line 4245

/* Returns true when the RESSTART flag is set on the parser, else false. */
static VALUE HttpParser_rssget(VALUE self)
{
  struct http_parser *parser = data_get(self);

  if (HP_FL_TEST(parser, RESSTART))
    return Qtrue;

  return Qfalse;
}

#response_start_sent=(boolean) ⇒ Object



4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
# File 'ext/pitchfork_http/pitchfork_http.c', line 4233

/*
 * Sets the RESSTART flag when boolean is truthy and clears it otherwise.
 * Returns the argument unchanged.
 */
static VALUE HttpParser_rssset(VALUE self, VALUE boolean)
{
  struct http_parser *parser = data_get(self);

  if (!RTEST(boolean)) {
    HP_FL_UNSET(parser, RESSTART);
  } else {
    HP_FL_SET(parser, RESSTART);
  }

  return boolean; /* ignored by Ruby anyways */
}

#trailers(req, data) ⇒ nil

This is an alias for HttpParser#headers

Returns:

  • (nil)


4053
4054
4055
4056
4057
4058
4059
4060
4061
# File 'ext/pitchfork_http/pitchfork_http.c', line 4053

/*
 * Installs buf as the parser's buffer and env as its env Hash, then
 * parses. Returns whatever HttpParser_parse() returns: the env once
 * parsing has finished, nil while more data is needed.
 */
static VALUE HttpParser_headers(VALUE self, VALUE env, VALUE buf)
{
  struct http_parser *parser = data_get(self);

  /* go through the write barrier macro when storing object references */
  RB_OBJ_WRITE(self, &parser->buf, buf);
  RB_OBJ_WRITE(self, &parser->env, env);

  return HttpParser_parse(self);
}

#write_http_header(socket) ⇒ Object

:nodoc:



187
188
189
190
191
192
# File 'lib/pitchfork/http_parser.rb', line 187

# Writes the start of the HTTP status line to +socket+, but only for
# requests that have headers (see #headers?). The parser is flagged via
# response_start_sent= before the fragments in HTTP_RESPONSE_START are
# written one by one. Returns nil when nothing was written.
def write_http_header(socket) # :nodoc:
  return unless headers?

  self.response_start_sent = true
  HTTP_RESPONSE_START.each do |fragment|
    socket.write(fragment)
  end
end