Class: Pitchfork::HttpParser
- Inherits:
-
Object
- Object
- Pitchfork::HttpParser
- Defined in:
- lib/pitchfork/http_parser.rb,
ext/pitchfork_http/pitchfork_http.c
Constant Summary collapse
- DEFAULTS =
default parameters we merge into the request env for Rack handlers
{ "rack.errors" => $stderr, "rack.multiprocess" => true, "rack.multithread" => false, "rack.run_once" => false, "rack.version" => [1, 2], "rack.hijack?" => true, "SCRIPT_NAME" => "", # this is not in the Rack spec, but some apps may rely on it "SERVER_SOFTWARE" => "Pitchfork #{Pitchfork::Const::UNICORN_VERSION}" }
- NULL_IO =
StringIO.new("")
- HTTP_RESPONSE_START =
:stopdoc:
[ 'HTTP'.freeze, '/1.1 '.freeze ]
- EMPTY_ARRAY =
[].freeze
- TCPI =
Raindrops::TCP_Info.allocate
- CHUNK_MAX =
The maximum size of a single chunk when using chunked transfer encoding. This is only a theoretical maximum used to detect errors in clients; it is highly unlikely to encounter clients that send more than several kilobytes at once.
OFFT2NUM(UH_OFF_T_MAX)
- LENGTH_MAX =
The maximum size of the body as specified by Content-Length. This is only a theoretical maximum; the actual limit is subject to the limits of the file system used for Dir.tmpdir.
OFFT2NUM(UH_OFF_T_MAX)
- @@input_class =
Pitchfork::TeeInput
- @@check_client_connection =
false
- @@tcpi_inspect_ok =
Socket.const_defined?(:TCP_INFO)
Class Method Summary collapse
- .check_client_connection ⇒ Object
- .check_client_connection=(bool) ⇒ Object
- .input_class ⇒ Object
- .input_class=(klass) ⇒ Object
-
.is_chunked?(v) ⇒ Boolean
called by ext/pitchfork_http/pitchfork_http.rl via rb_funcall.
-
.max_header_len=(len) ⇒ Object
this is only intended for use with Rainbows!.
Instance Method Summary collapse
-
#add_parse(buffer) ⇒ nil
adds the contents of buffer to the internal buffer and attempts to continue parsing.
-
#body_eof? ⇒ Boolean
Detects if we’re done filtering the body or not.
- #buf ⇒ Object
-
#call ⇒ Object
for rack.hijack, we respond to this method so no extra allocation of a proc object.
-
#check_client_connection(socket) ⇒ Object
Ruby 2.2+ can show struct tcp_info as a string Socket::Option#inspect.
-
#clear ⇒ Object
Resets the parser to its initial state so that you can reuse it rather than making new ones.
-
#closed_state?(state) ⇒ Boolean
raindrops before 0.18 only supported TCP_INFO under Linux.
- #closed_state_str?(state) ⇒ Boolean
-
#content_length ⇒ nil, Integer
Returns the number of bytes left to run through HttpParser#filter_body.
- #env ⇒ Object
-
#filter_body(dst, src) ⇒ Object
Takes a String of src, will modify data if dechunking is done.
- #headers(env, buf) ⇒ Object
-
#headers? ⇒ Boolean
This should be used to detect if a request has headers (and if the response will have headers as well).
- #hijacked! ⇒ Object
- #hijacked? ⇒ Boolean
-
#new ⇒ Object
constructor
Creates a new parser.
-
#keepalive? ⇒ Boolean
This should be used to detect if a request can really handle keepalives and pipelining.
-
#next? ⇒ Boolean
Exactly like HttpParser#keepalive?, except it will reset the internal parser state on next parse if it returns true.
-
#parse ⇒ nil
Takes a Hash and a String of data, parses the String of data filling in the Hash returning the Hash if parsing is finished, nil otherwise. When returning the env Hash, it may modify data to point to where body processing should begin.
-
#read(socket) ⇒ Object
Does the majority of the IO processing.
-
#response_start_sent ⇒ Object
ignored by Ruby anyways.
- #response_start_sent=(boolean) ⇒ Object
-
#trailers(req, data) ⇒ nil
This is an alias for HttpParser#headers.
-
#write_http_header(socket) ⇒ Object
:nodoc:.
Constructor Details
#new ⇒ Object
Creates a new parser.
3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3914 static VALUE HttpParser_init(VALUE self) { struct http_parser *hp = data_get(self); http_parser_init(hp); RB_OBJ_WRITE(self, &hp->buf, rb_str_new(NULL, 0)); RB_OBJ_WRITE(self, &hp->env, rb_hash_new()); return self; } |
Class Method Details
.check_client_connection ⇒ Object
39 40 41 |
# File 'lib/pitchfork/http_parser.rb', line 39 def self.check_client_connection @@check_client_connection end |
.check_client_connection=(bool) ⇒ Object
43 44 45 |
# File 'lib/pitchfork/http_parser.rb', line 43 def self.check_client_connection=(bool) @@check_client_connection = bool end |
.input_class ⇒ Object
31 32 33 |
# File 'lib/pitchfork/http_parser.rb', line 31 def self.input_class @@input_class end |
.input_class=(klass) ⇒ Object
35 36 37 |
# File 'lib/pitchfork/http_parser.rb', line 35 def self.input_class=(klass) @@input_class = klass end |
.is_chunked?(v) ⇒ Boolean
called by ext/pitchfork_http/pitchfork_http.rl via rb_funcall
195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
# File 'lib/pitchfork/http_parser.rb', line 195 def self.is_chunked?(v) # :nodoc: vals = v.split(',') vals.each do |val| val.strip! val.downcase! end if vals.pop == 'chunked'.freeze return true unless vals.include?('chunked'.freeze) raise Pitchfork::HttpParserError, 'double chunked', [] end return false unless vals.include?('chunked'.freeze) raise Pitchfork::HttpParserError, 'chunked not last', [] end |
.max_header_len=(len) ⇒ Object
this is only intended for use with Rainbows!
41 42 43 44 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 41 static VALUE set_maxhdrlen(VALUE self, VALUE len) { return UINT2NUM(MAX_HEADER_LEN = NUM2UINT(len)); } |
Instance Method Details
#add_parse(buffer) ⇒ nil
adds the contents of buffer to the internal buffer and attempts to continue parsing. Returns the env Hash on success or nil if more data is needed.
Raises HttpParserError if there are parsing errors.
4032 4033 4034 4035 4036 4037 4038 4039 4040 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4032 static VALUE HttpParser_add_parse(VALUE self, VALUE buffer) { struct http_parser *hp = data_get(self); Check_Type(buffer, T_STRING); rb_str_buf_append(hp->buf, buffer); return HttpParser_parse(self); } |
#body_eof? ⇒ Boolean
Detects if we’re done filtering the body or not. This can be used to detect when to stop calling HttpParser#filter_body.
4075 4076 4077 4078 4079 4080 4081 4082 4083 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4075 static VALUE HttpParser_body_eof(VALUE self) { struct http_parser *hp = data_get(self); if (HP_FL_TEST(hp, CHUNKED)) return chunked_eof(hp) ? Qtrue : Qfalse; return hp->len.content == 0 ? Qtrue : Qfalse; } |
#buf ⇒ Object
4136 4137 4138 4139 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4136 static VALUE HttpParser_buf(VALUE self) { return data_get(self)->buf; } |
#call ⇒ Object
for rack.hijack, we respond to this method so no extra allocation of a proc object
101 102 103 104 |
# File 'lib/pitchfork/http_parser.rb', line 101 def call hijacked! env['rack.hijack_io'] = env['pitchfork.socket'] end |
#check_client_connection(socket) ⇒ Object
Ruby 2.2+ can show struct tcp_info as a string Socket::Option#inspect. Not that efficient, but probably still better than doing unnecessary work after a client gives up.
113 114 115 116 117 118 119 120 121 |
# File 'lib/pitchfork/http_parser.rb', line 113 def check_client_connection(socket) # :nodoc: if TCPSocket === socket # Raindrops::TCP_Info#get!, #state (reads struct tcp_info#tcpi_state) raise Errno::EPIPE, "client closed connection".freeze, EMPTY_ARRAY if closed_state?(TCPI.get!(socket).state) else write_http_header(socket) end end |
#clear ⇒ Object
Resets the parser to its initial state so that you can reuse it rather than making new ones.
3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3932 static VALUE HttpParser_clear(VALUE self) { struct http_parser *hp = data_get(self); /* we can't safely reuse .buf and .env if hijacked */ if (HP_FL_TEST(hp, HIJACK)) return HttpParser_init(self); http_parser_init(hp); rb_hash_clear(hp->env); return self; } |
#closed_state?(state) ⇒ Boolean
raindrops before 0.18 only supported TCP_INFO under Linux
142 143 144 145 146 147 148 149 150 151 |
# File 'lib/pitchfork/http_parser.rb', line 142 def closed_state?(state) # :nodoc: case state when 1 # ESTABLISHED false when 8, 6, 7, 9, 11 # CLOSE_WAIT, TIME_WAIT, CLOSE, LAST_ACK, CLOSING true else false end end |
#closed_state_str?(state) ⇒ Boolean
174 175 176 177 178 179 180 181 182 183 184 |
# File 'lib/pitchfork/http_parser.rb', line 174 def closed_state_str?(state) case state when 'ESTABLISHED' false # not a typo, ruby maps TCP_CLOSE (no 'D') to state=CLOSED (w/ 'D') when 'CLOSE_WAIT', 'TIME_WAIT', 'CLOSED', 'LAST_ACK', 'CLOSING' true else false end end |
#content_length ⇒ nil, Integer
Returns the number of bytes left to run through HttpParser#filter_body. This will initially be the value of the “Content-Length” HTTP header after header parsing is complete and will decrease in value as HttpParser#filter_body is called for each chunk. This should return zero for requests with no body.
This will return nil on “Transfer-Encoding: chunked” requests.
3974 3975 3976 3977 3978 3979 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3974 static VALUE HttpParser_content_length(VALUE self) { struct http_parser *hp = data_get(self); return HP_FL_TEST(hp, CHUNKED) ? Qnil : OFFT2NUM(hp->len.content); } |
#env ⇒ Object
4141 4142 4143 4144 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4141 static VALUE HttpParser_env(VALUE self) { return data_get(self)->env; } |
#filter_body(dst, src) ⇒ Object
Takes a String of src, will modify data if dechunking is done. Returns nil if there is more data left to process. Returns src if body processing is complete. When returning src, it may modify src so the start of the string points to where the body ended so that trailer processing can begin.
Raises HttpParserError if there are dechunking errors. Basically this is a glorified memcpy(3) that copies src into dst while filtering it through the dechunker.
4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4169 static VALUE HttpParser_filter_body(VALUE self, VALUE dst, VALUE src) { struct http_parser *hp = data_get(self); char *srcptr; long srclen; srcptr = RSTRING_PTR(src); srclen = RSTRING_LEN(src); StringValue(dst); if (HP_FL_TEST(hp, CHUNKED)) { if (!chunked_eof(hp)) { rb_str_modify(dst); rb_str_resize(dst, srclen); /* we can never copy more than srclen bytes */ hp->s.dest_offset = 0; RB_OBJ_WRITE(self, &hp->cont, dst); RB_OBJ_WRITE(self, &hp->buf, src); http_parser_execute(self, hp, srcptr, srclen); if (hp->cs == http_parser_error) parser_raise(eHttpParserError, "Invalid HTTP format, parsing fails."); assert(hp->s.dest_offset <= hp->offset && "destination buffer overflow"); advance_str(src, hp->offset); rb_str_set_len(dst, hp->s.dest_offset); if (RSTRING_LEN(dst) == 0 && chunked_eof(hp)) { assert(hp->len.chunk == 0 && "chunk at EOF but more to parse"); } else { src = Qnil; } } } else { /* no need to enter the Ragel machine for unchunked transfers */ assert(hp->len.content >= 0 && "negative Content-Length"); if (hp->len.content > 0) { long nr = MIN(srclen, hp->len.content); rb_str_modify(dst); rb_str_resize(dst, nr); /* * using rb_str_replace() to avoid memcpy() doesn't help in * most cases because a GC-aware programmer will pass an explicit * buffer to env["rack.input"].read and reuse the buffer in a loop. * This causes copy-on-write behavior to be triggered anyways * when the +src+ buffer is modified (when reading off the socket). */ RB_OBJ_WRITE(self, &hp->buf, src); memcpy(RSTRING_PTR(dst), srcptr, nr); hp->len.content -= nr; if (hp->len.content == 0) { HP_FL_SET(hp, REQEOF); hp->cs = http_parser_first_final; } advance_str(src, nr); src = Qnil; } } hp->offset = 0; /* for trailer parsing */ return src; } |
#headers(env, buf) ⇒ Object
4053 4054 4055 4056 4057 4058 4059 4060 4061 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4053 static VALUE HttpParser_headers(VALUE self, VALUE env, VALUE buf) { struct http_parser *hp = data_get(self); RB_OBJ_WRITE(self, &hp->buf, buf); RB_OBJ_WRITE(self, &hp->env, env); return HttpParser_parse(self); } |
#headers? ⇒ Boolean
This should be used to detect if a request has headers (and if the response will have headers as well). HTTP/0.9 requests should return false, all subsequent HTTP versions will return true.
4129 4130 4131 4132 4133 4134 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4129 static VALUE HttpParser_has_headers(VALUE self) { struct http_parser *hp = data_get(self); return HP_FL_TEST(hp, HASHEADER) ? Qtrue : Qfalse; } |
#hijacked! ⇒ Object
4146 4147 4148 4149 4150 4151 4152 4153 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4146 static VALUE HttpParser_hijacked_bang(VALUE self) { struct http_parser *hp = data_get(self); HP_FL_SET(hp, HIJACK); return self; } |
#hijacked? ⇒ Boolean
106 107 108 |
# File 'lib/pitchfork/http_parser.rb', line 106 def hijacked? env.include?('rack.hijack_io'.freeze) end |
#keepalive? ⇒ Boolean
This should be used to detect if a request can really handle keepalives and pipelining. Currently, the rules are:
-
MUST be a GET or HEAD request
-
MUST be HTTP/1.1 or HTTP/1.0 with “Connection: keep-alive”
-
MUST NOT have “Connection: close” set
4096 4097 4098 4099 4100 4101 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4096 static VALUE HttpParser_keepalive(VALUE self) { struct http_parser *hp = data_get(self); return HP_FL_ALL(hp, KEEPALIVE) ? Qtrue : Qfalse; } |
#next? ⇒ Boolean
Exactly like HttpParser#keepalive?, except it will reset the internal parser state on next parse if it returns true.
4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4110 static VALUE HttpParser_next(VALUE self) { struct http_parser *hp = data_get(self); if (HP_FL_ALL(hp, KEEPALIVE)) { HP_FL_SET(hp, TO_CLEAR); return Qtrue; } return Qfalse; } |
#parse ⇒ nil
Takes a Hash and a String of data, parses the String of data filling in the Hash returning the Hash if parsing is finished, nil otherwise. When returning the env Hash, it may modify data to point to where body processing should begin.
Raises HttpParserError if there are parsing errors.
3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3993 static VALUE HttpParser_parse(VALUE self) { struct http_parser *hp = data_get(self); VALUE data = hp->buf; if (HP_FL_TEST(hp, TO_CLEAR)) HttpParser_clear(self); http_parser_execute(self, hp, RSTRING_PTR(data), RSTRING_LEN(data)); if (hp->offset > MAX_HEADER_LEN) parser_raise(e413, "HTTP header is too large"); if (hp->cs == http_parser_first_final || hp->cs == http_parser_en_ChunkedBody) { advance_str(data, hp->offset + 1); hp->offset = 0; if (HP_FL_TEST(hp, INTRAILER)) HP_FL_SET(hp, REQEOF); return hp->env; } if (hp->cs == http_parser_error) parser_raise(eHttpParserError, "Invalid HTTP format, parsing fails."); return Qnil; } |
#read(socket) ⇒ Object
Does the majority of the IO processing. It has been written in Ruby using about 8 different IO processing strategies.
It is currently carefully constructed to make sure that it gets the best possible performance for the common case: GET requests that are fully complete after a single read(2).
Anyone who thinks they can make it faster is more than welcome to take a crack at it.
Returns an environment hash suitable for Rack if successful. This does minimal exception trapping and it is up to the caller to handle any socket errors (e.g. user aborted upload).
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/pitchfork/http_parser.rb', line 62 def read(socket) e = env # From https://www.ietf.org/rfc/rfc3875: # "Script authors should be aware that the REMOTE_ADDR and # REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9) # may not identify the ultimate source of the request. They # identify the client for the immediate request to the server; # that client may be a proxy, gateway, or other intermediary # acting on behalf of the actual source client." address = socket.remote_address e['REMOTE_ADDR'] = if address.unix? "127.0.0.1" else address.ip_address end # short circuit the common case with small GET requests first socket.readpartial(16384, buf) if parse.nil? # Parser is not done, queue up more data to read and continue parsing # an Exception thrown from the parser will throw us out of the loop false until add_parse(socket.readpartial(16384)) end check_client_connection(socket) if @@check_client_connection e['rack.input'] = 0 == content_length ? NULL_IO : @@input_class.new(socket, self) # for Rack hijacking in Rack 1.5 and later e['pitchfork.socket'] = socket e['rack.hijack'] = self e.merge!(DEFAULTS) end |
#response_start_sent ⇒ Object
ignored by Ruby anyways
4245 4246 4247 4248 4249 4250 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4245 static VALUE HttpParser_rssget(VALUE self) { struct http_parser *hp = data_get(self); return HP_FL_TEST(hp, RESSTART) ? Qtrue : Qfalse; } |
#response_start_sent=(boolean) ⇒ Object
4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4233 static VALUE HttpParser_rssset(VALUE self, VALUE boolean) { struct http_parser *hp = data_get(self); if (RTEST(boolean)) HP_FL_SET(hp, RESSTART); else HP_FL_UNSET(hp, RESSTART); return boolean; /* ignored by Ruby anyways */ } |
#trailers(req, data) ⇒ nil
This is an alias for HttpParser#headers
4053 4054 4055 4056 4057 4058 4059 4060 4061 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4053 static VALUE HttpParser_headers(VALUE self, VALUE env, VALUE buf) { struct http_parser *hp = data_get(self); RB_OBJ_WRITE(self, &hp->buf, buf); RB_OBJ_WRITE(self, &hp->env, env); return HttpParser_parse(self); } |
#write_http_header(socket) ⇒ Object
:nodoc:
187 188 189 190 191 192 |
# File 'lib/pitchfork/http_parser.rb', line 187 def write_http_header(socket) # :nodoc: if headers? self.response_start_sent = true HTTP_RESPONSE_START.each { |c| socket.write(c) } end end |