Class: HTTPTools::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/http_tools/parser.rb

Overview

HTTPTools::Parser is a pure Ruby HTTP request & response parser with an evented API.

The HTTP message can be fed into the parser piece by piece as it comes over the wire, and the parser will call its callbacks as it works its way through the message.

Example:

parser = HTTPTools::Parser.new
parser.on(:header) do
  # Interpolate rather than concatenate, so this line prints the same text
  # whether status_code is a String or an Integer.
  puts "#{parser.status_code} #{parser.message}"
  puts parser.header.inspect
end
parser.on(:finish) {print parser.body}

# Feed the response in piece by piece, as it would arrive over the wire.
parser << "HTTP/1.1 200 OK\r\n"
parser << "Content-Length: 20\r\n\r\n"
parser << "<h1>Hello world</h1>"

Prints:

200 OK
{"Content-Length" => "20"}
<h1>Hello world</h1>

Constant Summary collapse

EMPTY =

:stopdoc:

"".freeze
# Small frozen separator strings. COLON is what #env splits the Host header on.
COLON =
":".freeze
SPACE =
" ".freeze
KEY_TERMINATOR =
": ".freeze
# HTTP header names and values. CONNECTION and CLOSE are compared
# case-insensitively against the parsed header in #finish.
CONTENT_LENGTH =
"Content-Length".freeze
TRANSFER_ENCODING =
"Transfer-Encoding".freeze
TRAILER =
"Trailer".freeze
CONNECTION =
"Connection".freeze
CLOSE =
"close".freeze
CHUNKED =
"chunked".freeze
# Keys of the Rack environment hash built by #env.
REQUEST_METHOD =
"REQUEST_METHOD".freeze
PATH_INFO =
"PATH_INFO".freeze
QUERY_STRING =
"QUERY_STRING".freeze
SERVER_NAME =
"SERVER_NAME".freeze
SERVER_PORT =
"SERVER_PORT".freeze
HTTP_HOST =
"HTTP_HOST".freeze
RACK_INPUT =
"rack.input".freeze
# Template for the Rack environment: #env dups this hash and then fills in the
# per-request keys.
PROTOTYPE_ENV =
{
"SCRIPT_NAME" => "".freeze,
"rack.version" => [1, 1].freeze,
"rack.url_scheme" => "http".freeze,
"rack.errors" => STDERR,
"rack.multithread" => false,
"rack.multiprocess" => false,
"rack.run_once" => false}.freeze
# Prefix #env puts in front of converted header and trailer names.
HTTP_ =
"HTTP_".freeze
# String#tr character sets used by #env to convert header names into Rack
# keys: a-z maps to A-Z and "-" maps to "_".
LOWERCASE =
"a-z-".freeze
UPPERCASE =
"A-Z_".freeze
# Upper-cased header names that #env stores in the Rack environment without
# the "HTTP_" prefix. Frozen, like the other constants, so this shared lookup
# table cannot be mutated at runtime.
NO_HTTP_ =
{"CONTENT_LENGTH" => true, "CONTENT_TYPE" => true}.freeze
EVENTS =

:startdoc:

%W{header stream trailer finish error}.map {|e| e.freeze}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeParser

:call-seq: Parser.new -> parser

Create a new HTTPTools::Parser.



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/http_tools/parser.rb', line 87

# Create a parser in its initial state (:start) with an empty buffer.
#
# Every per-message instance variable that #reset clears is also given a value
# here, so a brand new parser and a reset parser carry the same set of
# variables.
def initialize
  @state = :start
  # @buffer and @scanner begin as the same StringScanner; #finish restores
  # @buffer from @scanner.
  @buffer = @scanner = StringScanner.new("")

  # Per-message parse results.
  @request_method = nil
  @path_info = nil
  @query_string = nil
  @request_uri = nil
  @version = nil
  @status_code = nil
  @message = nil
  @header = {}
  @trailer = {}
  @body = nil

  # Per-message parse progress.
  @header_complete = nil
  @last_key = nil
  @content_left = nil
  @chunked = nil
  @trailer_expected = nil

  # Options, exposed through the attribute methods of the same name.
  @force_no_body = nil
  @allow_html_without_header = nil
  @force_trailer = nil
  @max_chunk_size = nil

  # Event callbacks, replaced through #add_listener. The stream callback starts
  # as the setup_stream_callback method rather than nil; #env compares against
  # it to tell whether a stream listener has been registered.
  @header_callback = nil
  @stream_callback = method(:setup_stream_callback)
  @trailer_callback = nil
  @finish_callback = nil
  @error_callback = nil
end

Instance Attribute Details

#allow_html_without_headerObject

Allow responses with no status line or headers if it looks like HTML.



78
79
80
# File 'lib/http_tools/parser.rb', line 78

# Reader for the @allow_html_without_header option, which #initialize sets to
# nil. The option allows responses with no status line or headers if they look
# like HTML.
def allow_html_without_header
  @allow_html_without_header
end

#bodyObject (readonly)

Returns the value of attribute body.



68
69
70
# File 'lib/http_tools/parser.rb', line 68

# Returns @body. It is nil on a new parser and is cleared again by #reset; per
# the :stream event notes, this is where the body is retrieved when no stream
# listener is set.
def body
  @body
end

#force_no_bodyObject

Skip parsing the body, e.g. with the response to a HEAD request.



75
76
77
# File 'lib/http_tools/parser.rb', line 75

# Reader for the @force_no_body option, which #initialize sets to nil. The
# option skips parsing the body, e.g. for the response to a HEAD request.
def force_no_body
  @force_no_body
end

#force_trailerObject

Force parser to expect and parse a trailer when Trailer header missing.



72
73
74
# File 'lib/http_tools/parser.rb', line 72

# Reader for the @force_trailer option, which #initialize sets to nil. The
# option makes the parser expect and parse a trailer even when the Trailer
# header is missing.
def force_trailer
  @force_trailer
end

#headerObject (readonly)

Returns the value of attribute header.



68
69
70
# File 'lib/http_tools/parser.rb', line 68

# Returns the @header Hash. It starts out empty and #reset replaces it with a
# fresh empty Hash; #env reads it to build the HTTP_* entries of the Rack
# environment.
def header
  @header
end

#max_chunk_sizeObject

Max size for a ‘Transfer-Encoding: chunked’ body chunk. nil for no limit.



81
82
83
# File 'lib/http_tools/parser.rb', line 81

# Reader for the @max_chunk_size option, which #initialize sets to nil: the
# maximum size for a 'Transfer-Encoding: chunked' body chunk, nil for no limit.
def max_chunk_size
  @max_chunk_size
end

#messageObject (readonly)

Returns the value of attribute message.



68
69
70
# File 'lib/http_tools/parser.rb', line 68

# Returns @message.
# NOTE(review): presumably the reason phrase of a response's status line (the
# overview example prints it after the status code as "OK") -- confirm against
# the parsing code, which is not shown here.
def message
  @message
end

#path_infoObject (readonly)

Returns the value of attribute path_info.



68
69
70
# File 'lib/http_tools/parser.rb', line 68

# Returns @path_info, the value #env stores under "PATH_INFO". Cleared to nil
# by #reset.
def path_info
  @path_info
end

#query_stringObject (readonly)

Returns the value of attribute query_string.



68
69
70
# File 'lib/http_tools/parser.rb', line 68

# Returns @query_string, the value #env stores under "QUERY_STRING". Cleared to
# nil by #reset.
def query_string
  @query_string
end

#request_methodObject (readonly)

Returns the value of attribute request_method.



68
69
70
# File 'lib/http_tools/parser.rb', line 68

# Returns @request_method exactly as stored; #env upcases it before putting it
# under "REQUEST_METHOD". Cleared to nil by #reset.
def request_method
  @request_method
end

#request_uriObject (readonly)

Returns the value of attribute request_uri.



68
69
70
# File 'lib/http_tools/parser.rb', line 68

# Returns @request_uri. Cleared to nil by #reset.
def request_uri
  @request_uri
end

#stateObject (readonly)

:nodoc:



67
68
69
# File 'lib/http_tools/parser.rb', line 67

# Returns @state, the Symbol naming the parser's current state. It is :start on
# a new or reset parser, :end_of_message is what #finished? checks for, and
# #concat dispatches to the method of this name on every call.
def state
  @state
end

#status_codeObject (readonly)

Returns the value of attribute status_code.



68
69
70
# File 'lib/http_tools/parser.rb', line 68

# Returns @status_code. It is nil on a new parser and is cleared to nil again by
# #reset.
def status_code
  @status_code
end

#trailerObject (readonly)

Returns the value of attribute trailer.



68
69
70
# File 'lib/http_tools/parser.rb', line 68

# Returns the @trailer Hash. It starts out empty and #reset replaces it with a
# fresh empty Hash; #env copies its entries into the Rack environment with an
# "HTTP_" prefix.
def trailer
  @trailer
end

#versionObject (readonly)

Returns the value of attribute version.



68
69
70
# File 'lib/http_tools/parser.rb', line 68

# Returns @version. Cleared to nil by #reset.
def version
  @version
end

Instance Method Details

#add_listener(event, proc = nil, &block) ⇒ Object Also known as: on

:call-seq: parser.add_listener(event) {|arg| block} -> parser parser.add_listener(event, proc) -> parser parser.on(event) {|arg| block} -> parser parser.on(event, proc) -> parser

Available events are :header, :stream, :trailer, :finish, and :error.

Adding a second callback for an event will overwrite the existing callback.

Events:

header

Called when headers are complete

stream

Supplied with one argument, the last chunk of body data fed in to the parser as a String, e.g. “<h1>Hello”. If no listener is set for this event the body can be retrieved with #body

trailer

Called on the completion of the trailer, if present

finish

Called on completion of the entire message. Any unconsumed data (such as the start of the next message with keepalive) can be retrieved with #rest

error

Supplied with one argument, an error encountered while parsing as a HTTPTools::ParseError. If a listener isn’t registered for this event, an exception will be raised when an error is encountered



293
294
295
296
# File 'lib/http_tools/parser.rb', line 293

# Register the callback for +event+, replacing any callback already stored for
# it. The callback is kept in the instance variable "@<event>_callback".
#
# event:: name of the event (Symbol or String)
# proc::  callable to register; takes precedence over a block when both are
#         given
#
# Returns the parser itself so calls can be chained.
def add_listener(event, proc=nil, &block)
  callback = proc || block
  slot = "@#{event}_callback".to_sym
  instance_variable_set(slot, callback)
  self
end

#concat(data) ⇒ Object Also known as: <<

:call-seq: parser.concat(data) -> parser parser << data -> parser

Feed data in to the parser and trigger callbacks.

Will raise HTTPTools::ParseError on error, unless a callback has been set for the :error event, in which case the callback will receive the error instead.



117
118
119
120
121
# File 'lib/http_tools/parser.rb', line 117

# Append +data+ to the buffer and advance the parser.
#
# The method named by the current @state is invoked, and whatever it returns
# becomes the new @state. Returns the parser itself, which is what lets
# `parser << a << b` chain.
def concat(data)
  @buffer << data
  next_state = send(@state)
  @state = next_state
  self
end

#envObject

:call-seq: parser.env -> hash or nil

Returns a Rack compatible environment hash. Will return nil if called before headers are complete.

“SERVER_NAME” and “SERVER_PORT” are only supplied if they can be determined from the request (eg, they are present in the “Host” header).

“rack.input” is only supplied if #env is called after parsing the request has finished, and no listener is set for the `stream` event.

If not supplied, you must ensure “SERVER_NAME”, “SERVER_PORT”, and “rack.input” are present to make the environment hash fully Rack compliant



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/http_tools/parser.rb', line 138

# Build a Rack style environment hash from the parsed request.
#
# Returns nil until the header is complete. Otherwise returns a new Hash that
# starts as a copy of PROTOTYPE_ENV and is filled with:
# * REQUEST_METHOD (upcased), PATH_INFO and QUERY_STRING;
# * one entry per header, the name upcased with "-" turned into "_" and
#   prefixed with "HTTP_" unless it is listed in NO_HTTP_;
# * SERVER_NAME / SERVER_PORT taken from the Host header, when present;
# * one "HTTP_" prefixed entry per trailer;
# * "rack.input" when a body has been collected or no stream listener has
#   replaced the default stream callback.
def env
  return unless @header_complete
  env = PROTOTYPE_ENV.dup
  env[REQUEST_METHOD] = @request_method.upcase
  env[PATH_INFO] = @path_info
  env[QUERY_STRING] = @query_string
  @header.each do |key, value|
    upper_key = key.tr(LOWERCASE, UPPERCASE)
    upper_key[0,0] = HTTP_ unless NO_HTTP_.key?(upper_key)
    env[upper_key.freeze] = value
  end
  if env.key?(HTTP_HOST)
    authority = env[HTTP_HOST]
    # A bracketed IPv6 literal ("[::1]" or "[::1]:8080") contains colons of its
    # own, so splitting on ":" would cut the address apart. Keep the bracketed
    # part whole as the host and treat only what follows "]:" as the port.
    ipv6 = /\A(\[[^\]]*\])(?::(\d+)?)?\z/.match(authority)
    if ipv6
      host, port = ipv6[1], ipv6[2]
    else
      host, port = authority.split(COLON)
    end
  end
  env[SERVER_NAME] = host if host
  env[SERVER_PORT] = port if port
  @trailer.each {|k, val| env[HTTP_ + k.tr(LOWERCASE, UPPERCASE)] = val}
  if @body || @stream_callback == method(:setup_stream_callback)
    env[RACK_INPUT] = StringIO.new(@body || "")
  end
  env
end

#finishObject

:call-seq: parser.finish -> parser

Used to notify the parser that the request has finished in a case where it can not be determined by the request itself.

For example, when a server does not set a content length, and instead relies on closing the connection to signify the body end.

until parser.finished?
  begin
    parser << socket.sysread(1024 * 16)
  rescue EOFError
    parser.finish
    break
  end
end

This method can not be used to interrupt parsing from within a callback.

Will raise HTTPTools::MessageIncompleteError if called too early, or HTTPTools::EndOfMessageError if the message has already finished, unless a callback has been set for the :error event, in which case the callback will receive the error instead.



182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/http_tools/parser.rb', line 182

# Tell the parser that no more data will arrive.
#
# * In the :body_on_close state the buffer is pointed back at @scanner and the
#   message is ended.
# * In the :body_chunked state the message is ended only when the buffer is
#   fully consumed, no trailer is expected, and a header matching
#   "Connection: close" (case-insensitively) is present.
# * In the :start state with nothing buffered, EmptyMessageError is raised.
# * In every other situation MessageIncompleteError is raised.
#
# Returns the parser itself.
def finish
  # Evaluated lazily, so the header is only scanned once the cheaper
  # :body_chunked checks have passed.
  close_requested = lambda do
    @header.any? do |name, value|
      CONNECTION.casecmp(name) == 0 && CLOSE.casecmp(value) == 0
    end
  end

  case
  when @state == :body_on_close
    @buffer = @scanner
    @state = end_of_message
  when @state == :body_chunked && @buffer.eos? && !@trailer_expected && close_requested.call
    @state = end_of_message
  when @state == :start && @buffer.string.empty?
    raise EmptyMessageError.new("Message empty")
  else
    raise MessageIncompleteError.new("Message ended early")
  end
  self
end

#finished?Boolean

:call-seq: parser.finished? -> bool

Returns true when the parser has come to the end of the message, false otherwise.

Some HTTP servers may not supply the necessary information in the response to determine the end of the message (e.g., no content length) and instead close the connection to signify the end of the message, see #finish for how to deal with this.

Returns:

  • (Boolean)


207
208
209
# File 'lib/http_tools/parser.rb', line 207

# True when the parser's state is :end_of_message, false for any other state.
def finished?
  @state == :end_of_message
end

#header?Boolean

:call-seq: parser.header? -> bool

Returns true when the parser has received the complete header, false otherwise.

Returns:

  • (Boolean)


216
217
218
# File 'lib/http_tools/parser.rb', line 216

# Returns true once the complete header has been received, false otherwise.
#
# @header_complete is nil on a new or reset parser, so it is coerced here to
# keep the documented true/false contract instead of leaking nil.
def header?
  @header_complete ? true : false
end

#inspectObject

:nodoc:



299
300
301
# File 'lib/http_tools/parser.rb', line 299

# Compact inspect output: everything after the first space of the default
# representation is swapped for the buffer position string and the current
# state.
def inspect # :nodoc:
  default_form = super
  default_form.sub(/ .*>$/, " #{posstr} #{state}>")
end

#resetObject

:call-seq: parser.reset -> parser

Reset the parser so it can be used to process a new request. Callbacks/delegates will not be removed.



241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/http_tools/parser.rb', line 241

# Return the parser to its initial state so it can parse another message.
#
# The buffer is emptied in place and rewound, and every per-message value is
# cleared, including @message, so a value parsed from the previous message
# cannot leak into the next one. Callbacks registered through #add_listener
# are kept. If the stream callback is the parser's own stream_callback method,
# it is swapped back to setup_stream_callback.
#
# Returns the parser itself.
def reset
  @state = :start
  # Empty the underlying string in place, then move the scan pointer back to
  # the start.
  @buffer.string.replace("")
  @buffer.reset
  @request_method = nil
  @path_info = nil
  @query_string = nil
  @request_uri = nil
  @version = nil
  @status_code = nil
  @message = nil
  @header_complete = nil
  @header = {}
  @trailer = {}
  @last_key = nil
  @content_left = nil
  @chunked = nil
  @trailer_expected = nil
  @body = nil
  if @stream_callback == method(:stream_callback)
    @stream_callback = method(:setup_stream_callback)
  end
  self
end

#restObject

:call-seq: parser.rest -> string

Returns unconsumed data in the parser’s buffer.



224
225
226
# File 'lib/http_tools/parser.rb', line 224

# Returns whatever @buffer.rest returns: the data in the buffer that has not
# been consumed yet.
def rest
  @buffer.rest
end

#rest_sizeObject

:call-seq: parser.rest_size -> int

Returns the size in bytes of the unconsumed data in the parser’s buffer.



232
233
234
# File 'lib/http_tools/parser.rb', line 232

# Returns whatever @buffer.rest_size returns: the size of the data in the
# buffer that has not been consumed yet.
def rest_size
  @buffer.rest_size
end