Class: HTTPTools::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/http_tools/parser.rb

Overview

HTTPTools::Parser is a pure Ruby HTTP request & response parser with an evented API.

The HTTP message can be fed into the parser piece by piece as it comes over the wire, and the parser will call its callbacks as it works its way through the message.

Example:

parser = HTTPTools::Parser.new
parser.on(:header) do
  # Interpolate rather than concatenate with String#+: interpolation works
  # whether status_code is a String or an Integer, whereas + would raise
  # TypeError for an Integer (the exact type is not shown on this page).
  puts "#{parser.status_code} #{parser.message}"
  puts parser.header.inspect
end
parser.on(:finish) {print parser.body}

# The message may arrive in arbitrary pieces; callbacks fire as soon as the
# parser has seen enough input.
parser << "HTTP/1.1 200 OK\r\n"
parser << "Content-Length: 20\r\n\r\n"
parser << "<h1>Hello world</h1>"

Prints:

200 OK
{"Content-Length" => "20"}
<h1>Hello world</h1>

Constant Summary collapse

# :stopdoc:

# Shared, frozen string fragments.
EMPTY = "".freeze
COLON = ":".freeze
KEY_TERMINATOR = ": ".freeze

# Header names and header values the parser compares against.
CONTENT_LENGTH = "Content-Length".freeze
TRANSFER_ENCODING = "Transfer-Encoding".freeze
TRAILER = "Trailer".freeze
CONNECTION = "Connection".freeze
CLOSE = "close".freeze
CHUNKED = "chunked".freeze

# Keys written into the hash returned by #env.
REQUEST_METHOD = "REQUEST_METHOD".freeze
PATH_INFO = "PATH_INFO".freeze
QUERY_STRING = "QUERY_STRING".freeze
SERVER_NAME = "SERVER_NAME".freeze
SERVER_PORT = "SERVER_PORT".freeze
HTTP_HOST = "HTTP_HOST".freeze
RACK_INPUT = "rack.input".freeze

# Template that #env duplicates before filling in per-request values.
PROTOTYPE_ENV = {
  "SCRIPT_NAME" => "".freeze,
  "rack.version" => [1, 1].freeze,
  "rack.url_scheme" => "http".freeze,
  "rack.errors" => STDERR,
  "rack.multithread" => false,
  "rack.multiprocess" => false,
  "rack.run_once" => false}.freeze

# Prefix #env puts in front of translated header names.
HTTP_ = "HTTP_".freeze

# String#tr character sets used by #env to turn e.g. "Content-Type" into
# "CONTENT_TYPE".
LOWERCASE = "a-z-".freeze
UPPERCASE = "A-Z_".freeze

# Translated header names that #env stores without the HTTP_ prefix.
# Frozen like every other constant here so it cannot be mutated by accident.
NO_HTTP_ = {"CONTENT_LENGTH" => true, "CONTENT_TYPE" => true}.freeze

# :startdoc:

# Names of the events accepted by #add_listener / #on.
EVENTS = %W{header stream trailer finish error}.map {|e| e.freeze}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeParser

:call-seq: Parser.new -> parser

Create a new HTTPTools::Parser.



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/http_tools/parser.rb', line 86

# Create a new HTTPTools::Parser in the :start state with empty header and
# trailer hashes and no callbacks registered, apart from the built-in
# stream callback bootstrap.
#
# Every instance variable that #reset clears or that the attribute readers
# expose is given an explicit nil here, so a fresh parser and a reset parser
# look the same and no reader touches an unset variable.
def initialize
  @state = :start
  # "".dup guarantees an unfrozen backing string even when string literals
  # are frozen: #concat appends to the scanner (and therefore to this
  # string) and #reset empties it in place.
  @buffer = @scanner = StringScanner.new("".dup)

  # Parsed message data.
  @request_method = nil
  @path_info = nil
  @query_string = nil
  @request_uri = nil
  @version = nil
  @status_code = nil
  @message = nil
  @header = {}
  @trailer = {}
  @body = nil

  # Options (see the corresponding attributes).
  @force_no_body = nil
  @allow_html_without_header = nil
  @force_trailer = nil
  @max_chunk_size = nil

  # Internal parsing progress.
  @header_complete = nil
  @last_key = nil
  @content_left = nil
  @chunked = nil
  @trailer_expected = nil

  # Callbacks; #env and #reset compare @stream_callback against this
  # default to tell whether a custom :stream listener was registered.
  @header_callback = nil
  @stream_callback = method(:setup_stream_callback)
  @trailer_callback = nil
  @finish_callback = nil
  @error_callback = nil
end

Instance Attribute Details

#allow_html_without_headerObject

Allow responses with no status line or headers if it looks like HTML.



77
78
79
# File 'lib/http_tools/parser.rb', line 77

# Reader for the option that lets the parser accept a response with no
# status line or headers when it looks like HTML. nil until assigned
# (#initialize sets it to nil).
def allow_html_without_header
  @allow_html_without_header
end

#bodyObject (readonly)

Returns the value of attribute body.



67
68
69
# File 'lib/http_tools/parser.rb', line 67

# Body collected by the parser. Per the :stream event notes this is where
# the body is retrieved when no :stream listener is set; nil after
# #initialize and #reset.
def body
  @body
end

#force_no_bodyObject

Skip parsing the body, e.g. with the response to a HEAD request.



74
75
76
# File 'lib/http_tools/parser.rb', line 74

# Reader for the option that makes the parser skip the body, e.g. for the
# response to a HEAD request. nil until assigned.
def force_no_body
  @force_no_body
end

#force_trailerObject

Force parser to expect and parse a trailer when Trailer header missing.



71
72
73
# File 'lib/http_tools/parser.rb', line 71

# Reader for the option that makes the parser expect and parse a trailer
# even when the Trailer header is missing. nil until assigned.
def force_trailer
  @force_trailer
end

#headerObject (readonly)

Returns the value of attribute header.



67
68
69
# File 'lib/http_tools/parser.rb', line 67

# Hash of parsed headers, e.g. {"Content-Length" => "20"} in the overview
# example. Starts as an empty Hash and is replaced by a fresh empty Hash on
# #reset.
def header
  @header
end

#max_chunk_sizeObject

Max size for a ‘Transfer-Encoding: chunked’ body chunk. nil for no limit.



80
81
82
# File 'lib/http_tools/parser.rb', line 80

def max_chunk_size
  @max_chunk_size
end

#messageObject (readonly)

Returns the value of attribute message.



67
68
69
# File 'lib/http_tools/parser.rb', line 67

# Message that accompanies the status code; the overview example prints
# "200 OK" from status_code and message for "HTTP/1.1 200 OK".
# NOTE(review): where this is assigned is not visible here, and #reset as
# shown does not clear it — confirm.
def message
  @message
end

#path_infoObject (readonly)

Returns the value of attribute path_info.



67
68
69
# File 'lib/http_tools/parser.rb', line 67

# Path of the parsed request; #env copies it into "PATH_INFO". Cleared to
# nil by #reset.
def path_info
  @path_info
end

#query_stringObject (readonly)

Returns the value of attribute query_string.



67
68
69
# File 'lib/http_tools/parser.rb', line 67

# Query string of the parsed request; #env copies it into "QUERY_STRING".
# Cleared to nil by #reset.
def query_string
  @query_string
end

#request_methodObject (readonly)

Returns the value of attribute request_method.



67
68
69
# File 'lib/http_tools/parser.rb', line 67

# Method of the parsed request as received; #env upcases it for
# "REQUEST_METHOD". Cleared to nil by #reset.
def request_method
  @request_method
end

#request_uriObject (readonly)

Returns the value of attribute request_uri.



67
68
69
# File 'lib/http_tools/parser.rb', line 67

# Request URI of the parsed request. Cleared to nil by #reset.
# NOTE(review): presumably the raw request target from the request line —
# the assignment is not visible here, confirm.
def request_uri
  @request_uri
end

#stateObject (readonly)

:nodoc:



66
67
68
# File 'lib/http_tools/parser.rb', line 66

# Current parser state as a Symbol. It is :start after #initialize and
# #reset; #concat replaces it with the return value of the state method it
# dispatches to, and #finished? tests it against :end_of_message.
def state
  @state
end

#status_codeObject (readonly)

Returns the value of attribute status_code.



67
68
69
# File 'lib/http_tools/parser.rb', line 67

# Status code of the parsed response (200 in the overview example); nil
# after #initialize and #reset.
# NOTE(review): whether this is an Integer or a String is not visible
# here — confirm.
def status_code
  @status_code
end

#trailerObject (readonly)

Returns the value of attribute trailer.



67
68
69
# File 'lib/http_tools/parser.rb', line 67

# Hash of parsed trailer fields. Starts as an empty Hash and is replaced by
# a fresh empty Hash on #reset; #env folds its entries in under HTTP_-
# prefixed keys.
def trailer
  @trailer
end

#versionObject (readonly)

Returns the value of attribute version.



67
68
69
# File 'lib/http_tools/parser.rb', line 67

# Version of the parsed message. Cleared to nil by #reset.
# NOTE(review): presumably the HTTP version from the start line (e.g. the
# "1.1" of "HTTP/1.1") — the assignment is not visible here, confirm.
def version
  @version
end

Instance Method Details

#add_listener(event, proc = nil, &block) ⇒ Object Also known as: on

:call-seq: parser.add_listener(event) {|arg| block} -> parser parser.add_listener(event, proc) -> parser parser.on(event) {|arg| block} -> parser parser.on(event, proc) -> parser

Available events are :header, :stream, :trailer, :finish, and :error.

Adding a second callback for an event will overwrite the existing callback.

Events:

header

Called when headers are complete

stream

Supplied with one argument, the last chunk of body data fed in to the parser as a String, e.g. “<h1>Hello”. If no listener is set for this event the body can be retrieved with #body

trailer

Called on the completion of the trailer, if present

finish

Called on completion of the entire message. Any unconsumed data (such as the start of the next message with keepalive) can be retrieved with #rest

error

Supplied with one argument, an error encountered while parsing as a HTTPTools::ParseError. If a listener isn’t registered for this event, an exception will be raised when an error is encountered



287
288
289
290
# File 'lib/http_tools/parser.rb', line 287

# Registers the callback for +event+ by storing it in the matching
# "@<event>_callback" instance variable, replacing any callback stored
# there before. An explicit +proc+ takes precedence over a block.
#
# Returns the parser itself so calls can be chained.
def add_listener(event, proc=nil, &block)
  callback = proc || block
  ivar_name = "@#{event}_callback".to_sym
  instance_variable_set(ivar_name, callback)
  self
end

#concat(data) ⇒ Object Also known as: <<

:call-seq: parser.concat(data) -> parser parser << data -> parser

Feed data in to the parser and trigger callbacks.

Will raise HTTPTools::ParseError on error, unless a callback has been set for the :error event, in which case the callback will receive the error instead.



116
117
118
119
120
# File 'lib/http_tools/parser.rb', line 116

# Appends +data+ to the buffer, then invokes the method named by the
# current state and records whatever that method returns as the new state.
#
# Returns the parser itself.
def concat(data)
  @buffer << data
  handler = @state
  @state = send(handler)
  self
end

#envObject

:call-seq: parser.env -> hash or nil

Returns a Rack compatible environment hash. Will return nil if called before headers are complete.

“rack.input” is only supplied if #env is called after parsing the request has finished, and no listener is set for the ‘stream’ event; otherwise you must add it yourself to make the environment hash fully Rack compliant.



132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/http_tools/parser.rb', line 132

# Builds a Rack-style environment hash from the parsed request.
#
# Returns nil until the header is complete. Otherwise returns a copy of
# PROTOTYPE_ENV extended with:
# * REQUEST_METHOD (upcased), PATH_INFO and QUERY_STRING;
# * one entry per header, with the name upcased and "-" turned into "_",
#   prefixed with "HTTP_" unless the result is listed in NO_HTTP_;
# * SERVER_NAME and SERVER_PORT taken from the Host header (port "80" when
#   none is given; SERVER_NAME is nil when there is no Host header);
# * one "HTTP_"-prefixed entry per trailer field;
# * "rack.input" when a body has been collected, or when the stream
#   callback is still the built-in setup_stream_callback (an empty StringIO
#   if there is no body).
#
# NOTE(review): assumes a request was parsed; @request_method is nil after
# #reset, so this would raise NoMethodError for a parser that only saw a
# response — confirm intended usage.
def env
  return unless @header_complete
  env = PROTOTYPE_ENV.dup
  env[REQUEST_METHOD] = @request_method.upcase
  env[PATH_INFO] = @path_info
  env[QUERY_STRING] = @query_string
  @header.each do |key, value|
    # String#tr returns a new string, so the in-place prefixing below never
    # touches the key stored in @header.
    upper_key = key.tr(LOWERCASE, UPPERCASE)
    upper_key[0,0] = HTTP_ unless NO_HTTP_.key?(upper_key)
    env[upper_key.freeze] = value
  end
  # Split only on a colon that is followed by nothing but digits up to the
  # end of the value. A plain split on ":" breaks bracketed IPv6 literals:
  # "[::1]:8080" would yield host "[" and an empty port.
  host, port = env[HTTP_HOST].split(/:(?=\d*\z)/, 2) if env.key?(HTTP_HOST)
  env[SERVER_NAME] = host
  # A missing or empty port ("example.com:") falls back to the default.
  env[SERVER_PORT] = (port.nil? || port.empty?) ? "80" : port
  @trailer.each {|k, val| env[HTTP_ + k.tr(LOWERCASE, UPPERCASE)] = val}
  if @body || @stream_callback == method(:setup_stream_callback)
    env[RACK_INPUT] = StringIO.new(@body || "")
  end
  env
end

#finishObject

:call-seq: parser.finish -> parser

Used to notify the parser that the request has finished in a case where it can not be determined by the request itself.

For example, when a server does not set a content length, and instead relies on closing the connection to signify the body end.

until parser.finished?
  begin
    parser << socket.sysread(1024 * 16)
  rescue EOFError
    parser.finish
    break
  end
end

This method can not be used to interrupt parsing from within a callback.

Will raise HTTPTools::MessageIncompleteError if called too early, or HTTPTools::EndOfMessageError if the message has already finished, unless a callback has been set for the :error event, in which case the callback will receive the error instead.



176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/http_tools/parser.rb', line 176

# Tells the parser that the input has ended.
#
# * In the :body_on_close state the buffer is pointed back at the scanner
#   and the message is completed via end_of_message.
# * In the :body_chunked state the message is completed only if the buffer
#   is exhausted, no trailer is expected, and some header pair matches
#   "Connection: close" case-insensitively.
# * In the :start state with an empty buffer EmptyMessageError is raised.
# * In every other situation MessageIncompleteError is raised.
#
# Returns the parser itself.
#
# NOTE(review): the documented routing of errors to an :error callback is
# not visible in this method; presumably +raise+ is intercepted elsewhere
# in the class and may return. The branches are therefore kept mutually
# exclusive and an exception instance (not class + message) is passed —
# confirm before restructuring.
def finish
  case @state
  when :body_on_close
    @buffer = @scanner
    @state = end_of_message
  when :body_chunked
    connection_closed = @buffer.eos? && !@trailer_expected &&
      @header.any? {|name, val| CONNECTION.casecmp(name) == 0 && CLOSE.casecmp(val) == 0}
    if connection_closed
      @state = end_of_message
    else
      raise MessageIncompleteError.new("Message ended early")
    end
  when :start
    if @buffer.string.length < 1
      raise EmptyMessageError.new("Message empty")
    else
      raise MessageIncompleteError.new("Message ended early")
    end
  else
    raise MessageIncompleteError.new("Message ended early")
  end
  self
end

#finished?Boolean

:call-seq: parser.finished? -> bool

Returns true when the parser has come to the end of the message, false otherwise.

Some HTTP servers may not supply the necessary information in the response to determine the end of the message (e.g., no content length) and instead close the connection to signify the end of the message; see #finish for how to deal with this.

Returns:

  • (Boolean)


201
202
203
# File 'lib/http_tools/parser.rb', line 201

# True exactly when the current state is :end_of_message; the == comparison
# yields a strict true or false.
def finished?
  @state == :end_of_message
end

#header?Boolean

:call-seq: parser.header? -> bool

Returns true when the parser has received the complete header, false otherwise.

Returns:

  • (Boolean)


210
211
212
# File 'lib/http_tools/parser.rb', line 210

# Reports whether the complete header has been received.
#
# Always returns a strict true or false, like #finished?. Before the header
# is complete @header_complete holds nil (set by #initialize and #reset),
# which is coerced to false rather than returned as-is.
def header?
  !!@header_complete
end

#inspectObject

:nodoc:



293
294
295
# File 'lib/http_tools/parser.rb', line 293

# Takes the default inspect output and replaces everything from its first
# space up to the closing ">" at the end of a line with the value of
# posstr and the current state.
def inspect # :nodoc:
  default = super
  default.sub(/ .*>$/, " #{posstr} #{state}>")
end

#resetObject

:call-seq: parser.reset -> parser

Reset the parser so it can be used to process a new request. Callbacks/delegates will not be removed.



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# File 'lib/http_tools/parser.rb', line 235

# Returns the parser to the :start state so it can process a new message.
#
# The buffer is emptied in place and rewound, and all per-message data
# (request line parts, status code and message, header, trailer, body and
# internal progress flags) is cleared. Options such as force_no_body and
# user-registered callbacks are left untouched.
#
# Returns the parser itself.
def reset
  @state = :start
  # Empty the existing string rather than swapping in a new scanner, so
  # @buffer stays the same object.
  @buffer.string.replace("")
  @buffer.reset
  @request_method = nil
  @path_info = nil
  @query_string = nil
  @request_uri = nil
  @version = nil
  @status_code = nil
  # Cleared together with the status code; otherwise #message would keep
  # reporting the previous message's value after a reset.
  @message = nil
  @header_complete = nil
  @header = {}
  @trailer = {}
  @last_key = nil
  @content_left = nil
  @chunked = nil
  @trailer_expected = nil
  @body = nil
  # Only the built-in stream_callback is swapped back to the bootstrap
  # callback; any other stream callback is kept as is.
  if @stream_callback == method(:stream_callback)
    @stream_callback = method(:setup_stream_callback)
  end
  self
end

#restObject

:call-seq: parser.rest -> string

Returns unconsumed data in the parser’s buffer.



218
219
220
# File 'lib/http_tools/parser.rb', line 218

# Delegates to the buffer's #rest, i.e. the part of the buffered input that
# has not been consumed yet (for example the start of the next message on a
# keep-alive connection, per the :finish event notes).
def rest
  @buffer.rest
end

#rest_sizeObject

:call-seq: parser.rest_size -> int

Returns the size in bytes of the unconsumed data in the parser’s buffer.



226
227
228
# File 'lib/http_tools/parser.rb', line 226

# Delegates to the buffer's #rest_size: the size of the not-yet-consumed
# part of the buffered input.
def rest_size
  @buffer.rest_size
end