Class: HTTPTools::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/http_tools/parser.rb

Overview

HTTPTools::Parser is a pure Ruby HTTP request & response parser with an evented API.

The HTTP message can be fed in to the parser piece by piece as it comes over the wire, and the parser will call its callbacks as it works its way through the message.

Example:

# Parse a response fed in three pieces, printing the status line and headers
# as soon as the header section is complete, and the body at the end.
parser = HTTPTools::Parser.new
parser.on(:header) do
  # The expected output is "200 OK": the status code followed by the status
  # message. Interpolation is used so this works whether or not status_code
  # is a String.
  puts "#{parser.status_code} #{parser.message}"
  puts parser.header.inspect
end
parser.on(:finish) {print parser.body}

parser << "HTTP/1.1 200 OK\r\n"
parser << "Content-Length: 20\r\n\r\n"
parser << "<h1>Hello world</h1>"

Prints:

200 OK
{"Content-Length" => "20"}
<h1>Hello world</h1>

Constant Summary collapse

COLON =

:stopdoc:

":".freeze
# Shared, frozen literals used by the parser. Every constant here is frozen so
# that none of them can be mutated by accident at runtime.
KEY_TERMINATOR =
": ".freeze
CONTENT_LENGTH =
"Content-Length".freeze
TRANSFER_ENCODING =
"Transfer-Encoding".freeze
TRAILER =
"Trailer".freeze
# CONNECTION and CLOSE are compared case-insensitively against the parsed
# headers by #finish.
CONNECTION =
"Connection".freeze
CLOSE =
"close".freeze
CHUNKED =
"chunked".freeze
# Keys written in to the Rack environment hash built by #env.
REQUEST_METHOD =
"REQUEST_METHOD".freeze
PATH_INFO =
"PATH_INFO".freeze
QUERY_STRING =
"QUERY_STRING".freeze
SERVER_NAME =
"SERVER_NAME".freeze
SERVER_PORT =
"SERVER_PORT".freeze
HTTP_HOST =
"HTTP_HOST".freeze
RACK_INPUT =
"rack.input".freeze
# Template for the Rack environment; #env dups it and then fills in the
# per-message values.
PROTOTYPE_ENV =
{
"SCRIPT_NAME" => "".freeze,
"rack.version" => [1, 1].freeze,
"rack.url_scheme" => "http".freeze,
"rack.errors" => STDERR,
"rack.multithread" => false,
"rack.multiprocess" => false,
"rack.run_once" => false}.freeze
# Prefix #env adds to header names that are not listed in NO_HTTP_.
HTTP_ =
"HTTP_".freeze
# Character sets passed to String#tr by #env, turning e.g. "Content-Type"
# in to "CONTENT_TYPE".
LOWERCASE =
"a-z-".freeze
UPPERCASE =
"A-Z_".freeze
# Upper-cased header names that #env stores without the HTTP_ prefix.
# Frozen like the other constants; it is only ever read (via key?).
NO_HTTP_ =
{"CONTENT_LENGTH" => true, "CONTENT_TYPE" => true}.freeze
EVENTS =

:startdoc:

%W{header stream trailer finish error}.map {|e| e.freeze}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeParser

:call-seq: Parser.new -> parser

Create a new HTTPTools::Parser.



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/http_tools/parser.rb', line 82

# Create a new parser in its initial (:start) state with empty header and
# trailer hashes and no user callbacks registered.
#
# Every instance variable the parser reads later is given an explicit value
# here, so that a fresh parser and a #reset parser expose the same state and
# readers such as #message or #request_method never touch an undefined
# instance variable.
def initialize
  @state = :start
  # @buffer and @scanner start out as the same StringScanner; #finish points
  # @buffer back at @scanner when closing a read-until-close body.
  @buffer = @scanner = StringScanner.new("")
  @header = {}
  @trailer = {}

  # User-settable options (see the matching attribute docs).
  @force_no_body = nil
  @allow_html_without_header = nil
  @force_trailer = nil

  # Per-message state; the same variables are cleared again by #reset.
  @request_method = nil
  @path_info = nil
  @query_string = nil
  @request_uri = nil
  @version = nil
  @message = nil
  @status_code = nil
  @header_complete = nil
  @last_key = nil
  @content_left = nil
  @chunked = nil
  @trailer_expected = nil
  @body = nil

  # Callbacks. The stream callback defaults to the internal
  # setup_stream_callback method; #env compares against that default to tell
  # whether a stream listener has been registered.
  @header_callback = nil
  @stream_callback = method(:setup_stream_callback)
  @trailer_callback = nil
  @finish_callback = nil
  @error_callback = nil
end

Instance Attribute Details

#allow_html_without_headerObject

Allow responses with no status line or headers if it looks like HTML.



76
77
78
# File 'lib/http_tools/parser.rb', line 76

# Option flag: when set, allow responses with no status line or headers if
# the data looks like HTML. nil until assigned.
def allow_html_without_header = @allow_html_without_header

#bodyObject (readonly)

Returns the value of attribute body.



66
67
68
# File 'lib/http_tools/parser.rb', line 66

# The body collected by the parser (@body). This is how the body is retrieved
# when no listener is set for the :stream event.
def body = @body

#force_no_bodyObject

Skip parsing the body, e.g. with the response to a HEAD request.



73
74
75
# File 'lib/http_tools/parser.rb', line 73

# Option flag: when set, skip parsing the body, e.g. for the response to a
# HEAD request. nil until assigned.
def force_no_body = @force_no_body

#force_trailerObject

Force parser to expect and parse a trailer when Trailer header missing.



70
71
72
# File 'lib/http_tools/parser.rb', line 70

# Option flag: when set, force the parser to expect and parse a trailer even
# when the Trailer header is missing. nil until assigned.
def force_trailer = @force_trailer

#headerObject (readonly)

Returns the value of attribute header.



66
67
68
# File 'lib/http_tools/parser.rb', line 66

# The Hash of parsed headers (@header), e.g. {"Content-Length" => "20"}.
# The same Hash object is returned on every call, not a copy.
def header = @header

#messageObject (readonly)

Returns the value of attribute message.



66
67
68
# File 'lib/http_tools/parser.rb', line 66

# Returns the value of @message.
# NOTE(review): presumably the status message of a response ("OK" in
# "HTTP/1.1 200 OK") - confirm against the status line parsing code.
def message = @message

#path_infoObject (readonly)

Returns the value of attribute path_info.



66
67
68
# File 'lib/http_tools/parser.rb', line 66

# Returns the value of @path_info; #env exposes the same value under the
# Rack "PATH_INFO" key.
def path_info = @path_info

#query_stringObject (readonly)

Returns the value of attribute query_string.



66
67
68
# File 'lib/http_tools/parser.rb', line 66

# Returns the value of @query_string; #env exposes the same value under the
# Rack "QUERY_STRING" key.
def query_string = @query_string

#request_methodObject (readonly)

Returns the value of attribute request_method.



66
67
68
# File 'lib/http_tools/parser.rb', line 66

# Returns the value of @request_method; #env exposes the same value under the
# Rack "REQUEST_METHOD" key.
def request_method = @request_method

#request_uriObject (readonly)

Returns the value of attribute request_uri.



66
67
68
# File 'lib/http_tools/parser.rb', line 66

# Returns the value of @request_uri (nil after #reset until set again).
def request_uri = @request_uri

#stateObject (readonly)

:nodoc:



65
66
67
# File 'lib/http_tools/parser.rb', line 65

# The parser's current state as a Symbol: :start for a new or reset parser,
# :end_of_message once #finished? is true. #concat dispatches on this value.
def state = @state

#status_codeObject (readonly)

Returns the value of attribute status_code.



66
67
68
# File 'lib/http_tools/parser.rb', line 66

# Returns the value of @status_code (nil for a new or reset parser).
def status_code = @status_code

#trailerObject (readonly)

Returns the value of attribute trailer.



66
67
68
# File 'lib/http_tools/parser.rb', line 66

# The Hash of parsed trailer fields (@trailer); empty for a new or reset
# parser. #env copies these in to the Rack environment with an HTTP_ prefix.
def trailer = @trailer

#versionObject (readonly)

Returns the value of attribute version.



66
67
68
# File 'lib/http_tools/parser.rb', line 66

# Returns the value of @version (nil after #reset until set again).
def version = @version

Instance Method Details

#add_listener(event, proc = nil, &block) ⇒ Object Also known as: on

:call-seq: parser.add_listener(event) {|arg| block} -> parser parser.add_listener(event, proc) -> parser parser.on(event) {|arg| block} -> parser parser.on(event, proc) -> parser

Available events are :header, :stream, :trailer, :finish, and :error.

Adding a second callback for an event will overwrite the existing callback.

Events:

header

Called when headers are complete

stream

Supplied with one argument, the last chunk of body data fed in to the parser as a String, e.g. “<h1>Hello”. If no listener is set for this event the body can be retrieved with #body

trailer

Called on the completion of the trailer, if present

finish

Called on completion of the entire message. Any unconsumed data (such as the start of the next message with keepalive) can be retrieved with #rest

error

Supplied with one argument, an error encountered while parsing as a HTTPTools::ParseError. If a listener isn’t registered for this event, an exception will be raised when an error is encountered



259
260
261
262
# File 'lib/http_tools/parser.rb', line 259

# Register the callback for +event+ and return the parser so calls can be
# chained. The callback is stored in the "@<event>_callback" instance
# variable, replacing any callback set earlier for the same event. When both
# a proc and a block are given, the proc is used.
def add_listener(event, proc=nil, &block)
  slot = :"@#{event}_callback"
  callback = proc || block
  instance_variable_set(slot, callback)
  self
end

#concat(data) ⇒ Object Also known as: <<

:call-seq: parser.concat(data) -> parser parser << data -> parser

Feed data in to the parser and trigger callbacks.

Will raise HTTPTools::ParseError on error, unless a callback has been set for the :error event, in which case the callback will receive the error instead.



110
111
112
113
114
# File 'lib/http_tools/parser.rb', line 110

# Append +data+ to the parser's buffer, then run the handler method named by
# the current state. Whatever that handler returns becomes the new state.
# Returns the parser itself so calls can be chained.
def concat(data)
  @buffer << data
  handler = @state
  @state = send(handler)
  self
end

#envObject

:call-seq: parser.env -> hash or nil

Returns a Rack compatible environment hash. Will return nil if called before headers are complete.

“rack.input” is only supplied if #env is called after parsing the request has finished, and no listener is set for the stream event, otherwise you must add it yourself to make the environment hash fully Rack compliant



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/http_tools/parser.rb', line 126

# Build a Rack style environment Hash from the parsed message.
#
# Returns nil until the headers are complete. Otherwise returns a fresh Hash
# based on PROTOTYPE_ENV containing:
#
# * REQUEST_METHOD, PATH_INFO and QUERY_STRING from the parsed message
# * every header, upper-cased with "-" turned in to "_", and prefixed with
#   HTTP_ unless the name is listed in NO_HTTP_
# * SERVER_NAME and SERVER_PORT taken from the Host header; the port defaults
#   to "80", and SERVER_NAME is nil when there is no usable Host header
# * every trailer field, upper-cased and prefixed with HTTP_
# * "rack.input" as a StringIO over the body, only when a body has been
#   collected or the stream callback is still the built-in default
def env
  return unless @header_complete
  env = PROTOTYPE_ENV.dup
  env[REQUEST_METHOD] = @request_method
  env[PATH_INFO] = @path_info
  env[QUERY_STRING] = @query_string
  @header.each do |key, value|
    upper_key = key.tr(LOWERCASE, UPPERCASE)
    upper_key = HTTP_ + upper_key unless NO_HTTP_.key?(upper_key)
    env[upper_key] = value
  end

  # The Host header may be missing, so never call String methods on nil.
  authority = env[HTTP_HOST].to_s
  # A bracketed IPv6 literal ("[::1]" or "[::1]:8080") contains colons of its
  # own, so it has to be recognised before splitting on COLON.
  ipv6 = /\A(\[[^\]]*\])(?::(\d*))?\z/.match(authority)
  if ipv6
    host, port = ipv6[1], ipv6[2]
  else
    host, port = authority.split(COLON)
  end
  # An empty port must fall back to the default rather than reach the
  # environment as an empty string.
  port = nil if port && port.empty?
  env[SERVER_NAME] = host
  env[SERVER_PORT] = port || "80"

  @trailer.each {|k, val| env[HTTP_ + k.tr(LOWERCASE, UPPERCASE)] = val}
  if @body || @stream_callback == method(:setup_stream_callback)
    env[RACK_INPUT] = StringIO.new(@body || "")
  end
  env
end

#finishObject

:call-seq: parser.finish -> parser

Used to notify the parser that the request has finished in a case where it can not be determined by the request itself.

For example, when a server does not set a content length, and instead relies on closing the connection to signify the body end.

# Keep feeding the parser until it reports the message is complete; if the
# peer closes the connection first, tell the parser the message has ended.
begin
  parser << socket.sysread(1024 * 16) until parser.finished?
rescue EOFError
  parser.finish
end

This method can not be used to interrupt parsing from within a callback.

Will raise HTTPTools::MessageIncompleteError if called too early, or HTTPTools::EndOfMessageError if the message has already finished, unless a callback has been set for the :error event, in which case the callback will receive the error instead.



170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/http_tools/parser.rb', line 170

# Tell the parser that no more data will arrive. Returns the parser.
#
# The message is closed (state set from end_of_message) when either
# * the parser is reading a body terminated by connection close, or
# * it is reading a chunked body, the buffer is fully consumed, no trailer
#   is expected, and a "Connection: close" header (compared ignoring case)
#   was received.
#
# Otherwise EmptyMessageError is raised if nothing has been fed to a parser
# still in the :start state, and MessageIncompleteError in every other case.
#
# NOTE(review): a parser already in :end_of_message falls in to the
# MessageIncompleteError branch here; confirm this is the intended error for
# an already-finished message.
def finish
  ready_to_close =
    if @state == :body_on_close
      # Point @buffer back at the scanner before closing the message.
      @buffer = @scanner
      true
    else
      @state == :body_chunked && @buffer.eos? && !@trailer_expected &&
        @header.any? {|name, value| CONNECTION.casecmp(name) == 0 && CLOSE.casecmp(value) == 0}
    end

  if ready_to_close
    @state = end_of_message
  elsif @state == :start && @buffer.string.empty?
    raise EmptyMessageError.new("Message empty")
  else
    raise MessageIncompleteError.new("Message ended early")
  end
  self
end

#finished?Boolean

:call-seq: parser.finished? -> bool

Returns true when the parser has come to the end of the message, false otherwise.

Some HTTP servers may not supply the necessary information in the response to determine the end of the message (e.g., no content length) and instead close the connection to signify the end of the message, see #finish for how to deal with this.

Returns:

  • (Boolean)


195
196
197
# File 'lib/http_tools/parser.rb', line 195

# True once the parser's state is :end_of_message, false otherwise.
def finished? = @state == :end_of_message

#inspectObject

:nodoc:



265
266
267
268
269
# File 'lib/http_tools/parser.rb', line 265

# The default #to_s text with " <posstr> <state>" spliced in just before its
# final character (the closing ">" of the default representation).
def inspect # :nodoc:
  to_s.insert(-2, " #{posstr} #{state}")
end

#resetObject

:call-seq: parser.reset -> parser

Reset the parser so it can be used to process a new request. Callbacks/delegates will not be removed.



212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# File 'lib/http_tools/parser.rb', line 212

# Return the parser to its initial state so it can parse a new message.
# Registered callbacks and the option flags are left untouched. Returns the
# parser so calls can be chained.
def reset
  @state = :start
  # Empty the buffer's underlying string in place and rewind the scanner.
  @buffer.string.replace("")
  @buffer.reset
  @request_method = nil
  @path_info = nil
  @query_string = nil
  @request_uri = nil
  @version = nil
  @message = nil
  @status_code = nil
  # Must be cleared: #env uses this flag to return nil until the headers of
  # the current message are complete. Left set, #env would run against the
  # emptied @header after a reset instead of returning nil.
  @header_complete = nil
  @header = {}
  @trailer = {}
  @last_key = nil
  @content_left = nil
  @chunked = nil
  @trailer_expected = nil
  # NOTE(review): @body is not cleared here, so #body still returns the
  # previous message's body after a reset. Confirm how the stream callback
  # re-initialises @body before changing this.
  self
end

#restObject

:call-seq: parser.rest -> string

Returns unconsumed data in the parser’s buffer.



203
204
205
# File 'lib/http_tools/parser.rb', line 203

# The data in the parser's buffer that has not been consumed yet, e.g. the
# start of the next message on a keepalive connection.
def rest = @buffer.rest