Class: MultipartParser::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/multipart_parser/parser.rb

Overview

A low-level parser for multipart messages, based on the node-formidable parser.

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Parser

Returns a new instance of Parser.



6
7
8
9
10
11
12
13
14
15
# File 'lib/multipart_parser/parser.rb', line 6

# Creates a parser that has no boundary yet. The state starts out as
# :parser_uninitialized and stays that way until #init_with_boundary
# switches it to :start.
def initialize
  # Boundary-related fields are populated by #init_with_boundary.
  @boundary, @boundary_chars, @lookbehind = nil, nil, nil
  @state = :parser_uninitialized
  @index = 0 # Position inside the boundary or the current header
  @flags, @marks = {}, {} # @marks remembers where the different parts begin
  @callbacks = {}
end

Instance Method Details

#init_with_boundary(boundary) ⇒ Object

Initializes the parser, using the given boundary



18
19
20
21
22
23
24
25
26
27
# File 'lib/multipart_parser/parser.rb', line 18

# Prepares the parser for a message that uses the given boundary.
#
# The stored delimiter is the boundary prefixed with "\r\n--". A scratch
# lookbehind string eight characters longer than the delimiter is
# allocated, the state is set to :start, and every character of the
# delimiter is recorded in @boundary_chars for fast membership checks.
#
# boundary - the boundary String (without the leading dashes)
def init_with_boundary(boundary)
  delimiter = "\r\n--" + boundary

  @boundary = delimiter
  @lookbehind = "\0" * (delimiter.length + 8)
  @state = :start

  @boundary_chars = {}
  delimiter.each_byte { |byte| @boundary_chars[byte.chr] = true }
end

#on(event, &callback) ⇒ Object

Registers a callback to be called when the given event occurs. Each callback is expected to take three parameters: buffer, start_index, and end_index. Any of these parameters may be nil, depending on the callback. Valid callbacks are: :end, :header_field, :header_value, :header_end, :headers_end, :part_begin, :part_data, :part_end.



42
43
44
# File 'lib/multipart_parser/parser.rb', line 42

# Associates the given block with +event+, replacing any block that was
# registered for that event before. The block itself is returned.
#
# event    - the event name, e.g. :part_begin, :part_data or :end
# callback - the block to remember for that event
def on(event, &callback)
  @callbacks.store(event, callback)
end

#write(buffer) ⇒ Object

Writes data to the parser. Returns the number of bytes parsed. In practice, this means that if the return value is less than the buffer length, a parse error occurred.



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/multipart_parser/parser.rb', line 50

# Feeds the next chunk of a multipart message to the state machine.
#
# The chunk is walked one character at a time. @index, @state and @flags
# are copied into locals on entry and only written back once the whole
# chunk has been accepted, so a parse error leaves them untouched (@marks,
# however, is updated in place as parsing progresses).
#
# Events are reported through the +callback+ and +data_callback+ helpers,
# which are defined outside this method (presumably they dispatch to the
# blocks registered with #on - confirm against their definitions).
#
# buffer - the String chunk to parse
#
# Returns the offset at which parsing stopped: buffer.length when the whole
# chunk was accepted, or the offset of the offending character on a parse
# error. A parser that has not been given a boundary yet consumes nothing
# and returns 0.
def write(buffer)
  # Without a boundary there is nothing to measure or match against; bail
  # out before touching @boundary, which is still nil in this state.
  return 0 if @state == :parser_uninitialized

  i = 0
  buffer_length = buffer.length
  index = @index
  flags = @flags.dup
  state = @state
  lookbehind = @lookbehind
  boundary = @boundary
  boundary_chars = @boundary_chars
  boundary_length = @boundary.length
  boundary_end = boundary_length - 1

  while i < buffer_length
    c = buffer[i, 1]
    case state
      when :start
        # Nothing is consumed here; the same character is re-examined
        # in the :start_boundary state.
        index = 0
        state = :start_boundary
      when :start_boundary # Differs in that it has no preceding \r\n
        if index == boundary_length - 2
          return i unless c == "\r"
          index += 1
        elsif index - 1 == boundary_length - 2
          return i unless c == "\n"
          # Boundary read successfully, begin next part
          callback(:part_begin)
          state = :header_field_start
        else
          # Skip the leading "\r\n" of the stored boundary when comparing
          return i unless c == boundary[index+2, 1] # Unexpected character
          index += 1
        end
        i += 1
      when :header_field_start
        # Remember where the header name begins; nothing is consumed
        state = :header_field
        @marks[:header_field] = i
        index = 0
      when :header_field
        if c == "\r"
          # A CR where a header name would start ends the header block
          @marks.delete :header_field
          state = :headers_almost_done
        else
          index += 1
          unless c == "-" # Skip hyphens
            if c == ":"
              return i if index == 1 # Empty header field
              data_callback(:header_field, buffer, i, :clear => true)
              state = :header_value_start
            else
              # Only letters are accepted in a header name
              cl = c.downcase
              return i if cl < "a" || cl > "z"
            end
          end
        end
        i += 1
      when :header_value_start
        if c == " " # Skip spaces
          i += 1
        else
          @marks[:header_value] = i
          state = :header_value
        end
      when :header_value
        if c == "\r"
          data_callback(:header_value, buffer, i, :clear => true)
          callback(:header_end)
          state = :header_value_almost_done
        end
        i += 1
      when :header_value_almost_done
        return i unless c == "\n"
        state = :header_field_start
        i += 1
      when :headers_almost_done
        return i unless c == "\n"
        callback(:headers_end)
        state = :part_data_start
        i += 1
      when :part_data_start
        # Remember where the part body begins; nothing is consumed
        state = :part_data
        @marks[:part_data] = i
      when :part_data
        prev_index = index

        if index == 0
          # Boyer-Moore derived algorithm to safely skip non-boundary data
          # See http://debuggable.com/posts/parsing-file-uploads-at-500-
          # mb-s-with-node-js:4c03862e-351c-4faa-bb67-4365cbdd56cb
          while i + boundary_length <= buffer_length
            break if boundary_chars.has_key? buffer[i + boundary_end].chr
            i += boundary_length
          end
          c = buffer[i, 1]
        end

        if index < boundary_length
          if boundary[index, 1] == c
            if index == 0
              # A possible boundary starts here: flush the data seen so far
              data_callback(:part_data, buffer, i, :clear => true)
            end
            index += 1
          else # It was not the boundary we found, after all
            index = 0
          end
        elsif index == boundary_length
          # The whole boundary matched; the next character tells us whether
          # another part follows (CR) or the message is ending (hyphen).
          index += 1
          if c == "\r"
            flags[:part_boundary] = true
          elsif c == "-"
            flags[:last_boundary] = true
          else # We did not find a boundary after all
            index = 0
          end
        elsif index - 1 == boundary_length
          if flags[:part_boundary]
            index = 0
            # Drop the flag whether or not the LF arrives. Leaving it set
            # after a false lead would make a later closing boundary take
            # this branch and be reported as part data instead of :end.
            flags.delete :part_boundary
            if c == "\n"
              callback(:part_end)
              callback(:part_begin)
              state = :header_field_start
              i += 1
              next # Ugly way to break out of the case statement
            end
          elsif flags[:last_boundary]
            # The candidate is resolved either way, so forget the flag
            flags.delete :last_boundary
            if c == "-"
              callback(:part_end)
              callback(:end)
              state = :end
            else
              index = 0 # False alarm
            end
          else
            index = 0
          end
        end

        if index > 0
          # When matching a possible boundary, keep a lookbehind
          # reference in case it turns out to be a false lead
          lookbehind[index-1] = c
        elsif prev_index > 0
          # If our boundary turns out to be rubbish,
          # the captured lookbehind belongs to part_data
          callback(:part_data, lookbehind, 0, prev_index)
          @marks[:part_data] = i

          # Reconsider the current character as it might be the
          # beginning of a new sequence.
          i -= 1
        end

        i += 1
      when :end
        # Everything after the closing boundary is consumed silently
        i += 1
      else
        # Unknown state: refuse to consume anything further
        return i
    end
  end

  # Report whatever marked data is still pending in this chunk
  data_callback(:header_field, buffer, buffer_length)
  data_callback(:header_value, buffer, buffer_length)
  data_callback(:part_data, buffer, buffer_length)

  # The chunk was fully accepted, so persist the machine state
  @index = index
  @state = state
  @flags = flags

  return buffer_length
end