Class: Mechanize::HTTP::ContentDispositionParser

Inherits:
Object
  • Object
show all
Defined in:
lib/mechanize/http/content_disposition_parser.rb

Overview

Parser for Content-Disposition headers that loosely follows RFC 2183.

Beyond RFC 2183, this parser allows:

  • Missing disposition-type

  • Multiple semicolons

  • Whitespace around semicolons

  • Dates in ISO 8601 format

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ ContentDispositionParser

Creates a new parser for Content-Disposition headers.



40
41
42
# File 'lib/mechanize/http/content_disposition_parser.rb', line 40

# Creates a parser with no active scanner. @scanner starts out as nil.
def initialize
  @scanner = nil
end

Instance Attribute Details

#scanner ⇒ Object

:nodoc:



24
25
26
# File 'lib/mechanize/http/content_disposition_parser.rb', line 24

# Returns the current value of @scanner, which is nil on a freshly
# constructed parser.
def scanner
  @scanner
end

Class Method Details

.parse(content_disposition) ⇒ Object

Parses the disposition type and params in the content_disposition string. The “Content-Disposition:” must be removed.



32
33
34
35
# File 'lib/mechanize/http/content_disposition_parser.rb', line 32

# Parses +content_disposition+ with a parser instance that is created on
# first use and then kept in the class-level @parser for later calls.
#
# Returns whatever the instance-level #parse returns for the string.
def self.parse content_disposition
  (@parser ||= new).parse content_disposition
end

Instance Method Details

#parse(content_disposition, header = false) ⇒ Object

Parses the content_disposition header. If header is set to true, the leading “Content-Disposition:” portion is parsed as well.



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/mechanize/http/content_disposition_parser.rb', line 48

# Parses +content_disposition+ and returns a
# Mechanize::HTTP::ContentDisposition, or nil when the input is nil or empty,
# the expected header prefix is missing, or the parameters cannot be parsed.
#
# When +header+ is true the string must start with "Content-Disposition"
# (matched case-insensitively) followed by ":"; that prefix and any spaces
# after it are consumed before the value is parsed.
#
# The disposition-type is optional: a leading token that is directly followed
# by "=" is treated as the first parameter name and the type is left nil.
def parse content_disposition, header = false
  return nil if content_disposition.nil? || content_disposition.empty?

  @scanner = StringScanner.new content_disposition

  if header then
    return nil unless @scanner.scan(/Content-Disposition/i)
    return nil unless @scanner.scan(/:/)
    spaces
  end

  # Remember where the header value starts so that a missing disposition-type
  # can be re-read as a parameter name. Rewinding to 0 would land on the
  # "Content-Disposition:" prefix when +header+ is true.
  value_start = @scanner.pos

  type = rfc_2045_token
  @scanner.scan(/;+/)

  # A token immediately followed by "=" was a parameter name, not a type.
  if @scanner.peek(1) == '=' then
    @scanner.pos = value_start
    type = nil
  end

  disposition = Mechanize::HTTP::ContentDisposition.new type

  spaces

  return nil unless parameters = parse_parameters

  # Well-known parameters get their own attributes; whatever is left over is
  # stored as-is in +parameters+.
  disposition.filename          = parameters.delete 'filename'
  disposition.creation_date     = parameters.delete 'creation-date'
  disposition.modification_date = parameters.delete 'modification-date'
  disposition.read_date         = parameters.delete 'read-date'
  disposition.size              = parameters.delete 'size'
  disposition.parameters        = parameters

  disposition
end

#parse_parameters ⇒ Object

Extracts disposition-param and returns a Hash.



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/mechanize/http/content_disposition_parser.rb', line 86

# Reads "name=value" pairs separated by one or more semicolons (with optional
# spaces around them) from the scanner and returns them as a Hash keyed by
# the lower-cased parameter name.
#
# Values are converted by parameter name:
# * creation-date, modification-date, read-date: a quoted-string parsed as an
#   RFC 822 date, falling back to ISO 8601, giving a Time
# * size: a base-10 Integer
# * anything else (including filename): the raw token or quoted-string
#
# Returns nil as soon as a name, the "=", or a usable value is missing.
def parse_parameters
  parameters = {}

  loop do
    return nil unless param = rfc_2045_token
    param.downcase!
    return nil unless @scanner.scan(/=/)

    value = case param
            when /^(creation|modification|read)-date$/ then
              date = rfc_2045_quoted_string

              # An unquoted or malformed date yields nil, which rejects the
              # whole parameter list below.
              if date then
                begin
                  Time.rfc822 date
                rescue ArgumentError
                  begin
                    Time.iso8601 date
                  rescue ArgumentError
                    nil
                  end
                end
              end
            when /^size$/ then
              # Convert only when a value was actually scanned; calling
              # to_i(10) on nil raises ArgumentError.
              size = rfc_2045_value
              size.to_i(10) if size
            else
              rfc_2045_value
            end

    return nil unless value

    parameters[param] = value

    spaces

    break if @scanner.eos? || !@scanner.scan(/;+/)

    spaces
  end

  parameters
end

#rfc_2045_quoted_string ⇒ Object

quoted-string = <"> *(qtext/quoted-pair) <">

qtext         = <any CHAR excepting <">, "\" & CR,
                 and including linear-white-space>
quoted-pair   = "\" CHAR

Parses an RFC 2045 quoted-string



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/mechanize/http/content_disposition_parser.rb', line 137

# Parses a quoted-string at the scanner's current position and returns its
# unescaped content, or nil when the scanner is not at a double quote, the
# closing quote is missing, or a character outside the accepted set is met.
#
# Inside the quotes, in any order and any number of times:
# * a run of qtext (any character from \000 to \177 except CR, the double
#   quote and the backslash) is copied verbatim
# * a quoted-pair (backslash plus one byte) contributes that byte
# * a folded line (CRLF followed by tabs or spaces) contributes one space
def rfc_2045_quoted_string
  return nil unless @scanner.scan(/"/)

  text = String.new

  loop do
    if chunk = @scanner.scan(/[\000-\014\016-\041\043-\133\135-\177]+/) then # not \r \ "
      text << chunk
    elsif @scanner.scan(/\\/) then
      # quoted-pair: accepted anywhere, including at the very start of the
      # string and directly after another quoted-pair
      return nil if @scanner.eos?
      text << @scanner.get_byte
    elsif @scanner.scan(/\r\n[\t ]+/) then
      text << " "
    elsif @scanner.scan(/"/) then
      break
    else
      return nil
    end
  end

  text
end

#rfc_2045_token ⇒ Object

token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, or tspecials>

Parses an RFC 2045 token



176
177
178
# File 'lib/mechanize/http/content_disposition_parser.rb', line 176

# Consumes one token at the scanner's current position and returns it.
# Returns nil, leaving the scanner where it was, when the next character is a
# space, a control character, or one of the tspecials.
def rfc_2045_token
  token_chars = /[^\000-\037\177()<>@,;:\\"\/\[\]?= ]+/
  @scanner.scan token_chars
end

#rfc_2045_value ⇒ Object

value := token / quoted-string

Parses an RFC 2045 value



185
186
187
188
189
190
191
# File 'lib/mechanize/http/content_disposition_parser.rb', line 185

# Parses a value at the scanner's current position: a quoted-string when the
# next character is a double quote, otherwise a token. Returns whatever the
# chosen sub-parser returns.
def rfc_2045_value
  return rfc_2045_quoted_string if @scanner.peek(1) == '"'

  rfc_2045_token
end

#spaces ⇒ Object

1*SP

Parses spaces



198
199
200
# File 'lib/mechanize/http/content_disposition_parser.rb', line 198

# Consumes a run of one or more space characters at the scanner's current
# position and returns it, or returns nil when the next character is not a
# space.
def spaces
  space_run = / +/
  @scanner.scan space_run
end