Class: Ur::ContentType

Inherits:

String

Object
String
Ur::ContentType

show all

Defined in: lib/ur/content_type.rb

Overview

Ur::ContentType represents a Content-Type header field. it parses the media type and its components, as well as any parameters.

this class aims to be permissive in what it will parse. it will not raise any error when given a malformed or syntactically invalid Content-Type string. fields and parameters parsed from invalid Content-Type strings are undefined, but this class generally tries to make the most sense of what it's given.

this class is based on RFCs:

Hypertext Transfer Protocol (HTTP/1.1): Semantics and Content Section 3.1.1.1. Media Type https://tools.ietf.org/html/rfc7231#section-3.1.1.1
Media Type Specifications and Registration Procedures https://tools.ietf.org/html/rfc6838
Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies. Section 5.1. Syntax of the Content-Type Header Field https://tools.ietf.org/html/rfc2045#section-5.1
Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types https://tools.ietf.org/html/rfc2046
Additional Media Type Structured Syntax Suffixes https://tools.ietf.org/html/rfc6839

Constant Summary collapse

MEDIA_TYPE_REGEXP =

the character ranges in this SHOULD be significantly more restrictive, and the /<subtype> construct should not be optional. however, we'll aim to match whatever media type we are given.

example:

    MEDIA_TYPE_REGEXP.match('application/vnd.github+json').named_captures
    =>
    {
      "media_type" => "application/vnd.github+json",
      "type" => "application",
      "subtype" => "vnd.github+json",
      "facet" => "vnd",
      "suffix" => "json",
    }

example of being more permissive than the spec allows:

    MEDIA_TYPE_REGEXP.match('where the %$*! am I').named_captures
    =>
    {
      "media_type" => "where the %$*! am I",
      "type" => "where the %$*! am I",
      "subtype" => nil,
      "facet" => nil,
      "suffix" => nil
    }

# permissive pattern for a media type, written in extended (x) mode.
# named captures: media_type (the type plus an optional "/subtype"), type, subtype,
# facet (the part of the subtype before a "."), and suffix (the part of the subtype after a "+").
# no character class here accepts ";" or a double quote, so a match never extends past either.
# every piece may match the empty string, and the whole "/subtype" group is optional.
%r{
  (?<media_type>       # the media type includes the type and subtype
    (?<type>[^\/;\"]*) # the type precedes the first slash
    (?:\/              # slash
      (?<subtype>      # the subtype includes the facet, the suffix, and bits in between
        (?:
          (?<facet>[^.+;\"]*) # the facet name comes before the first . in the subtype
          \.             # dot
        )?
        [^\+;\"]*      # anything between facet and suffix
        (?:\+          # plus
          (?<suffix>[^;\"]*) # optional suffix
        )?
      )
    )? # the subtype should not be optional, but we will match a type without subtype anyway
  )
}x

# subtypes (or structured-syntax suffixes) that #binary? compares against, ignoring case,
# to decide that a media type is text rather than binary.
# the array and every string in it are frozen.
SOME_TEXT_SUBTYPES = [
  'x-www-form-urlencoded',
  'json',
  'json-seq',
  'jwt',
  'jose',
  'yaml',
  'x-yaml',
  'xml',
  'html',
  'css',
  'javascript',
  'ecmascript',
].each(&:freeze).freeze

Instance Attribute Summary collapse

#facet ⇒ String? readonly
the 'facet' portion of our media type.
#media_type ⇒ String? readonly
the media type of this content type.
#parameters ⇒ Hash<String, String> readonly
parameters of this content type.
#subtype ⇒ String? readonly
the 'subtype' portion of our media type.
#suffix ⇒ String? readonly
the 'suffix' portion of our media type.
#type ⇒ String? readonly
the 'type' portion of our media type.

Instance Method Summary collapse

#binary?(unknown: true) ⇒ Boolean
does this content type appear to be binary? this library makes its best guess based on a very incomplete knowledge of which media types indicate binary or text.
#form_urlencoded? ⇒ Boolean
is this an x-www-form-urlencoded content type?
#initialize(*a) ⇒ ContentType constructor
A new instance of ContentType.
#json? ⇒ Boolean
is this a JSON content type?
#subtype?(other_subtype) ⇒ Boolean
is the 'subtype' portion of our media type equal (case-insensitive) to the given other_subtype.
#suffix?(other_suffix) ⇒ Boolean
is the 'suffix' portion of our media type equal (case-insensitive) to the given other_suffix.
#type?(other_type) ⇒ Boolean
is the 'type' portion of our media type equal (case-insensitive) to the given other_type.
#type_application? ⇒ Boolean
is the 'type' portion of our media type 'application'.
#type_audio? ⇒ Boolean
is the 'type' portion of our media type 'audio'.
#type_image? ⇒ Boolean
is the 'type' portion of our media type 'image'.
#type_message? ⇒ Boolean
is the 'type' portion of our media type 'message'.
#type_multipart? ⇒ Boolean
is the 'type' portion of our media type 'multipart'.
#type_text? ⇒ Boolean
is the 'type' portion of our media type 'text'.
#type_video? ⇒ Boolean
is the 'type' portion of our media type 'video'.
#xml? ⇒ Boolean
is this an XML content type?

Constructor Details

#initialize(*a) ⇒ `ContentType`

Returns a new instance of ContentType.

# File 'lib/ur/content_type.rb', line 72

# parses this string as a Content-Type value and then freezes the instance.
#
# all arguments are forwarded unchanged to the superclass constructor (bare `super`).
# the media type at the start of the string is matched with MEDIA_TYPE_REGEXP; whatever
# follows is read as a sequence of `;key=value` parameters. nothing here raises on
# malformed input - unparseable pieces are simply left unset or skipped.
#
# @param a arguments passed through to the superclass constructor
def initialize(*a)
  super

  scanner = StringScanner.new(self)

  # each captured piece is stored stripped of surrounding whitespace and frozen.
  # a piece whose named group did not take part in the match is never assigned, so it reads as nil.
  if scanner.scan(MEDIA_TYPE_REGEXP)
    @media_type = scanner[:media_type].strip.freeze if scanner[:media_type]
    @type      = scanner[:type].strip.freeze       if scanner[:type]
    @subtype  = scanner[:subtype].strip.freeze    if scanner[:subtype]
    @facet   = scanner[:facet].strip.freeze      if scanner[:facet]
    @suffix = scanner[:suffix].strip.freeze     if scanner[:suffix]
  end

  # default block makes lookups case-insensitive: a missing key that is not already
  # lowercase is looked up again by its downcased form. any other missing key yields nil.
  # the block never stores anything in the hash.
  @parameters = Hash.new do |h, k|
    if k.respond_to?(:downcase) && k != k.downcase
      h[k.downcase]
    else
      nil
    end
  end

  # each parameter is introduced by one or more semicolons, each optionally followed by whitespace.
  while scanner.scan(/(;\s*)+/)
    # the key is everything up to the next `;`, `=` or double quote. it may be empty, and it is
    # not stripped, so whitespace before the `=` remains part of the key.
    key = scanner.scan(/[^;=\"]*/)
    # a key that is not immediately followed by `=` records no parameter.
    if key && scanner.scan(/=/)
      value = String.new
      # accumulate the value until the end of the string or the next (unquoted) semicolon.
      until scanner.eos? || scanner.check(/;/)
        if scanner.scan(/\s+/)
          ws = scanner[0]
          # discard trailing whitespace.
          # other whitespace isn't technically valid but we are permissive so we put it in the value.
          value << ws unless scanner.eos? || scanner.check(/;/)
        elsif scanner.scan(/"/)
          # quoted section: the surrounding quotes are dropped, and a backslash causes the
          # character after it to be appended as-is. an unterminated quote runs to the end of the string.
          until scanner.eos? || scanner.scan(/"/)
            if scanner.scan(/\\/)
              value << scanner.getch unless scanner.eos?
            end
            value << scanner.scan(/[^\"\\]*/)
          end
        else
          # bare run of characters, up to whitespace, a semicolon or a double quote.
          value << scanner.scan(/[^\s;\"]*/)
        end
      end
      # keys are stored lowercased; a repeated key overwrites the earlier value.
      @parameters[key.downcase.freeze] = value.freeze
    end
  end

  @parameters.freeze

  freeze
end

Instance Attribute Details

#facet ⇒ String? (readonly)

the 'facet' portion of our media type. e.g. "vnd" in content-type: application/vnd.github+json; charset="utf-8"

Returns:

(String, nil)



141
142
143

# File 'lib/ur/content_type.rb', line 141

# the 'facet' portion of our media type,
# e.g. "vnd" in content-type: application/vnd.github+json; charset="utf-8"
#
# @return [String, nil]
def facet
  @facet
end

#media_type ⇒ `String?` (readonly)

the media type of this content type. e.g. "application/vnd.github+json" in content-type: application/vnd.github+json; charset="utf-8"

Returns:

(String, nil)



126
127
128

# File 'lib/ur/content_type.rb', line 126

# the media type of this content type,
# e.g. "application/vnd.github+json" in content-type: application/vnd.github+json; charset="utf-8"
#
# @return [String, nil]
def media_type
  @media_type
end

#parameters ⇒ `Hash<String, String>` (readonly)

parameters of this content type. e.g. {"charset" => "utf-8"} in content-type: application/vnd.github+json; charset="utf-8"

Returns:

(Hash<String, String>)



151
152
153

# File 'lib/ur/content_type.rb', line 151

# parameters of this content type,
# e.g. {"charset" => "utf-8"} in content-type: application/vnd.github+json; charset="utf-8"
#
# @return [Hash<String, String>]
def parameters
  @parameters
end

#subtype ⇒ `String?` (readonly)

the 'subtype' portion of our media type. e.g. "vnd.github+json" in content-type: application/vnd.github+json; charset="utf-8"

Returns:

(String, nil)



136
137
138

# File 'lib/ur/content_type.rb', line 136

# the 'subtype' portion of our media type,
# e.g. "vnd.github+json" in content-type: application/vnd.github+json; charset="utf-8"
#
# @return [String, nil]
def subtype
  @subtype
end

#suffix ⇒ `String?` (readonly)

the 'suffix' portion of our media type. e.g. "json" in content-type: application/vnd.github+json; charset="utf-8"

Returns:

(String, nil)



146
147
148

# File 'lib/ur/content_type.rb', line 146

# the 'suffix' portion of our media type,
# e.g. "json" in content-type: application/vnd.github+json; charset="utf-8"
#
# @return [String, nil]
def suffix
  @suffix
end

#type ⇒ `String?` (readonly)

the 'type' portion of our media type. e.g. "application" in content-type: application/vnd.github+json; charset="utf-8"

Returns:

(String, nil)



131
132
133

# File 'lib/ur/content_type.rb', line 131

# the 'type' portion of our media type,
# e.g. "application" in content-type: application/vnd.github+json; charset="utf-8"
#
# @return [String, nil]
def type
  @type
end

Instance Method Details

#binary?(unknown: true) ⇒ `Boolean`

does this content type appear to be binary? this library makes its best guess based on a very incomplete knowledge of which media types indicate binary or text.

Parameters:

unknown (Boolean) (defaults to: true) —
return this value when we have no idea whether our media type is binary or text.

Returns:

(Boolean)

# File 'lib/ur/content_type.rb', line 195

# best guess at whether this content type is binary, based on a very incomplete
# knowledge of which media types indicate binary or text.
#
# the checks run in this order: a 'text' type is not binary; a suffix (or, when there is
# no suffix, a subtype) listed in SOME_TEXT_SUBTYPES is not binary; an 'image', 'audio'
# or 'video' type is binary; anything else gives the caller's fallback.
#
# @param unknown [Boolean] returned when we have no idea whether our media type is binary or text
# @return [Boolean]
def binary?(unknown: true)
  return false if type_text?

  # the suffix, when present, is the only thing compared; the subtype is the fallback
  compared = suffix || subtype
  if compared && SOME_TEXT_SUBTYPES.any? { |text_subtype| compared.casecmp?(text_subtype) }
    return false
  end

  # these are generally binary
  return true if type_image? || type_audio? || type_video?

  # we're out of ideas
  unknown
end

#form_urlencoded? ⇒ `Boolean`

is this a x-www-form-urlencoded content type?

Returns:

(Boolean)



223
224
225

# File 'lib/ur/content_type.rb', line 223

# is this an x-www-form-urlencoded content type?
# the suffix is compared (ignoring case) when there is one; otherwise the subtype is.
#
# @return [Boolean]
def form_urlencoded?
  compared = suffix || subtype
  return false unless compared

  compared.casecmp?('x-www-form-urlencoded')
end

#json? ⇒ `Boolean`

is this a JSON content type?

Returns:

(Boolean)



211
212
213

# File 'lib/ur/content_type.rb', line 211

# is this a JSON content type?
# the suffix is compared (ignoring case) when there is one; otherwise the subtype is.
#
# @return [Boolean]
def json?
  compared = suffix || subtype
  return false unless compared

  compared.casecmp?('json')
end

#subtype?(other_subtype) ⇒ `Boolean`

is the 'subtype' portion of our media type equal (case-insensitive) to the given other_subtype

Parameters:

other_subtype

Returns:

(Boolean)



163
164
165

# File 'lib/ur/content_type.rb', line 163

# is the 'subtype' portion of our media type equal (case-insensitive) to the given other_subtype?
#
# @param other_subtype the subtype to compare ours against
# @return [Boolean] false when our subtype is nil
def subtype?(other_subtype)
  own_subtype = subtype
  return false unless own_subtype

  own_subtype.casecmp?(other_subtype)
end

#suffix?(other_suffix) ⇒ `Boolean`

is the 'suffix' portion of our media type equal (case-insensitive) to the given other_suffix

Parameters:

other_suffix

Returns:

(Boolean)



170
171
172

# File 'lib/ur/content_type.rb', line 170

# is the 'suffix' portion of our media type equal (case-insensitive) to the given other_suffix?
#
# @param other_suffix the suffix to compare ours against
# @return [Boolean] false when our suffix is nil
def suffix?(other_suffix)
  own_suffix = suffix
  return false unless own_suffix

  own_suffix.casecmp?(other_suffix)
end

#type?(other_type) ⇒ `Boolean`

is the 'type' portion of our media type equal (case-insensitive) to the given other_type

Parameters:

other_type

Returns:

(Boolean)



156
157
158

# File 'lib/ur/content_type.rb', line 156

# is the 'type' portion of our media type equal (case-insensitive) to the given other_type?
#
# @param other_type the type to compare ours against
# @return [Boolean] false when our type is nil
def type?(other_type)
  own_type = type
  return false unless own_type

  own_type.casecmp?(other_type)
end

#type_application? ⇒ `Boolean`

is the 'type' portion of our media type 'application'

Returns:

(Boolean)



253
254
255

# File 'lib/ur/content_type.rb', line 253

# is the 'type' portion of our media type 'application' (ignoring case)?
#
# @return [Boolean] false when our type is nil
def type_application?
  return false unless type

  type.casecmp?('application')
end

#type_audio? ⇒ `Boolean`

is the 'type' portion of our media type 'audio'

Returns:

(Boolean)



241
242
243

# File 'lib/ur/content_type.rb', line 241

# is the 'type' portion of our media type 'audio' (ignoring case)?
#
# @return [Boolean] false when our type is nil
def type_audio?
  return false unless type

  type.casecmp?('audio')
end

#type_image? ⇒ `Boolean`

is the 'type' portion of our media type 'image'

Returns:

(Boolean)



235
236
237

# File 'lib/ur/content_type.rb', line 235

# is the 'type' portion of our media type 'image' (ignoring case)?
#
# @return [Boolean] false when our type is nil
def type_image?
  return false unless type

  type.casecmp?('image')
end

#type_message? ⇒ `Boolean`

is the 'type' portion of our media type 'message'

Returns:

(Boolean)



259
260
261

# File 'lib/ur/content_type.rb', line 259

# is the 'type' portion of our media type 'message' (ignoring case)?
#
# @return [Boolean] false when our type is nil
def type_message?
  return false unless type

  type.casecmp?('message')
end

#type_multipart? ⇒ `Boolean`

is the 'type' portion of our media type 'multipart'

Returns:

(Boolean)



265
266
267

# File 'lib/ur/content_type.rb', line 265

# is the 'type' portion of our media type 'multipart' (ignoring case)?
#
# @return [Boolean] false when our type is nil
def type_multipart?
  return false unless type

  type.casecmp?('multipart')
end

#type_text? ⇒ `Boolean`

is the 'type' portion of our media type 'text'

Returns:

(Boolean)



229
230
231

# File 'lib/ur/content_type.rb', line 229

# is the 'type' portion of our media type 'text' (ignoring case)?
#
# @return [Boolean] false when our type is nil
def type_text?
  return false unless type

  type.casecmp?('text')
end

#type_video? ⇒ `Boolean`

is the 'type' portion of our media type 'video'

Returns:

(Boolean)



247
248
249

# File 'lib/ur/content_type.rb', line 247

# is the 'type' portion of our media type 'video' (ignoring case)?
#
# @return [Boolean] false when our type is nil
def type_video?
  return false unless type

  type.casecmp?('video')
end

#xml? ⇒ `Boolean`

is this an XML content type?

Returns:

(Boolean)



217
218
219

# File 'lib/ur/content_type.rb', line 217

# is this an XML content type?
# the suffix is compared (ignoring case) when there is one; otherwise the subtype is.
#
# @return [Boolean]
def xml?
  compared = suffix || subtype
  return false unless compared

  compared.casecmp?('xml')
end

Class: Ur::ContentType

Overview

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*a) ⇒ ContentType

Instance Attribute Details

#facet ⇒ String? (readonly)

#media_type ⇒ String? (readonly)

#parameters ⇒ Hash<String, String> (readonly)

#subtype ⇒ String? (readonly)

#suffix ⇒ String? (readonly)

#type ⇒ String? (readonly)

Instance Method Details

#binary?(unknown: true) ⇒ Boolean

#form_urlencoded? ⇒ Boolean

#json? ⇒ Boolean

#subtype?(other_subtype) ⇒ Boolean

#suffix?(other_suffix) ⇒ Boolean

#type?(other_type) ⇒ Boolean

#type_application? ⇒ Boolean

#type_audio? ⇒ Boolean

#type_image? ⇒ Boolean

#type_message? ⇒ Boolean

#type_multipart? ⇒ Boolean

#type_text? ⇒ Boolean

#type_video? ⇒ Boolean

#xml? ⇒ Boolean

#initialize(*a) ⇒ `ContentType`

#facet ⇒ `String?` (readonly)

#media_type ⇒ `String?` (readonly)

#parameters ⇒ `Hash<String, String>` (readonly)

#subtype ⇒ `String?` (readonly)

#suffix ⇒ `String?` (readonly)

#type ⇒ `String?` (readonly)

#binary?(unknown: true) ⇒ `Boolean`

#form_urlencoded? ⇒ `Boolean`

#json? ⇒ `Boolean`

#subtype?(other_subtype) ⇒ `Boolean`

#suffix?(other_suffix) ⇒ `Boolean`

#type?(other_type) ⇒ `Boolean`

#type_application? ⇒ `Boolean`

#type_audio? ⇒ `Boolean`

#type_image? ⇒ `Boolean`

#type_message? ⇒ `Boolean`

#type_multipart? ⇒ `Boolean`

#type_text? ⇒ `Boolean`

#type_video? ⇒ `Boolean`

#xml? ⇒ `Boolean`