Class: URI::BLURI

Inherits:
HTTP
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/uri/bluri.rb

Overview

A URI class with a bit extra for canonicalising query strings

Constant Summary collapse

# Literal path characters and the lowercase percent-encoding that replaces
# each of them when a path is canonicalised.
PATH_ESCAPE_MAPPINGS =
{
  '[' => '%5b',
  ']' => '%5d',
  ',' => '%2c',
  '"' => '%22',
  "'" => '%27',
  '|' => '%7c',
  '!' => '%21',
  '£' => '%c2%a3'
}.freeze
# Percent-encoded sequences that are decoded back to their literal character
# when a path is canonicalised.
PATH_UNESCAPE_MAPPINGS =
{
  '%7e' => '~',
  '%21' => '!'
}.freeze
# The PATH_ESCAPE_MAPPINGS keys that are regex metacharacters and so need a
# backslash when placed in a generated pattern. An explicit array is used
# instead of a %w literal because inside %w a backslash followed by a space
# is an escaped space, which would yield " ^" rather than "\\" and "^".
REQUIRE_REGEX_ESCAPE =
(['.', '|', '(', ')', '[', ']', '{', '}', '+', '\\', '^', '$', '*', '?'] & PATH_ESCAPE_MAPPINGS.keys).freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(uri_str) ⇒ BLURI

Returns a new instance of BLURI.

Raises:

  • (URI::InvalidURIError)


30
31
32
33
# File 'lib/uri/bluri.rb', line 30

# Wrap the given string in an Addressable URI.
#
# @param uri_str [String] the URI text handed to Addressable::URI.parse
# @raise [URI::InvalidURIError] when the parse yields no URI object
def initialize(uri_str)
  @uri = ::Addressable::URI.parse(uri_str)
  return if @uri

  raise URI::InvalidURIError, "'#{uri_str}' not a valid URI"
end

Class Method Details

.parse(uri_str) ⇒ Object



54
55
56
57
58
59
60
61
# File 'lib/uri/bluri.rb', line 54

# Build a BLURI from a raw value, first repairing known URI spec breaks.
#
# A String is stripped of leading/trailing whitespace, downcased, and has its
# remaining spaces encoded as %20. When the result is a mailto: URI, every
# ampersand is additionally encoded as %26. Any non-String value is handed to
# BLURI.new untouched.
#
# @param uri_str [String, Object] the raw URI
# @return [BLURI]
def self.parse(uri_str)
  if uri_str.is_a? String
    uri_str = uri_str.strip.downcase.gsub(' ', '%20')
    # Anchor with \A, not ^: only a string that itself starts with "mailto:"
    # qualifies, not one that merely contains a later line starting with it.
    uri_str.gsub!('&', '%26') if uri_str =~ /\Amailto:.*&/
  end
  BLURI.new(uri_str)
end

.path_escape_char_regex ⇒ Object

Generate a regex which matches all characters in PATH_ESCAPE_MAPPINGS



93
94
95
96
97
98
# File 'lib/uri/bluri.rb', line 93

# Memoised character-class regex matching any single key of
# PATH_ESCAPE_MAPPINGS. Keys listed in REQUIRE_REGEX_ESCAPE are
# backslash-escaped before being placed inside the brackets.
#
# @return [Regexp]
def self.path_escape_char_regex
  @path_escape_char_regex ||= begin
    members = PATH_ESCAPE_MAPPINGS.keys.map do |char|
      REQUIRE_REGEX_ESCAPE.include?(char) ? "\\#{char}" : char
    end
    Regexp.new("[#{members.join}]")
  end
end

.path_unescape_code_regex ⇒ Object

Generate a regex which matches all escape sequences in PATH_UNESCAPE_MAPPINGS



102
103
104
105
106
# File 'lib/uri/bluri.rb', line 102

# Memoised regex that is an alternation of every key of
# PATH_UNESCAPE_MAPPINGS, each wrapped in a non-capturing group.
#
# @return [Regexp]
def self.path_unescape_code_regex
  @path_unescape_code_regex ||= begin
    alternatives = PATH_UNESCAPE_MAPPINGS.keys.map { |code| "(?:#{code})" }
    Regexp.new(alternatives.join('|'))
  end
end

Instance Method Details

#canonicalize!(options = {}) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/uri/bluri.rb', line 67

# Reduce this URI to its canonical form, in place.
#
# Steps, in order: an https scheme becomes http; trailing slashes are removed
# from the path; path characters listed in PATH_ESCAPE_MAPPINGS are
# percent-encoded and sequences listed in PATH_UNESCAPE_MAPPINGS are decoded;
# the query is canonicalised via #canonicalize_query!; the fragment is dropped.
#
# @param options [Hash] passed straight through to #canonicalize_query!
# @return [self]
def canonicalize!(options = {})
  @uri.scheme = 'http' if @uri.scheme == 'https'

  # Build the new path on a copy and assign it through the setter instead of
  # mutating the string returned by @uri.path: in-place gsub! raises on a
  # frozen string and bypasses whatever the setter does.
  # The trailing-slash pattern replaces the earlier /^*\/$/ test, which put a
  # quantifier on the ^ anchor.
  path = @uri.path.sub(%r{/+\z}, '')
  path = path.gsub(BLURI.path_escape_char_regex, PATH_ESCAPE_MAPPINGS)
  @uri.path = path.gsub(BLURI.path_unescape_code_regex, PATH_UNESCAPE_MAPPINGS)

  canonicalize_query!(options)

  @uri.fragment = nil
  self
end

#canonicalize_query!(options) ⇒ Object



80
81
82
83
84
85
86
87
88
89
# File 'lib/uri/bluri.rb', line 80

# Filter and sort the query parameters in place.
#
# With options[:allow_query] == :all every parameter is kept; otherwise only
# parameters whose key (as a string) appears in options[:allow_query] survive.
# The surviving pairs are sorted by key and written back via #query_hash=.
#
# @param options [Hash] reads the :allow_query entry (a key, a list of keys, or :all)
def canonicalize_query!(options)
  permitted = options[:allow_query]
  keep_everything = permitted == :all
  allowed_keys = keep_everything ? nil : [permitted].flatten.compact.map(&:to_s)

  query_hash.keep_if { |key, _| keep_everything || allowed_keys.include?(key.to_s) }

  ordered_pairs = query_hash.sort_by { |key, _| key }
  self.query_hash = QueryHash[ordered_pairs]
end

#has_query? ⇒ Boolean

Returns:

  • (Boolean)


63
64
65
# File 'lib/uri/bluri.rb', line 63

# Whether this is an http/https URI that carries a query.
#
# @return [Object] false for any other scheme; otherwise whatever #query
#   returns (so nil when there is no query)
def has_query?
  web_scheme = %w(http https).include?(@uri.scheme)
  web_scheme && query
end

#query=(query_str) ⇒ Object



49
50
51
52
# File 'lib/uri/bluri.rb', line 49

# Replace the query string on the wrapped URI and discard the memoised
# query hash. An empty string is stored as nil.
#
# @param query_str [String, nil] the new query string
def query=(query_str)
  @query_hash = nil
  normalized = query_str == '' ? nil : query_str
  @uri.query = normalized
end

#query_hash ⇒ Object



35
36
37
38
39
40
41
42
# File 'lib/uri/bluri.rb', line 35

def query_hash
  @query_hash ||= CGI::parse(self.query || '').tap do |query_hash|
    # By default, CGI::parse produces lots of arrays. Usually they have a single element
    # in them. That's correct but not terribly usable. Fix it here.
    query_hash.each_pair { |k, v| query_hash[k] = v[0] if v.length == 1 }
    query_hash.extend QueryHash
  end
end

#query_hash=(value) ⇒ Object



44
45
46
47
# File 'lib/uri/bluri.rb', line 44

# Replace the memoised query hash and write its string form back to the
# wrapped URI. A hash whose string form is empty clears the query (nil).
#
# @param value [#to_s] the new query hash
def query_hash=(value)
  @query_hash = value
  serialized = @query_hash.to_s
  @uri.query = serialized == '' ? nil : serialized
end