Class: Twingly::URL

Inherits:
Object
  • Object
show all
Includes:
Comparable
Defined in:
lib/twingly/url.rb,
lib/twingly/version.rb,
lib/twingly/url/error.rb,
lib/twingly/url/hasher.rb,
lib/twingly/url/null_url.rb,
lib/twingly/url/utilities.rb

Defined Under Namespace

Modules: Error, Hasher, Utilities Classes: NullURL

Constant Summary collapse

VERSION =
"5.1.1"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(addressable_uri, public_suffix_domain) ⇒ URL

Returns a new instance of URL.



90
91
92
93
# File 'lib/twingly/url.rb', line 90

# Wraps two already-parsed collaborators; no validation happens here.
#
# @param addressable_uri [Object] stored as-is in +@addressable_uri+
# @param public_suffix_domain [Object] stored as-is in +@public_suffix_domain+
def initialize(addressable_uri, public_suffix_domain)
  @addressable_uri = addressable_uri
  @public_suffix_domain = public_suffix_domain
end

Class Method Details

.internal_parse(potential_url) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/twingly/url.rb', line 38

# Strict parser: builds a URL from +potential_url+ or raises.
#
# Raises Twingly::URL::Error::ParseError when the input cannot be converted
# to a URI, when its scheme does not match ACCEPTED_SCHEMES, or when the
# public-suffix lookup yields nothing or a result without an +sld+.
# Any exception listed in ERRORS_TO_EXTEND is tagged with
# Twingly::URL::Error and re-raised unchanged otherwise.
#
# @param potential_url [Object] anything accepted by +to_addressable_uri+
# @return [URL] result of +new(uri, public_suffix_domain)+
def internal_parse(potential_url)
  uri = to_addressable_uri(potential_url)
  raise Twingly::URL::Error::ParseError if uri.nil?
  raise Twingly::URL::Error::ParseError unless uri.scheme =~ ACCEPTED_SCHEMES

  # A URL that cannot be normalized must not be considered valid.
  try_addressable_normalize(uri)

  suffix_domain = PublicSuffix.parse(uri.host, list: CUSTOM_PSL, default_rule: nil)
  if suffix_domain.nil? || suffix_domain.sld.nil?
    raise Twingly::URL::Error::ParseError
  end

  new(uri, suffix_domain)
rescue *ERRORS_TO_EXTEND => e
  e.extend(Twingly::URL::Error)
  raise
end

.parse(potential_url) ⇒ Object



29
30
31
32
33
34
35
36
# File 'lib/twingly/url.rb', line 29

# Lenient parser: delegates to +internal_parse+.
#
# When +internal_parse+ raises Twingly::URL::Error (or ParseError) a
# NullURL is returned instead. Every other exception is tagged with
# Twingly::URL::Error and re-raised.
#
# @param potential_url [Object] forwarded unchanged to +internal_parse+
# @return [Object] the value from +internal_parse+, or a new NullURL
def parse(potential_url)
  internal_parse(potential_url)
rescue Twingly::URL::Error, Twingly::URL::Error::ParseError
  NullURL.new
rescue Exception => e
  # Exception (not just StandardError) is rescued so that anything leaving
  # this method carries the Error tag; it is always re-raised, never swallowed.
  e.extend(Twingly::URL::Error)
  raise
end

.to_addressable_uri(potential_url) ⇒ Object



61
62
63
64
65
66
67
68
69
70
# File 'lib/twingly/url.rb', line 61

# Coerces +potential_url+ into an Addressable::URI.
#
# An existing Addressable::URI is returned untouched. Anything else is
# converted with Kernel#String, scrubbed of invalid byte sequences, and
# handed to Addressable::URI.heuristic_parse.
#
# @param potential_url [Addressable::URI, Object]
# @return [Object] the given URI, or whatever +heuristic_parse+ returns
def to_addressable_uri(potential_url)
  return potential_url if potential_url.is_a?(Addressable::URI)

  Addressable::URI.heuristic_parse(String(potential_url).scrub)
end

.try_addressable_normalize(addressable_uri) ⇒ Object

Workaround for the following bug in addressable: github.com/sporkmonger/addressable/issues/224



74
75
76
77
78
79
80
81
82
# File 'lib/twingly/url.rb', line 74

# Calls +normalize+ on the URI, translating one specific failure.
#
# An ArgumentError whose message contains "invalid byte sequence in UTF-8"
# is converted into Twingly::URL::Error::ParseError; any other
# ArgumentError is re-raised as-is.
#
# @param addressable_uri [#normalize]
# @return [Object] the result of +addressable_uri.normalize+
# @raise [Twingly::URL::Error::ParseError] on the invalid-byte-sequence error
def try_addressable_normalize(addressable_uri)
  addressable_uri.normalize
rescue ArgumentError => e
  raise unless e.message.include?("invalid byte sequence in UTF-8")

  raise Twingly::URL::Error::ParseError
end

Instance Method Details

#<=>(other) ⇒ Object



188
189
190
# File 'lib/twingly/url.rb', line 188

# Orders URLs by comparing their string forms.
#
# @param other [#to_s]
# @return [Integer, nil] result of String#<=> on both +to_s+ values
def <=>(other)
  to_s <=> other.to_s
end

#domain ⇒ Object



119
120
121
# File 'lib/twingly/url.rb', line 119

# Delegates to the parsed public-suffix domain object.
#
# @return [Object] whatever +public_suffix_domain.domain+ returns
def domain
  public_suffix_domain.domain
end

#host ⇒ Object



123
124
125
# File 'lib/twingly/url.rb', line 123

# Host component, read straight from the wrapped URI.
#
# @return [Object] whatever +addressable_uri.host+ returns
def host
  addressable_uri.host
end

#inspect ⇒ Object



196
197
198
# File 'lib/twingly/url.rb', line 196

# Debug representation: class name, hexadecimal object id and the URL string.
#
# @return [String] e.g. "#<Twingly::URL:0x3c http://example.com/>"
def inspect
  format("#<%s:0x%x %s>", self.class.name, __id__, to_s)
end

#normalized ⇒ Object



139
140
141
142
143
144
145
146
147
# File 'lib/twingly/url.rb', line 139

# Builds a normalized copy of this URL.
#
# The wrapped URI is duplicated (the original is left untouched), its
# scheme, host and path are replaced by their normalized counterparts,
# and the copy is re-parsed through the class-level +parse+.
#
# @return [Object] whatever +self.class.parse+ returns for the copy
def normalized
  copy = addressable_uri.dup.tap do |uri|
    uri.scheme = normalized_scheme
    uri.host = normalized_host
    uri.path = normalized_path
  end

  self.class.parse(copy)
end

#normalized_host ⇒ Object



153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/twingly/url.rb', line 153

# Normalized host for this URL.
#
# Starts from the wrapped URI's own normalized host, prefixes "www." when
# the public-suffix domain reports no subdomain, then passes the result
# through +normalize_blogspot+.
#
# @return [Object] whatever +normalize_blogspot+ returns
def normalized_host
  candidate = addressable_uri.normalized_host
  suffix_domain = public_suffix_domain

  candidate = "www.#{candidate}" unless suffix_domain.subdomain?

  normalize_blogspot(candidate, suffix_domain)
end

#normalized_path ⇒ Object



166
167
168
169
170
# File 'lib/twingly/url.rb', line 166

# Path with trailing slashes removed via +strip_trailing_slashes+;
# falls back to "/" when nothing is left.
#
# @return [String]
def normalized_path
  stripped = strip_trailing_slashes(addressable_uri.path)
  return "/" if stripped.empty?

  stripped
end

#normalized_scheme ⇒ Object



149
150
151
# File 'lib/twingly/url.rb', line 149

# Lower-cased form of +scheme+.
#
# @return [String]
def normalized_scheme
  scheme.downcase
end

#origin ⇒ Object



127
128
129
# File 'lib/twingly/url.rb', line 127

# Origin of the URL, read straight from the wrapped URI.
#
# @return [Object] whatever +addressable_uri.origin+ returns
def origin
  addressable_uri.origin
end

#password ⇒ Object



180
181
182
# File 'lib/twingly/url.rb', line 180

# Password component as a string; a missing (nil) password becomes "".
#
# @return [String]
def password
  addressable_uri.password.to_s
end

#path ⇒ Object



131
132
133
# File 'lib/twingly/url.rb', line 131

# Path component, read straight from the wrapped URI.
#
# @return [Object] whatever +addressable_uri.path+ returns
def path
  addressable_uri.path
end

#scheme ⇒ Object



95
96
97
# File 'lib/twingly/url.rb', line 95

# Scheme component, read straight from the wrapped URI.
#
# @return [Object] whatever +addressable_uri.scheme+ returns
def scheme
  addressable_uri.scheme
end

#sld ⇒ Object



103
104
105
# File 'lib/twingly/url.rb', line 103

# Delegates to the parsed public-suffix domain object.
#
# @return [Object] whatever +public_suffix_domain.sld+ returns
def sld
  public_suffix_domain.sld
end

#tld ⇒ Object



107
108
109
# File 'lib/twingly/url.rb', line 107

# Delegates to the parsed public-suffix domain object.
#
# @return [Object] whatever +public_suffix_domain.tld+ returns
def tld
  public_suffix_domain.tld
end

#to_s ⇒ Object



192
193
194
# File 'lib/twingly/url.rb', line 192

# String form of the URL, taken from the wrapped URI.
#
# @return [String] result of +addressable_uri.to_s+
def to_s
  addressable_uri.to_s
end

#trd ⇒ Object



99
100
101
# File 'lib/twingly/url.rb', line 99

# +trd+ of the parsed public-suffix domain as a string; nil becomes "".
#
# @return [String]
def trd
  public_suffix_domain.trd.to_s
end

#ttld ⇒ Object

Many ccTLDs have a second level [1] underneath their ccTLD; use this when you don’t care about the second level.

[1]: en.wikipedia.org/wiki/Second-level_domain



115
116
117
# File 'lib/twingly/url.rb', line 115

# Last dot-separated label of +tld+ (e.g. "uk" for "co.uk").
#
# @return [String, nil] nil when splitting +tld+ yields no labels
def ttld
  tld.split(".").last
end

#user ⇒ Object



176
177
178
# File 'lib/twingly/url.rb', line 176

# User component as a string; a missing (nil) user becomes "".
#
# @return [String]
def user
  addressable_uri.user.to_s
end

#userinfo ⇒ Object



172
173
174
# File 'lib/twingly/url.rb', line 172

# Userinfo component as a string; a missing (nil) value becomes "".
#
# @return [String]
def userinfo
  addressable_uri.userinfo.to_s
end

#valid? ⇒ Boolean

Returns:

  • (Boolean)


184
185
186
# File 'lib/twingly/url.rb', line 184

# Always true for this class.
#
# @return [Boolean] true
def valid?
  true
end

#without_scheme ⇒ Object



135
136
137
# File 'lib/twingly/url.rb', line 135

# String form of the URL with the leading "<scheme>:" removed.
#
# Only a prefix at the very start of the string is stripped; later
# occurrences (e.g. inside a query string) are left alone. The scheme is
# escaped before being placed in the pattern so that characters such as
# "+" or "." are matched literally rather than as regex operators.
#
# @return [String] e.g. "//example.com/" for "http://example.com/"
def without_scheme
  to_s.sub(/\A#{Regexp.escape(scheme.to_s)}:/, "")
end