Class: Twingly::URL

Inherits:
Object
  • Object
show all
Includes:
Comparable
Defined in:
lib/twingly/url.rb,
lib/twingly/version.rb,
lib/twingly/url/error.rb,
lib/twingly/url/hasher.rb,
lib/twingly/url/null_url.rb,
lib/twingly/url/utilities.rb

Defined Under Namespace

Modules: Error, Hasher, Utilities
Classes: NullURL

Constant Summary collapse

VERSION =
"5.1.0"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(addressable_uri, public_suffix_domain) ⇒ URL

Returns a new instance of URL.



85
86
87
88
# File 'lib/twingly/url.rb', line 85

# Wraps an already-parsed URI together with its parsed domain. Only stores
# the two arguments; no parsing or validation happens here.
#
# @param addressable_uri the URI object the instance delegates to
#   (.internal_parse passes the result of to_addressable_uri)
# @param public_suffix_domain the domain object the instance delegates to
#   (.internal_parse passes the result of PublicSuffix.parse)
def initialize(addressable_uri, public_suffix_domain)
  @addressable_uri      = addressable_uri
  @public_suffix_domain = public_suffix_domain
end

Class Method Details

.internal_parse(potential_url) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/twingly/url.rb', line 33

# Strict parser: builds a URL from +potential_url+ or raises.
#
# Raises Twingly::URL::Error::ParseError when the input cannot be turned
# into a URI, when its scheme does not match ACCEPTED_SCHEMES, or when the
# host yields no domain / no second-level domain. Any exception listed in
# ERRORS_TO_EXTEND is tagged with the Twingly::URL::Error module and then
# re-raised unchanged, so callers can rescue Twingly::URL::Error for both.
#
# @param potential_url the value to parse (see to_addressable_uri)
# @return the object built by +new+
def internal_parse(potential_url)
  uri = to_addressable_uri(potential_url)
  raise Twingly::URL::Error::ParseError if uri.nil?
  raise Twingly::URL::Error::ParseError unless uri.scheme =~ ACCEPTED_SCHEMES

  # A URL that cannot be normalized is not considered valid; the call is
  # made only for the exception it may raise.
  try_addressable_normalize(uri)

  suffix_domain = PublicSuffix.parse(uri.host, list: CUSTOM_PSL,
    default_rule: nil)
  if suffix_domain.nil? || suffix_domain.sld.nil?
    raise Twingly::URL::Error::ParseError
  end

  new(uri, suffix_domain)
rescue *ERRORS_TO_EXTEND => parse_failure
  parse_failure.extend(Twingly::URL::Error)
  raise
end

.parse(potential_url) ⇒ Object



27
28
29
30
31
# File 'lib/twingly/url.rb', line 27

# Lenient parser: never raises for an unparsable URL.
#
# Delegates to internal_parse and converts any exception tagged with
# Twingly::URL::Error (or a ParseError) into a NullURL. Other exceptions
# propagate.
#
# @param potential_url the value to parse
# @return the result of internal_parse, or a new NullURL on failure
def parse(potential_url)
  internal_parse(potential_url)
rescue Twingly::URL::Error, Twingly::URL::Error::ParseError
  NullURL.new
end

.to_addressable_uri(potential_url) ⇒ Object



56
57
58
59
60
61
62
63
64
65
# File 'lib/twingly/url.rb', line 56

# Converts +potential_url+ into an Addressable::URI.
#
# An Addressable::URI is returned as-is. Anything else is coerced with
# String(), scrubbed of invalid byte sequences, and handed to
# Addressable::URI.heuristic_parse.
#
# @param potential_url [Addressable::URI, #to_s] the value to convert
# @return whatever Addressable::URI.heuristic_parse returns, or the
#   argument itself when it already is an Addressable::URI
def to_addressable_uri(potential_url)
  return potential_url if potential_url.is_a?(Addressable::URI)

  Addressable::URI.heuristic_parse(String(potential_url).scrub)
end

.try_addressable_normalize(addressable_uri) ⇒ Object

Workaround for the following bug in addressable: github.com/sporkmonger/addressable/issues/224



69
70
71
72
73
74
75
76
77
# File 'lib/twingly/url.rb', line 69

# Calls +normalize+ on the URI, translating one specific failure.
#
# An ArgumentError whose message contains "invalid byte sequence in UTF-8"
# is converted into Twingly::URL::Error::ParseError; every other
# ArgumentError is re-raised untouched.
#
# @param addressable_uri the URI to normalize
# @return the result of +addressable_uri.normalize+
def try_addressable_normalize(addressable_uri)
  addressable_uri.normalize
rescue ArgumentError => normalize_error
  raise unless normalize_error.message.include?("invalid byte sequence in UTF-8")

  raise Twingly::URL::Error::ParseError
end

Instance Method Details

#<=>(other) ⇒ Object



183
184
185
# File 'lib/twingly/url.rb', line 183

# Orders URLs by their string form; this is the hook Comparable builds on.
#
# @param other any object; it is compared via its +to_s+
# @return [Integer, nil] the result of String#<=> on the two string forms
def <=>(other)
  to_s <=> other.to_s
end

#domain ⇒ Object



114
115
116
# File 'lib/twingly/url.rb', line 114

# Delegates to +public_suffix_domain.domain+.
# NOTE(review): presumably the registrable domain (second-level domain plus
# public suffix) as defined by the public_suffix gem — confirm.
def domain
  public_suffix_domain.domain
end

#host ⇒ Object



118
119
120
# File 'lib/twingly/url.rb', line 118

# Returns the host of the wrapped URI (delegates to +addressable_uri.host+).
def host
  addressable_uri.host
end

#inspect ⇒ Object



191
192
193
# File 'lib/twingly/url.rb', line 191

# Debug representation: class name, object id in hexadecimal, and the
# URL's string form, e.g. "#<Twingly::URL:0x3c http://example.com/>".
#
# @return [String]
def inspect
  format("#<%s:0x%x %s>", self.class.name, __id__, to_s)
end

#normalized ⇒ Object



134
135
136
137
138
139
140
141
142
# File 'lib/twingly/url.rb', line 134

# Returns a normalized version of this URL.
#
# Works on a copy of the wrapped URI (the receiver is left untouched),
# replaces its scheme, host and path with their normalized forms, and
# runs the result back through the class-level +parse+.
#
# @return the result of +self.class.parse+ for the normalized URI
def normalized
  normalized_url = addressable_uri.dup.tap do |copy|
    copy.scheme = normalized_scheme
    copy.host   = normalized_host
    copy.path   = normalized_path
  end

  self.class.parse(normalized_url)
end

#normalized_host ⇒ Object



148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/twingly/url.rb', line 148

# Returns the host in normalized form.
#
# Starts from +addressable_uri.normalized_host+, prefixes "www." when the
# parsed domain reports no subdomain, and finally passes the host through
# normalize_blogspot.
#
# @return the value returned by normalize_blogspot
def normalized_host
  bare_host  = addressable_uri.normalized_host
  registered = public_suffix_domain
  candidate  = registered.subdomain? ? bare_host : "www.#{bare_host}"

  normalize_blogspot(candidate, registered)
end

#normalized_path ⇒ Object



161
162
163
164
165
# File 'lib/twingly/url.rb', line 161

# Returns the URI path after strip_trailing_slashes; when nothing is left
# the root path "/" is returned instead of an empty string.
def normalized_path
  trimmed = strip_trailing_slashes(addressable_uri.path)
  return "/" if trimmed.empty?

  trimmed
end

#normalized_scheme ⇒ Object



144
145
146
# File 'lib/twingly/url.rb', line 144

# Returns #scheme converted to lower case.
def normalized_scheme
  scheme.downcase
end

#origin ⇒ Object



122
123
124
# File 'lib/twingly/url.rb', line 122

# Returns the origin of the wrapped URI (delegates to
# +addressable_uri.origin+).
def origin
  addressable_uri.origin
end

#password ⇒ Object



175
176
177
# File 'lib/twingly/url.rb', line 175

# Returns the password component of the wrapped URI as a String.
# The trailing +to_s+ means a nil password comes back as "".
#
# @return [String]
def password
  addressable_uri.password.to_s
end

#path ⇒ Object



126
127
128
# File 'lib/twingly/url.rb', line 126

# Returns the path of the wrapped URI, unmodified (delegates to
# +addressable_uri.path+). See #normalized_path for the cleaned-up form.
def path
  addressable_uri.path
end

#scheme ⇒ Object



90
91
92
# File 'lib/twingly/url.rb', line 90

# Returns the scheme of the wrapped URI exactly as stored (delegates to
# +addressable_uri.scheme+). See #normalized_scheme for the lower-cased form.
def scheme
  addressable_uri.scheme
end

#sld ⇒ Object



98
99
100
# File 'lib/twingly/url.rb', line 98

# Returns the second-level domain (delegates to +public_suffix_domain.sld+).
# .internal_parse rejects domains whose sld is nil, so URLs built through
# it always have one.
def sld
  public_suffix_domain.sld
end

#tld ⇒ Object



102
103
104
# File 'lib/twingly/url.rb', line 102

# Returns the top-level domain part (delegates to
# +public_suffix_domain.tld+). It may contain more than one label; #ttld
# returns only the last one.
def tld
  public_suffix_domain.tld
end

#to_s ⇒ Object



187
188
189
# File 'lib/twingly/url.rb', line 187

# Returns the string form of the wrapped URI (delegates to
# +addressable_uri.to_s+). #<=>, #inspect and #without_scheme build on it.
def to_s
  addressable_uri.to_s
end

#trd ⇒ Object



94
95
96
# File 'lib/twingly/url.rb', line 94

# Returns +public_suffix_domain.trd+ as a String.
# The trailing +to_s+ means a nil value comes back as "".
# NOTE(review): presumably the subdomain part ("third-level domain") per the
# public_suffix gem — confirm.
#
# @return [String]
def trd
  public_suffix_domain.trd.to_s
end

#ttld ⇒ Object

Many ccTLDs have a second level [1] underneath their ccTLD; use this when you don’t care about the second level.

[1]: https://en.wikipedia.org/wiki/Second-level_domain



110
111
112
# File 'lib/twingly/url.rb', line 110

# Returns the last dot-separated label of #tld, e.g. "uk" when #tld is
# "co.uk". Returns nil if #tld is an empty string (split yields no labels).
def ttld
  tld.split(".").last
end

#user ⇒ Object



171
172
173
# File 'lib/twingly/url.rb', line 171

# Returns the user component of the wrapped URI as a String.
# The trailing +to_s+ means a nil user comes back as "".
#
# @return [String]
def user
  addressable_uri.user.to_s
end

#userinfo ⇒ Object



167
168
169
# File 'lib/twingly/url.rb', line 167

# Returns the userinfo component of the wrapped URI as a String.
# The trailing +to_s+ means a nil userinfo comes back as "".
#
# @return [String]
def userinfo
  addressable_uri.userinfo.to_s
end

#valid? ⇒ Boolean

Returns:

  • (Boolean)


179
180
181
# File 'lib/twingly/url.rb', line 179

# Always true for this class.
# NOTE(review): .parse returns a NullURL for unparsable input; presumably
# NullURL#valid? answers false so callers can tell the two apart — confirm.
#
# @return [Boolean]
def valid?
  true
end

#without_scheme ⇒ Object



130
131
132
# File 'lib/twingly/url.rb', line 130

# Returns the URL's string form with the leading "<scheme>:" removed,
# e.g. "http://example.com/" becomes "//example.com/".
#
# Only a prefix at the very start of the string is removed; later
# occurrences of the scheme (for instance inside a query string) are kept.
#
# @return [String]
def without_scheme
  # Escape the scheme so characters that are legal in URI schemes but
  # special in a regexp ("+", ".") are matched literally.
  to_s.sub(/\A#{Regexp.escape(scheme)}:/, "")
end