Class: Twingly::URL

Inherits:
Object
  • Object
show all
Includes:
Comparable
Defined in:
lib/twingly/url.rb,
lib/twingly/version.rb,
lib/twingly/url/error.rb,
lib/twingly/url/hasher.rb,
lib/twingly/url/null_url.rb,
lib/twingly/url/utilities.rb

Defined Under Namespace

Modules: Error, Hasher, Utilities. Classes: NullURL

Constant Summary collapse

CARRIAGE_RETURN =
"\u000D"
LINE_FEED =
"\u000A"
VERSION =
"6.0.4"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(addressable_uri, public_suffix_domain) ⇒ URL

Returns a new instance of URL.



113
114
115
116
# File 'lib/twingly/url.rb', line 113

# Wraps an already-parsed URI together with its public-suffix domain.
#
# Both arguments are stored as-is in instance variables; no validation is
# performed here.
#
# @param addressable_uri the parsed URI object (internal_parse passes the
#   result of Addressable::URI.heuristic_parse)
# @param public_suffix_domain the domain object (internal_parse passes the
#   result of PublicSuffix.parse)
def initialize(addressable_uri, public_suffix_domain)
  @addressable_uri      = addressable_uri
  @public_suffix_domain = public_suffix_domain
end

Class Method Details

.clean_input(input) ⇒ Object



82
83
84
85
86
# File 'lib/twingly/url.rb', line 82

# Prepares raw input for parsing.
#
# The input is coerced with Kernel#String, invalid byte sequences are
# replaced via String#scrub, and the result is handed to strip_whitespace.
#
# @param input the raw value to clean; anything Kernel#String accepts
# @return whatever strip_whitespace returns for the coerced, scrubbed string
def clean_input(input)
  strip_whitespace(String(input).scrub)
end

.internal_parse(input) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/twingly/url.rb', line 58

# Parses +input+ into a new instance, raising when the input is rejected.
#
# The input is cleaned with clean_input, parsed with
# Addressable::URI.heuristic_parse, and then checked step by step: the URI
# must be non-nil, its scheme must match ACCEPTED_SCHEMES, it must survive
# try_addressable_normalize, and PublicSuffix.parse must yield a domain with
# a non-nil +sld+.
#
# @param input the value to parse; forwarded to clean_input
# @return a new instance built from the parsed URI and the PublicSuffix domain
# @raise [Twingly::URL::Error::ParseError] when any of the checks above fails
# @raise any exception listed in ERRORS_TO_EXTEND, re-raised after being
#   extended with Twingly::URL::Error
def internal_parse(input)
  potential_url   = clean_input(input)
  addressable_uri = Addressable::URI.heuristic_parse(potential_url)
  raise Twingly::URL::Error::ParseError if addressable_uri.nil?

  # Only schemes matching ACCEPTED_SCHEMES are allowed
  scheme = addressable_uri.scheme
  raise Twingly::URL::Error::ParseError unless scheme =~ ACCEPTED_SCHEMES

  # URLs that can't be normalized should not be valid
  try_addressable_normalize(addressable_uri)

  # The host is looked up against CUSTOM_PSL with no default rule
  host = addressable_uri.host
  public_suffix_domain = PublicSuffix.parse(host, list: CUSTOM_PSL,
    default_rule: nil)
  raise Twingly::URL::Error::ParseError if public_suffix_domain.nil?

  # A domain without a second-level part is rejected
  raise Twingly::URL::Error::ParseError if public_suffix_domain.sld.nil?

  new(addressable_uri, public_suffix_domain)
rescue *ERRORS_TO_EXTEND => error
  # Tag the exception with the Error module, then re-raise the same object
  error.extend(Twingly::URL::Error)
  raise
end

.parse(potential_url) ⇒ Object



49
50
51
52
53
54
55
56
# File 'lib/twingly/url.rb', line 49

# Parses +potential_url+, returning a NullURL instead of raising when the
# input is rejected.
#
# @param potential_url the value to parse; forwarded to internal_parse
# @return the result of internal_parse, or a new NullURL when internal_parse
#   raises an exception that is a Twingly::URL::Error (or a ParseError)
# @raise any other exception, re-raised after being extended with
#   Twingly::URL::Error
def parse(potential_url)
  internal_parse(potential_url)
rescue Twingly::URL::Error, Twingly::URL::Error::ParseError
  NullURL.new
rescue Exception => unexpected
  # NOTE(review): rescuing Exception also tags SignalException and SystemExit
  # with Twingly::URL::Error before re-raising -- confirm this is intended.
  unexpected.extend(Twingly::URL::Error)
  raise
end

.strip_whitespace(input) ⇒ Object



88
89
90
91
92
# File 'lib/twingly/url.rb', line 88

# Removes every match of LEADING_AND_TRAILING_WHITESPACE from +input+.
#
# Strings whose encoding is not UTF-8 are returned untouched (the same
# object), so the pattern is only ever applied to UTF-8 strings.
#
# @param input [String] the string to strip
# @return [String] a stripped copy for UTF-8 input, otherwise +input+ itself
def strip_whitespace(input)
  if input.encoding == Encoding::UTF_8
    input.gsub(LEADING_AND_TRAILING_WHITESPACE, "")
  else
    input
  end
end

.try_addressable_normalize(addressable_uri) ⇒ Object

Workaround for the following bug in addressable: github.com/sporkmonger/addressable/issues/224



96
97
98
99
100
101
102
103
104
# File 'lib/twingly/url.rb', line 96

# Calls +normalize+ on the URI, translating one specific failure.
#
# An ArgumentError whose message contains "invalid byte sequence in UTF-8"
# is replaced by Twingly::URL::Error::ParseError; every other ArgumentError
# is re-raised unchanged.
#
# @param addressable_uri the URI object to normalize
# @return the result of +addressable_uri.normalize+
# @raise [Twingly::URL::Error::ParseError] for the invalid-byte-sequence case
# @raise [ArgumentError] for any other ArgumentError raised by +normalize+
def try_addressable_normalize(addressable_uri)
  addressable_uri.normalize
rescue ArgumentError => failure
  raise unless failure.message.include?("invalid byte sequence in UTF-8")

  raise Twingly::URL::Error::ParseError
end

Instance Method Details

#<=>(other) ⇒ Object



211
212
213
# File 'lib/twingly/url.rb', line 211

# Orders URLs by their string form.
#
# @param other the object to compare with; its +to_s+ is used
# @return the result of comparing this object's +to_s+ with +other.to_s+
def <=>(other)
  to_s <=> other.to_s
end

#domain ⇒ Object



142
143
144
# File 'lib/twingly/url.rb', line 142

# Delegates to +domain+ on the stored public-suffix domain object.
#
# @return whatever +public_suffix_domain.domain+ returns
def domain
  public_suffix_domain.domain
end

#eql?(other) ⇒ Boolean

Returns:

  • (Boolean)


215
216
217
218
219
# File 'lib/twingly/url.rb', line 215

# Hash-key equality: true only for an object of the same class (or a
# subclass) whose string form is identical.
#
# The string forms are compared directly rather than their hash values, so
# two different URLs whose hashes happen to collide are never reported as
# equal. Because #hash is derived from +to_s+, objects that are +eql?+ still
# always share a hash.
#
# @param other the object to compare with
# @return [Boolean]
def eql?(other)
  other.is_a?(self.class) && to_s == other.to_s
end

#hash ⇒ Object



221
222
223
# File 'lib/twingly/url.rb', line 221

# Hash value derived from the string form, so objects with the same +to_s+
# produce the same hash.
#
# @return the +hash+ of this object's +to_s+
def hash
  to_s.hash
end

#host ⇒ Object



146
147
148
# File 'lib/twingly/url.rb', line 146

# Delegates to +host+ on the stored URI object.
#
# @return whatever +addressable_uri.host+ returns
def host
  addressable_uri.host
end

#inspect ⇒ Object



229
230
231
# File 'lib/twingly/url.rb', line 229

# Developer-facing representation: class name, object id in hexadecimal,
# and the URL string.
#
# @return [String] e.g. "#<ClassName:0x1f4 http://example.com/>"
def inspect
  class_name = self.class.name
  url_string = to_s

  format("#<%s:0x%x %s>", class_name, __id__, url_string)
end

#normalized ⇒ Object



162
163
164
165
166
167
168
169
170
# File 'lib/twingly/url.rb', line 162

# Builds a normalized variant of this URL.
#
# A copy of the stored URI gets its scheme, host and path replaced by
# normalized_scheme, normalized_host and normalized_path; the copy is then
# run through the class-level +parse+. The stored URI itself is not modified.
#
# @return the result of +self.class.parse+ for the normalized copy
def normalized
  normalized_copy = addressable_uri.dup.tap do |uri|
    uri.scheme = normalized_scheme
    uri.host   = normalized_host
    uri.path   = normalized_path
  end

  self.class.parse(normalized_copy)
end

#normalized_host ⇒ Object



176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/twingly/url.rb', line 176

# Computes the host used by #normalized.
#
# Starts from the URI's +normalized_host+, prefixes it with "www." when the
# public-suffix domain reports no subdomain, and finally passes it through
# normalize_blogspot.
#
# @return whatever normalize_blogspot returns for the (possibly prefixed) host
def normalized_host
  hostname      = addressable_uri.normalized_host
  suffix_domain = public_suffix_domain

  hostname = "www.#{hostname}" unless suffix_domain.subdomain?

  normalize_blogspot(hostname, suffix_domain)
end

#normalized_path ⇒ Object



189
190
191
192
193
# File 'lib/twingly/url.rb', line 189

# Computes the path used by #normalized.
#
# The URI's path is passed through strip_trailing_slashes; if nothing is
# left, "/" is returned instead.
#
# @return the stripped path, or "/" when the stripped path is empty
def normalized_path
  stripped = strip_trailing_slashes(addressable_uri.path)
  return "/" if stripped.empty?

  stripped
end

#normalized_scheme ⇒ Object



172
173
174
# File 'lib/twingly/url.rb', line 172

# Computes the scheme used by #normalized: the current scheme, downcased.
#
# @return the result of +scheme.downcase+
def normalized_scheme
  scheme.downcase
end

#origin ⇒ Object



150
151
152
# File 'lib/twingly/url.rb', line 150

# Delegates to +origin+ on the stored URI object.
#
# @return whatever +addressable_uri.origin+ returns
def origin
  addressable_uri.origin
end

#password ⇒ Object



203
204
205
# File 'lib/twingly/url.rb', line 203

# Password component of the stored URI, coerced with +to_s+ (so a nil
# password becomes an empty string).
#
# @return [String]
def password
  addressable_uri.password.to_s
end

#path ⇒ Object



154
155
156
# File 'lib/twingly/url.rb', line 154

# Delegates to +path+ on the stored URI object.
#
# @return whatever +addressable_uri.path+ returns
def path
  addressable_uri.path
end

#scheme ⇒ Object



118
119
120
# File 'lib/twingly/url.rb', line 118

# Delegates to +scheme+ on the stored URI object.
#
# @return whatever +addressable_uri.scheme+ returns
def scheme
  addressable_uri.scheme
end

#sld ⇒ Object



126
127
128
# File 'lib/twingly/url.rb', line 126

# Delegates to +sld+ on the stored public-suffix domain object.
#
# @return whatever +public_suffix_domain.sld+ returns
def sld
  public_suffix_domain.sld
end

#tld ⇒ Object



130
131
132
# File 'lib/twingly/url.rb', line 130

# Delegates to +tld+ on the stored public-suffix domain object.
#
# @return whatever +public_suffix_domain.tld+ returns
def tld
  public_suffix_domain.tld
end

#to_s ⇒ Object



225
226
227
# File 'lib/twingly/url.rb', line 225

# String form of the URL, taken from the stored URI object.
#
# @return whatever +addressable_uri.to_s+ returns
def to_s
  addressable_uri.to_s
end

#trd ⇒ Object



122
123
124
# File 'lib/twingly/url.rb', line 122

# +trd+ of the stored public-suffix domain object, coerced with +to_s+ (so a
# nil value becomes an empty string).
#
# @return [String]
def trd
  public_suffix_domain.trd.to_s
end

#ttld ⇒ Object

Many ccTLDs have a second level underneath their ccTLD [1]. Use this method when you don’t care about the second level.

[1]: en.wikipedia.org/wiki/Second-level_domain



138
139
140
# File 'lib/twingly/url.rb', line 138

# Last dot-separated label of #tld, e.g. "uk" for a tld of "co.uk".
#
# @return the final label of +tld+, or nil when splitting +tld+ on "."
#   yields no labels
def ttld
  labels = tld.split(".")
  labels[-1]
end

#user ⇒ Object



199
200
201
# File 'lib/twingly/url.rb', line 199

# User component of the stored URI, coerced with +to_s+ (so a nil user
# becomes an empty string).
#
# @return [String]
def user
  addressable_uri.user.to_s
end

#userinfo ⇒ Object



195
196
197
# File 'lib/twingly/url.rb', line 195

# Userinfo component of the stored URI, coerced with +to_s+ (so a nil
# userinfo becomes an empty string).
#
# @return [String]
def userinfo
  addressable_uri.userinfo.to_s
end

#valid? ⇒ Boolean

Returns:

  • (Boolean)


207
208
209
# File 'lib/twingly/url.rb', line 207

# Always true for instances of this class.
#
# @return [Boolean] true
def valid?
  true
end

#without_scheme ⇒ Object



158
159
160
# File 'lib/twingly/url.rb', line 158

# String form of the URL with the leading "<scheme>:" removed, e.g.
# "//example.com/" for "http://example.com/".
#
# The scheme is escaped before being interpolated into the pattern so that
# characters such as "+" or "." in a scheme are matched literally instead of
# being interpreted as regular-expression syntax. +to_s+ on the scheme keeps
# a nil scheme from raising.
#
# @return [String] the URL string without its scheme prefix; unchanged when
#   the string does not start with "<scheme>:"
def without_scheme
  to_s.sub(/\A#{Regexp.escape(scheme.to_s)}:/, "")
end