Method: Addressable::URI.heuristic_parse
- Defined in:
- lib/addressable/uri.rb
.heuristic_parse(uri, hints = {}) ⇒ Addressable::URI
Converts an input to a URI. The input does not have to be a valid URI — the method will use heuristics to guess what URI was intended. This is not standards-compliant, merely user-friendly.
191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 |
# File 'lib/addressable/uri.rb', line 191

##
# Converts an input to a URI. The input does not have to be a valid URI;
# heuristics are used to guess what URI was intended. This is not
# standards-compliant, merely user-friendly.
#
# @param [String, Addressable::URI, #to_str] uri
#   The value to interpret. An instance of the receiving class is
#   duplicated and returned as-is; an object whose class name starts with
#   "URI" is stringified with #to_s first.
# @param [Hash] hints
#   Options merged over the default <code>{:scheme => "http"}</code>.
#   Only <code>:scheme</code> is read here; it is used whenever a scheme
#   has to be guessed.
#
# @return [Addressable::URI, nil]
#   The parsed URI, or nil when the input is nil.
#
# @raise [TypeError] If the input does not respond to #to_str.
def self.heuristic_parse(uri, hints = {})
  # If we were given nil, return nil.
  return nil unless uri
  # If a URI object is passed, just return itself.
  return uri.dup if uri.kind_of?(self)

  # If a URI object of the Ruby standard library variety is passed,
  # convert it to a string, then parse the string.
  # We do the check this way because we don't want to accidentally
  # cause a missing constant exception to be thrown.
  if uri.class.name =~ /^URI\b/
    uri = uri.to_s
  end

  unless uri.respond_to?(:to_str)
    raise TypeError, "Can't convert #{uri.class} into String."
  end
  # Otherwise, convert to a String
  uri = uri.to_str.dup.strip
  hints = {
    :scheme => "http"
  }.merge(hints)

  # Normalize the slash count after well-known schemes, and prepend the
  # hinted scheme to inputs that start like a dotted-quad address or
  # look like "<digits>.<anything>:<port>".
  case uri
  when /^http:\//i
    uri.sub!(/^http:\/+/i, "http://")
  when /^https:\//i
    uri.sub!(/^https:\/+/i, "https://")
  when /^feed:\/+http:\//i
    uri.sub!(/^feed:\/+http:\/+/i, "feed:http://")
  when /^feed:\//i
    uri.sub!(/^feed:\/+/i, "feed://")
  when %r[^file:/{4}]i
    uri.sub!(%r[^file:/+]i, "file:////")
  when %r[^file://localhost/]i
    uri.sub!(%r[^file://localhost/+]i, "file:///")
  when %r[^file:/+]i
    uri.sub!(%r[^file:/+]i, "file:///")
  when /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
    uri.sub!(/^/, hints[:scheme] + "://")
  when /\A\d+\..*:\d+\z/
    uri = "#{hints[:scheme]}://#{uri}"
  end

  match = uri.match(URIREGEX)
  fragments = match.captures
  # NOTE(review): captures[3] is capture group 4; it is presumably the
  # authority component (as in the RFC 3986 Appendix B regex) -- confirm
  # against the definition of URIREGEX.
  authority = fragments[3]
  if authority && authority.length > 0
    # Swap backslashes for forward slashes and percent-encode spaces.
    new_authority = authority.tr("\\", "/").gsub(" ", "%20")
    # NOTE: We want offset 4, not 3!
    # (MatchData#captures is zero-based from group 1, while
    # MatchData#offset takes the group number itself.)
    offset = match.offset(4)
    uri = uri.dup
    uri[offset[0]...offset[1]] = new_authority
  end

  parsed = self.parse(uri)
  # A "scheme" containing a dot is most likely a hostname (for example
  # "example.com:8080"), so re-parse with the hinted scheme prepended.
  if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
    parsed = self.parse(hints[:scheme] + "://" + uri)
  end
  if parsed.path.include?(".")
    if parsed.path[/\b@\b/]
      # Looks like an email address.
      parsed.scheme = "mailto" unless parsed.scheme
    elsif new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
      # The leading path segment contains a dot: treat it as the host and
      # move it out of the path.
      parsed.defer_validation do
        new_path = parsed.path.sub(
          Regexp.new("^" + Regexp.escape(new_host)), EMPTY_STR)
        parsed.host = new_host
        parsed.path = new_path
        parsed.scheme = hints[:scheme] unless parsed.scheme
      end
    end
  end
  return parsed
end