Module: URLify

Defined in:
lib/urlify.rb,
lib/urlify/accents.rb

Constant Summary collapse

# Absolute path (with trailing slash) of the urlify/ directory located next to
# this file.
URLIFY_PATH =
File.expand_path(File.dirname(__FILE__)).concat('/urlify/')
# Lookup table used by URLify.deaccentuate: each key is a single character and
# each value is the plain-ASCII text that replaces it. Some characters expand
# to two letters (e.g. 'Æ' => 'AE', 'ß' => 'ss'), and '&' is spelled out as
# 'and'.
#
# Every lowercase entry that has a single-character uppercase form also has
# that uppercase form listed, so mixed-case input is transliterated
# consistently.
ACCENTMAP =
{
'À' => 'A',
'Á' => 'A',
'Â' => 'A',
'Ã' => 'A',
'Ä' => 'A',
'Å' => 'AA',
'Æ' => 'AE',
'Ç' => 'C',
'È' => 'E',
'É' => 'E',
'Ê' => 'E',
'Ë' => 'E',
'Ì' => 'I',
'Í' => 'I',
'Î' => 'I',
'Ï' => 'I',
'Ð' => 'D',
'Ñ' => 'N',
'Ò' => 'O',
'Ó' => 'O',
'Ô' => 'O',
'Õ' => 'O',
'Ō' => 'O',
'Ö' => 'O',
'Ø' => 'OE',
'Ù' => 'U',
'Ú' => 'U',
'Ü' => 'U',
'Û' => 'U',
'Ū' => 'U',
'Ý' => 'Y',
'Ÿ' => 'Y',
'Þ' => 'Th',
'ß' => 'ss',
'à' => 'a',
'á' => 'a',
'â' => 'a',
'ã' => 'a',
'ä' => 'a',
'å' => 'aa',
'æ' => 'ae',
'ç' => 'c',
'è' => 'e',
'é' => 'e',
'ê' => 'e',
'ë' => 'e',
'ì' => 'i',
'í' => 'i',
'î' => 'i',
'ï' => 'i',
'ð' => 'd',
'ñ' => 'n',
'ò' => 'o',
'ó' => 'o',
'ô' => 'o',
'õ' => 'o',
'ō' => 'o',
'ö' => 'o',
'ø' => 'oe',
'ù' => 'u',
'ú' => 'u',
'û' => 'u',
'ū' => 'u',
'ü' => 'u',
'ý' => 'y',
'þ' => 'th',
'ÿ' => 'y',
'Œ' => 'OE',
'œ' => 'oe',
'&' => 'and'}

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.deaccentuate(string) ⇒ Object

Removes diacritics from an input string’s characters.

So a lowercase ‘u’ with an umlaut, ü, becomes u, while an uppercase ‘A’ with an acute accent, Á, becomes A. This method is UTF-8 safe.



43
44
45
46
47
# File 'lib/urlify.rb', line 43

# Replaces every character of +string+ that has an entry in ACCENTMAP with the
# mapped text and leaves all other characters as they are.
#
# @param string [String] the text to transliterate
# @return [String] a new string built from the per-character replacements
def self.deaccentuate(string)
  # Rubies older than 1.9 are handled by splitting on a UTF-8 aware empty
  # pattern; newer ones use String#chars.
  characters =
    if RUBY_VERSION >= "1.9.0"
      string.chars
    else
      string.split(//u)
    end

  replaced = characters.map do |char|
    ACCENTMAP[char] || char
  end
  replaced.join("")
end

.strip_subtitle(string) ⇒ Object

Removes the first colon in a string and everything after it, along with any whitespace directly before that colon.

Ensures that titles with really long subtitles don’t convert to equally long permalinks.



35
36
37
# File 'lib/urlify.rb', line 35

# Returns the part of +string+ that comes before its first colon. Whitespace
# directly before the colon is dropped together with it; a string without a
# colon comes back whole.
#
# Always returns a String: input that is empty or has nothing in front of
# the first colon (e.g. "" or ": subtitle") yields "" rather than nil.
#
# @param string [String] a title, optionally followed by ": subtitle"
# @return [String] the title portion
def self.strip_subtitle(string)
  # Only the first field is needed, so limit the split to two pieces. With a
  # limit, trailing empty fields are kept, so ":" gives [""]-style results
  # instead of []; the fallback covers the empty string, which still splits
  # to [].
  string.split(/\s*:\s*/, 2).first || ""
end

.urlify(string, separator = "_") ⇒ Object

Converts an input string into a URL-safe string.

  • Leading and trailing whitespace is removed.

  • Diacritics are removed from all characters.

  • All letters are converted to lower case.

  • Remaining whitespace is replaced with separators.

  • Any remaining character which is not a letter, a digit or a valid separator is removed.

Only underscores, dashes, plus signs and the empty string are allowed as separators, although combinations are permitted, so “_”, “--”, “+_-” and “” are all valid separators.



20
21
22
23
24
25
26
27
28
29
# File 'lib/urlify.rb', line 20

# Converts +string+ into a URL-safe slug.
#
# The input is stripped of surrounding whitespace, cut at its first colon
# (via strip_subtitle), run through deaccentuate, and lower-cased. Each
# remaining whitespace character is then replaced by +separator+, and finally
# every character other than a-z, digits, "_", "-" and "+" is removed.
#
# @param string [String] the text to convert
# @param separator [String] replacement for whitespace; must consist solely
#   of "_", "-" and "+" characters (or be empty). Any other value, including
#   nil, falls back to "_".
# @return [String] the slug
def self.urlify(string, separator = "_")
  # \A and \z anchor the whole string. The line anchors ^ and $ would accept
  # any multi-line value that merely contains one valid (or empty) line, such
  # as "abc\n", letting arbitrary letters through as a "separator".
  separator = "_" unless separator =~ /\A[-_+]*\z/

  deaccentuate(strip_subtitle(string.strip)).
    downcase.
    gsub(/\s/, separator).
    gsub(/[^a-z\d_\-+]/, "")
end

Instance Method Details

#deaccentuate ⇒ Object

Instance method version of URLify.deaccentuate, so that the library can be used as a mixin for the String class.



63
64
65
# File 'lib/urlify.rb', line 63

# Instance-level counterpart of URLify.deaccentuate: passes the receiver
# itself to the module-level method and returns that method's result.
def deaccentuate
  URLify.deaccentuate(self)
end

#strip_subtitle ⇒ Object

Instance method version of URLify.strip_subtitle, so that the library can be used as a mixin for the String class.



57
58
59
# File 'lib/urlify.rb', line 57

# Instance-level counterpart of URLify.strip_subtitle: passes the receiver
# itself to the module-level method and returns that method's result.
def strip_subtitle
  URLify.strip_subtitle(self)
end

#urlify(separator = "_") ⇒ Object

Instance method version of URLify.urlify, so that the library can be used as a mixin for the String class.



51
52
53
# File 'lib/urlify.rb', line 51

# Instance-level counterpart of URLify.urlify: passes the receiver itself and
# the given separator (default "_") to the module-level method and returns
# that method's result.
def urlify(separator = "_")
  URLify.urlify(self, separator)
end