Module: StringUtils
- Extended by:
- StringUtils
- Included in:
- StringUtils
- Defined in:
- lib/string_utils.rb,
lib/string_utils/version.rb,
lib/string_utils/transliteration.rb
Overview
StringUtils is a library that provides various handy string manipulation methods. Example usage:
* StringUtils.truncate("hello world", 10, "...") #=> "hello..."
* StringUtils.normalize_name "\302\240 Gran Via/Avda.de Asturias " #=> "Gran Via / Avda. de Asturias"
* StringUtils.urlify("waßer") #=> "wasser"
Constant Summary collapse
- NBSP =
"\302\240"
- WHITESPACE_MATCHER =
"(?:\s|#{NBSP})"
- WHITESPACE =
/#{WHITESPACE_MATCHER}/
- NOT_WHITESPACE =
"[^\s#{NBSP}]"
- WHITESPACES =
/#{WHITESPACE_MATCHER}+/
- VERSION =
"1.0.4"
- TRANSLITERATIONS =
Based on transliteration table from i18n v0.5.0
{ # Latin "À" =>"A", "Á"=>"A", "Â"=>"A", "Ã"=>"A", "Ä"=>"A", "Å"=>"A", "Æ"=>"AE", "Ç" =>"C", "È"=>"E", "É"=>"E", "Ê"=>"E", "Ë"=>"E", "Ì"=>"I", "Í"=>"I", "Î" =>"I", "Ï"=>"I", "Ð"=>"D", "Ñ"=>"N", "Ò"=>"O", "Ó"=>"O", "Ô"=>"O", "Õ" =>"O", "Ö"=>"O", "×"=>"x", "Ø"=>"O", "Ù"=>"U", "Ú"=>"U", "Û"=>"U", "Ü" =>"U", "Ý"=>"Y", "Þ"=>"Th", "ß"=>"ss", "à"=>"a", "á"=>"a", "â"=>"a", "ã" =>"a", "ä"=>"a", "å"=>"a", "æ"=>"ae", "ç"=>"c", "è"=>"e", "é"=>"e", "ê" =>"e", "ë"=>"e", "ì"=>"i", "í"=>"i", "î"=>"i", "ï"=>"i", "ð"=>"d", "ñ" =>"n", "ò"=>"o", "ó"=>"o", "ô"=>"o", "õ"=>"o", "ö"=>"o", "ø"=>"o", "ù" =>"u", "ú"=>"u", "û"=>"u", "ü"=>"u", "ý"=>"y", "þ"=>"th", "ÿ"=>"y", "Ā" =>"A", "ā"=>"a", "Ă"=>"A", "ă"=>"a", "Ą"=>"A", "ą"=>"a", "Ć"=>"C", "ć" =>"c", "Ĉ"=>"C", "ĉ"=>"c", "Ċ"=>"C", "ċ"=>"c", "Č"=>"C", "č"=>"c", "Ď" =>"D", "ď"=>"d", "Đ"=>"D", "đ"=>"d", "Ē"=>"E", "ē"=>"e", "Ĕ"=>"E", "ĕ" =>"e", "Ė"=>"E", "ė"=>"e", "Ę"=>"E", "ę"=>"e", "Ě"=>"E", "ě"=>"e", "Ĝ" =>"G", "ĝ"=>"g", "Ğ"=>"G", "ğ"=>"g", "Ġ"=>"G", "ġ"=>"g", "Ģ"=>"G", "ģ" =>"g", "Ĥ"=>"H", "ĥ"=>"h", "Ħ"=>"H", "ħ"=>"h", "Ĩ"=>"I", "ĩ"=>"i", "Ī" =>"I", "ī"=>"i", "Ĭ"=>"I", "ĭ"=>"i", "Į"=>"I", "į"=>"i", "İ"=>"I", "ı" =>"i", "IJ"=>"IJ", "ij"=>"ij", "Ĵ"=>"J", "ĵ"=>"j", "Ķ"=>"K", "ķ"=>"k", "ĸ" =>"k", "Ĺ"=>"L", "ĺ"=>"l", "Ļ"=>"L", "ļ"=>"l", "Ľ"=>"L", "ľ"=>"l", "Ŀ" =>"L", "ŀ"=>"l", "Ł"=>"L", "ł"=>"l", "Ń"=>"N", "ń"=>"n", "Ņ"=>"N", "ņ" =>"n", "Ň"=>"N", "ň"=>"n", "ʼn"=>"'n", "Ŋ"=>"NG", "ŋ"=>"ng", "Ō" =>"O", "ō"=>"o", "Ŏ"=>"O", "ŏ"=>"o", "Ő"=>"O", "ő"=>"o", "Œ"=>"OE", "œ" =>"oe", "Ŕ"=>"R", "ŕ"=>"r", "Ŗ"=>"R", "ŗ"=>"r", "Ř"=>"R", "ř"=>"r", "Ś" =>"S", "ś"=>"s", "Ŝ"=>"S", "ŝ"=>"s", "Ş"=>"S", "ş"=>"s", "Š"=>"S", "š" =>"s", "Ţ"=>"T", "ţ"=>"t", "Ť"=>"T", "ť"=>"t", "Ŧ"=>"T", "ŧ"=>"t", "Ũ" =>"U", "ũ"=>"u", "Ū"=>"U", "ū"=>"u", "Ŭ"=>"U", "ŭ"=>"u", "Ů"=>"U", "ů" =>"u", "Ű"=>"U", "ű"=>"u", "Ų"=>"U", "ų"=>"u", "Ŵ"=>"W", "ŵ"=>"w", "Ŷ" =>"Y", "ŷ"=>"y", "Ÿ"=>"Y", "Ź"=>"Z", "ź"=>"z", "Ż"=>"Z", "ż"=>"z", "Ž" =>"Z", "ž"=>"z", # Cyrillic "Ґ" =>"G", "Ё"=>"YO", "Є"=>"E", 
"Ї"=>"YI", "І"=>"I", "А" =>"A", "Б"=>"B", "В"=>"V", "Г"=>"G", "Д" =>"D", "Е"=>"E", "Ж"=>"ZH", "З"=>"Z", "И"=>"I", "Й" =>"Y", "К"=>"K", "Л"=>"L", "М"=>"M", "Н"=>"N", "О" =>"O", "П"=>"P", "Р"=>"R", "С"=>"S", "Т"=>"T", "У" =>"U", "Ф"=>"F", "Х"=>"H", "Ц"=>"TS", "Ч"=>"CH", "Ш" =>"SH", "Щ"=>"SCH", "Ъ"=>"'", "Ы"=>"Y", "Ь"=>"", "Э" =>"E", "Ю"=>"YU", "Я"=>"YA", "і"=>"i", "ґ" =>"g", "ё"=>"yo", "№"=>"#", "є"=>"e", "ї" =>"yi", "а"=>"a", "б"=>"b", "в" =>"v", "г"=>"g", "д"=>"d", "е"=>"e", "ж"=>"zh", "з" =>"z", "и"=>"i", "й"=>"y", "к"=>"k", "л"=>"l", "м" =>"m", "н"=>"n", "о"=>"o", "п"=>"p", "р"=>"r", "с" =>"s", "т"=>"t", "у"=>"u", "ф"=>"f", "х"=>"h", "ц" =>"ts", "ч"=>"ch", "ш"=>"sh", "щ"=>"sch", "ъ"=>"'", "ы" =>"y", "ь"=>"", "э"=>"e", "ю"=>"yu", "я"=>"ya", # Greek 'α' => 'a', 'η' => 'h', 'ν' => 'n', 'τ' => 't', 'β' => 'b', 'θ' => 'th', 'ξ' => 'x', 'υ' => 'y', 'γ' => 'g', 'ι' => 'i', 'ο' => 'o', 'φ' => 'f', 'δ' => 'd', 'κ' => 'k', 'π' => 'p', 'χ' => 'ch', 'ε' => 'e', 'λ' => 'l', 'ρ' => 'r', 'ψ' => 'ps', 'ζ' => 'z', 'μ' => 'm', 'σ' => 's', 'ω' => 'w', 'Θ' => 'Th', 'Ξ' => 'X', 'Γ' => 'G', 'Φ' => 'F', 'Δ' => 'D', 'Π' => 'P', 'Λ' => 'L', 'Ρ' => 'R', 'Ψ' => 'Ps', 'Σ' => 'S', 'Ω' => 'W' }
Instance Method Summary collapse
-
#mb_charify(text) ⇒ Object
Returns a unicode compatible version of the string.
-
#normalize_name(value, options = {}) ⇒ Object
Normalizes whitespace: “a , a” => “a, a”, “a ,a” => “a, a”, “a,a” => “a, a”, “a/b” => “a / b”, “a/ b” => “a / b”, “a /b” => “a / b”. Removes trailing and leading [.,]. options: :titleize => true (default false).
-
#truncate(text, *args) ⇒ Object
Truncates the string. The result will be
:length
or shorter, and the words will not be cut in the middle. Arguments: :length => Integer (default: 30), :omission => String (default: “…”).
-
#urlify(string, opts = {}) ⇒ Object
Converts a string to a nicely readable URL. opts: :default_replacement – string to use for unknown characters (Default: “”), :whitespace_replacement – string to use to replace whitespace+ (Default: “-”).
Instance Method Details
#mb_charify(text) ⇒ Object
Returns a unicode compatible version of the string.
Supports any of:
* ruby 1.9 sane utf8 support
* rails 2.1 workaround for crappy ruby 1.8 utf8 support
* rails 2.2 workaround for crappy ruby 1.8 utf8 support
hooray!
135 136 137 138 139 140 141 142 143 |
# File 'lib/string_utils.rb', line 135

# Returns a unicode compatible version of the string.
#
# * On Ruby 1.9 and later, returns a copy of +text+ (+text.dup+).
# * Otherwise, when +text+ responds to +mb_chars+, returns +text.mb_chars+;
#   a frozen +text+ is duplicated first and +mb_chars+ is called on the copy.
#
# Raises RuntimeError when neither of the above applies.
def mb_charify(text)
  if RUBY_VERSION >= '1.9'
    text.dup
  elsif text.respond_to?(:mb_chars)
    text.frozen? ? text.dup.mb_chars : text.mb_chars
  else
    raise "StringUtils: No unicode support for strings"
  end
end
#normalize_name(value, options = {}) ⇒ Object
Normalizes whitespace: “a , a” => “a, a”, “a ,a” => “a, a”, “a,a” => “a, a”, “a/b” => “a / b”, “a/ b” => “a / b”, “a /b” => “a / b”. Removes trailing and leading [.,]. options: :titleize => true (default false)
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/string_utils.rb', line 45

# Normalizes whitespace and the spacing around ",", "." and "/" in a name.
#
#   "a , a" => "a, a"
#   "a ,a"  => "a, a"
#   "a,a"   => "a, a"
#   "a/b"   => "a / b", "a/ b" => "a / b", "a /b" => "a / b"
#
# Also removes a leading and a trailing "." or ",".
#
# value   - the string to normalize; it is copied via mb_charify first, so
#           the in-place substitutions below act on the copy.
# options - Hash; :titleize => true (default false) additionally calls
#           +titleize+ on the result and then downcases the words
#           Of/And/A/An/The/To when they are surrounded by whitespace.
#           NOTE(review): +titleize+ is not defined in this listing;
#           presumably it comes from ActiveSupport - confirm.
#
# Returns the normalized value converted with +to_s+.
def normalize_name(value, options = {})
  value = mb_charify(value)

  # Normalize whitespace
  value.gsub!("\n", ' ')
  value.gsub!(WHITESPACES, ' ')
  value.strip!

  # Remove trailing and leading .,
  value.gsub!(/^[.,]/, '')
  value.gsub!(/[.,]$/, '')

  # "a ,a"  => "a, a"
  # "a,a"   => "a, a"
  # "a , a" => "a, a"
  value.gsub!(/#{WHITESPACE_MATCHER}([,.])/, '\1')
  value.gsub!(/([,.])(#{NOT_WHITESPACE})/, '\1 \2')

  # "//" => "/"
  value.gsub!(/\/+/, '/')

  # "a/b" => "a / b", "a/ b" => "a / b", "a /b" => "a / b"
  value.gsub!(/(#{NOT_WHITESPACE})\//, '\1 /')
  value.gsub!(/\/(#{NOT_WHITESPACE})/, '/ \1')

  if options[:titleize]
    value = value.titleize
    # The match includes the surrounding whitespace, so downcasing the whole
    # match only affects the word itself.
    value.gsub!(/#{WHITESPACE_MATCHER}(Of|And|A|An|The|To)#{WHITESPACE_MATCHER}/) { |m| "#{m.downcase}" }
  end

  value.to_s
end
#truncate(text, *args) ⇒ Object
Truncates the string. The result will be :length
or shorter, and the words will not be cut in the middle. Arguments: :length => Integer (default: 30), :omission => String (default: “…”)
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/string_utils.rb', line 82

# Truncates the string.
#
# The result will be :length characters or shorter, and words will not be
# cut in the middle.
#
# text - the string to truncate; a false/nil value yields "".
# args - either positional (length, omission) or a trailing options Hash:
#        :length   => Integer (default: 30)
#        :omission => String  (default: "…")
#
# Returns +text+ itself when it already fits, otherwise a String made of the
# leading whole words followed by the omission. Returns '' when the omission
# alone is longer than :length.
#
# NOTE(review): when positional arguments AND an options Hash are both
# given, the positional values (or their defaults) are written into the
# caller's Hash and overwrite its :length/:omission entries.
# NOTE(review): rstrip_with_nbsp is defined outside this listing; presumably
# it strips trailing whitespace including NBSP - confirm.
def truncate(text, *args)
  options = args.last.is_a?(Hash) ? args.pop : {}

  # support either old or Rails 2.2 calling convention:
  unless args.empty?
    options[:length] = args[0] || 30
    options[:omission] = args[1] || "…"
  end

  options = {:length => 30, :omission => "…"}.merge(options)
  options[:length] = options[:length].to_i

  return "" if !text

  chars = mb_charify(text)

  # If we can return it straight away or rstrip it and return it, we do it here
  if chars.length <= options[:length]
    return text
  elsif (chars = rstrip_with_nbsp(chars)).length <= options[:length]
    return chars.to_s
  end

  omission = mb_charify(options[:omission])

  # Here we know we have to remove at least 1 word
  # 1. Get the first l characters
  # 2. Remove the last word if it's a part
  # 3. Add omission
  length_wo_omission = options[:length] - omission.length

  return '' if length_wo_omission < 0

  result = rstrip_with_nbsp(chars[0...length_wo_omission] || "")

  # Remove the last word unless we happened to trim it exactly already
  unless chars[length_wo_omission] =~ WHITESPACE || result.length < length_wo_omission
    len = result.split(WHITESPACES).last
    len &&= len.length
    result = rstrip_with_nbsp(result[0...(result.length - (len || 0))])
  end

  result += options[:omission]
  result.to_s
end
#urlify(string, opts = {}) ⇒ Object
Converts a string to a nicely readable URL. opts: :default_replacement – string to use for unknown characters (Default: “”), :whitespace_replacement – string to use to replace whitespace+ (Default: “-”)
29 30 31 32 33 34 35 36 |
# File 'lib/string_utils.rb', line 29

# Converts a string to a nicely readable URL fragment.
#
# string - the source string; it is not modified (the first substitution
#          returns a new string and all later ones act on that copy).
# opts   - Hash:
#          :default_replacement    - string to use for unknown characters
#                                    (default: "")
#          :whitespace_replacement - string to use to replace each run of
#                                    whitespace (default: "-")
#
# Non-ASCII characters are looked up in TRANSLITERATIONS and fall back to
# :default_replacement. Afterwards every character outside a-z, 0-9, "-",
# "+" and "_" - which includes uppercase ASCII letters - is replaced with
# :default_replacement.
#
# Returns the converted String.
def urlify(string, opts = {})
  opts = {:whitespace_replacement => '-', :default_replacement => ""}.merge(opts)

  string = string.gsub(WHITESPACES, opts[:whitespace_replacement])
  string.strip!
  string.gsub!(/[^\x00-\x7f]/u) { |char| TRANSLITERATIONS[char] || opts[:default_replacement] }
  string.gsub!(/[^a-z0-9\-+_]/, opts[:default_replacement])
  string
end