Module: StringUtils
- Extended by:
- StringUtils
- Included in:
- StringUtils
- Defined in:
- lib/string_utils.rb,
lib/string_utils/version.rb,
lib/string_utils/transliteration.rb
Overview
StringUtils is a library that provides various handy string manipulation methods. Example usage:
* StringUtils.truncate("hello world", 10, "...") #=> "hello..."
* StringUtils.normalize_name "\302\240 Gran Via/Avda.de Asturias " #=> "Gran Via / Avda. de Asturias"
* StringUtils.urlify("waßer") #=> "wasser"
* StringUtils.normalize_punctuation(" , a,,b ,") #=> "a, b"
Constant Summary collapse
- NBSP =
"\302\240"
- WHITESPACE_MATCHER =
"(?:\s|#{NBSP})"
- WHITESPACE =
/#{WHITESPACE_MATCHER}/
- NOT_WHITESPACE =
"[^\s#{NBSP}]"
- WHITESPACES =
/#{WHITESPACE_MATCHER}+/
- VERSION =
"1.0.6"
- TRANSLITERATIONS =
Based on transliteration table from i18n v0.5.0
{ # Latin "À" =>"A", "Á"=>"A", "Â"=>"A", "Ã"=>"A", "Ä"=>"A", "Å"=>"A", "Æ"=>"AE", "Ç" =>"C", "È"=>"E", "É"=>"E", "Ê"=>"E", "Ë"=>"E", "Ì"=>"I", "Í"=>"I", "Î" =>"I", "Ï"=>"I", "Ð"=>"D", "Ñ"=>"N", "Ò"=>"O", "Ó"=>"O", "Ô"=>"O", "Õ" =>"O", "Ö"=>"O", "×"=>"x", "Ø"=>"O", "Ù"=>"U", "Ú"=>"U", "Û"=>"U", "Ü" =>"U", "Ý"=>"Y", "Þ"=>"Th", "ß"=>"ss", "à"=>"a", "á"=>"a", "â"=>"a", "ã" =>"a", "ä"=>"a", "å"=>"a", "æ"=>"ae", "ç"=>"c", "è"=>"e", "é"=>"e", "ê" =>"e", "ë"=>"e", "ì"=>"i", "í"=>"i", "î"=>"i", "ï"=>"i", "ð"=>"d", "ñ" =>"n", "ò"=>"o", "ó"=>"o", "ô"=>"o", "õ"=>"o", "ö"=>"o", "ø"=>"o", "ù" =>"u", "ú"=>"u", "û"=>"u", "ü"=>"u", "ý"=>"y", "þ"=>"th", "ÿ"=>"y", "Ā" =>"A", "ā"=>"a", "Ă"=>"A", "ă"=>"a", "Ą"=>"A", "ą"=>"a", "Ć"=>"C", "ć" =>"c", "Ĉ"=>"C", "ĉ"=>"c", "Ċ"=>"C", "ċ"=>"c", "Č"=>"C", "č"=>"c", "Ď" =>"D", "ď"=>"d", "Đ"=>"D", "đ"=>"d", "Ē"=>"E", "ē"=>"e", "Ĕ"=>"E", "ĕ" =>"e", "Ė"=>"E", "ė"=>"e", "Ę"=>"E", "ę"=>"e", "Ě"=>"E", "ě"=>"e", "Ĝ" =>"G", "ĝ"=>"g", "Ğ"=>"G", "ğ"=>"g", "Ġ"=>"G", "ġ"=>"g", "Ģ"=>"G", "ģ" =>"g", "Ĥ"=>"H", "ĥ"=>"h", "Ħ"=>"H", "ħ"=>"h", "Ĩ"=>"I", "ĩ"=>"i", "Ī" =>"I", "ī"=>"i", "Ĭ"=>"I", "ĭ"=>"i", "Į"=>"I", "į"=>"i", "İ"=>"I", "ı" =>"i", "IJ"=>"IJ", "ij"=>"ij", "Ĵ"=>"J", "ĵ"=>"j", "Ķ"=>"K", "ķ"=>"k", "ĸ" =>"k", "Ĺ"=>"L", "ĺ"=>"l", "Ļ"=>"L", "ļ"=>"l", "Ľ"=>"L", "ľ"=>"l", "Ŀ" =>"L", "ŀ"=>"l", "Ł"=>"L", "ł"=>"l", "Ń"=>"N", "ń"=>"n", "Ņ"=>"N", "ņ" =>"n", "Ň"=>"N", "ň"=>"n", "ʼn"=>"'n", "Ŋ"=>"NG", "ŋ"=>"ng", "Ō" =>"O", "ō"=>"o", "Ŏ"=>"O", "ŏ"=>"o", "Ő"=>"O", "ő"=>"o", "Œ"=>"OE", "œ" =>"oe", "Ŕ"=>"R", "ŕ"=>"r", "Ŗ"=>"R", "ŗ"=>"r", "Ř"=>"R", "ř"=>"r", "Ś" =>"S", "ś"=>"s", "Ŝ"=>"S", "ŝ"=>"s", "Ş"=>"S", "ş"=>"s", "Š"=>"S", "š" =>"s", "Ţ"=>"T", "ţ"=>"t", "Ť"=>"T", "ť"=>"t", "Ŧ"=>"T", "ŧ"=>"t", "Ũ" =>"U", "ũ"=>"u", "Ū"=>"U", "ū"=>"u", "Ŭ"=>"U", "ŭ"=>"u", "Ů"=>"U", "ů" =>"u", "Ű"=>"U", "ű"=>"u", "Ų"=>"U", "ų"=>"u", "Ŵ"=>"W", "ŵ"=>"w", "Ŷ" =>"Y", "ŷ"=>"y", "Ÿ"=>"Y", "Ź"=>"Z", "ź"=>"z", "Ż"=>"Z", "ż"=>"z", "Ž" =>"Z", "ž"=>"z", # Cyrillic "Ґ" =>"G", "Ё"=>"YO", "Є"=>"E", 
"Ї"=>"YI", "І"=>"I", "А" =>"A", "Б"=>"B", "В"=>"V", "Г"=>"G", "Д" =>"D", "Е"=>"E", "Ж"=>"ZH", "З"=>"Z", "И"=>"I", "Й" =>"Y", "К"=>"K", "Л"=>"L", "М"=>"M", "Н"=>"N", "О" =>"O", "П"=>"P", "Р"=>"R", "С"=>"S", "Т"=>"T", "У" =>"U", "Ф"=>"F", "Х"=>"H", "Ц"=>"TS", "Ч"=>"CH", "Ш" =>"SH", "Щ"=>"SCH", "Ъ"=>"'", "Ы"=>"Y", "Ь"=>"", "Э" =>"E", "Ю"=>"YU", "Я"=>"YA", "і"=>"i", "ґ" =>"g", "ё"=>"yo", "№"=>"#", "є"=>"e", "ї" =>"yi", "а"=>"a", "б"=>"b", "в" =>"v", "г"=>"g", "д"=>"d", "е"=>"e", "ж"=>"zh", "з" =>"z", "и"=>"i", "й"=>"y", "к"=>"k", "л"=>"l", "м" =>"m", "н"=>"n", "о"=>"o", "п"=>"p", "р"=>"r", "с" =>"s", "т"=>"t", "у"=>"u", "ф"=>"f", "х"=>"h", "ц" =>"ts", "ч"=>"ch", "ш"=>"sh", "щ"=>"sch", "ъ"=>"'", "ы" =>"y", "ь"=>"", "э"=>"e", "ю"=>"yu", "я"=>"ya", # Greek 'α' => 'a', 'η' => 'h', 'ν' => 'n', 'τ' => 't', 'β' => 'b', 'θ' => 'th', 'ξ' => 'x', 'υ' => 'y', 'γ' => 'g', 'ι' => 'i', 'ο' => 'o', 'φ' => 'f', 'δ' => 'd', 'κ' => 'k', 'π' => 'p', 'χ' => 'ch', 'ε' => 'e', 'λ' => 'l', 'ρ' => 'r', 'ψ' => 'ps', 'ζ' => 'z', 'μ' => 'm', 'σ' => 's', 'ω' => 'w', 'Θ' => 'Th', 'Ξ' => 'X', 'Γ' => 'G', 'Φ' => 'F', 'Δ' => 'D', 'Π' => 'P', 'Λ' => 'L', 'Ρ' => 'R', 'Ψ' => 'Ps', 'Σ' => 'S', 'Ω' => 'W' }
Instance Method Summary collapse
-
#mb_charify(text) ⇒ Object
Returns a unicode compatible version of the string.
-
#normalize_name(value, options = {}) ⇒ Object
Normalizes whitespace: “a , a” => “a, a”, “a ,a” => “a, a”, “a,a” => “a, a”, “a/b” => “a / b”, “a/ b” => “a / b”, “a /b” => “a / b”. Removes trailing and leading [.,]. Options: :titleize => true (default false).
-
#normalize_punctuation(str) ⇒ Object
Collapses spaces and commas. Fixes spacing around [,.;:]. Removes trailing and leading commas.
-
#truncate(text, *args) ⇒ Object
Truncates the string. The result will be :length or shorter, and the words will not be cut in the middle. Arguments: :length => Integer (default: 30), :omission => String (default: “…”).
-
#urlify(string, opts = {}) ⇒ Object
Converts a string to a nicely readable URL. opts: :default_replacement – string to use for unknown characters (Default: “”); :whitespace_replacement – string to use to replace whitespace+ (Default: “-”).
Instance Method Details
#mb_charify(text) ⇒ Object
Returns a unicode compatible version of the string
support any of:
* ruby 1.9 sane utf8 support
* rails 2.1 workaround for crappy ruby 1.8 utf8 support
* rails 2.2 workaround for crappy ruby 1.8 utf8 support
hooray!
152 153 154 155 156 157 158 159 160 |
# File 'lib/string_utils.rb', line 152
# Returns a unicode-aware version of +text+.
#
# * Ruby >= 1.9: strings handle UTF-8 natively, so a plain copy is returned.
# * Older Rubies: falls back to +mb_chars+ when the string responds to it
#   (a frozen string is copied first, before +mb_chars+ is called on it).
#
# Raises RuntimeError when neither kind of unicode support is available.
def mb_charify(text)
  return text.dup if RUBY_VERSION >= '1.9'
  raise "StringUtils: No unicode support for strings" unless text.respond_to?(:mb_chars)

  source = text.frozen? ? text.dup : text
  source.mb_chars
end
#normalize_name(value, options = {}) ⇒ Object
Normalizes whitespace: “a , a” => “a, a”, “a ,a” => “a, a”, “a,a” => “a, a”, “a/b” => “a / b”, “a/ b” => “a / b”, “a /b” => “a / b”. Removes trailing and leading [.,]. Options: :titleize => true (default false)
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/string_utils.rb', line 59
# Normalizes a name-like string: collapses whitespace, drops a leading or
# trailing "." / ",", removes a surrounding run of quotes and fixes the
# spacing around ",", "." and "/".
#
#   "a , a" => "a, a"
#   "a ,a"  => "a, a"
#   "a,a"   => "a, a"
#   "a/b"   => "a / b", "a/ b" => "a / b", "a /b" => "a / b"
#
# value   - the string to normalize. It is passed through mb_charify first;
#           on Ruby >= 1.9 that yields a copy, so the in-place gsub! calls
#           below operate on the copy.
# options - Hash of options:
#           :titleize => true  titleize the result, then lower-case the words
#                              Of/And/A/An/The/To when they are surrounded by
#                              whitespace (default: false).
#                              NOTE(review): relies on value.titleize, which is
#                              not defined in this listing - presumably
#                              ActiveSupport; confirm.
#
# Returns the normalized String.
def normalize_name(value, options = {})
  value = mb_charify(value)

  # Normalize whitespace
  value.gsub!("\n", ' ')
  value.gsub!(WHITESPACES, ' ')
  value.strip!

  # Remove trailing and leading .,
  value.gsub!(/^[.,]/, '')
  value.gsub!(/[.,]$/, '')

  # Remove quote pairs. Imperfect, but good enough
  value.gsub!(/\A['"]+(.*)['"]+\z/, '\1')

  # "a ,a" => "a, a"
  # "a,a" => "a, a"
  # "a , a" => "a, a"
  value.gsub!(/#{WHITESPACE_MATCHER}([,.])/, '\1')
  value.gsub!(/([,.])(#{NOT_WHITESPACE})/, '\1 \2')

  # "//" => "/"
  value.gsub!(/\/+/, '/')

  # "a/b" => "a / b", "a/ b" => "a / b", "a /b" => "a / b"
  value.gsub!(/(#{NOT_WHITESPACE})\//, '\1 /')
  value.gsub!(/\/(#{NOT_WHITESPACE})/, '/ \1')

  if options[:titleize]
    value = value.titleize
    # Small connecting words stay lower-case when they sit between other words.
    value.gsub!(/#{WHITESPACE_MATCHER}(Of|And|A|An|The|To)#{WHITESPACE_MATCHER}/) { |m| m.downcase }
  end

  value.to_s
end
#normalize_punctuation(str) ⇒ Object
Collapses spaces and commas. Fixes spacing around [,.;:]. Removes trailing and leading commas.
28 29 30 31 32 33 34 35 36 37 |
# File 'lib/string_utils.rb', line 28
# Tidies punctuation spacing in +str+ and returns the result as a new string;
# the argument itself is left untouched (the work happens on a dup).
#
# The substitutions are applied in order:
#   1. collapse every whitespace run into a single space
#   2. drop a whitespace character that directly precedes a comma
#   3. collapse runs of commas into one comma
#   4. remove a leading or trailing comma (with adjacent whitespace)
#   5. insert a space after any of , . ; : that is followed by non-whitespace
# Finally, surrounding whitespace is stripped.
def normalize_punctuation(str)
  rules = [
    [/\s+/, ' '],
    [/\s,/, ','],
    [/,+/, ','],
    [/^\s*,|,\s*$/, ''],
    [/([,.;:])(\S)/, '\1 \2']
  ]

  result = str.dup
  rules.each { |pattern, replacement| result.gsub!(pattern, replacement) }
  result.strip!
  result
end
#truncate(text, *args) ⇒ Object
Truncates the string. The result will be :length or shorter, and the words will not be cut in the middle. Arguments: :length => Integer (default: 30), :omission => String (default: “…”)
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# File 'lib/string_utils.rb', line 99
# Truncates +text+ so that the result is :length characters or shorter,
# without cutting a word in the middle, and appends :omission.
#
# Accepted calling conventions:
#   truncate(text, :length => 10, :omission => "...")
#   truncate(text, 10, "...")
#
# Options:
#   :length   => Integer (default: 30)
#   :omission => String  (default: "…")
#
# NOTE(review): when any positional argument is given, both :length and
# :omission are overwritten from the positional values (falling back to the
# defaults), even if a trailing options hash supplied them - confirm intended.
#
# Returns "" when +text+ is nil/false or when the omission alone is longer
# than :length; returns +text+ itself when it already fits.
def truncate(text, *args)
  # dup so that the caller's hash is never modified by the assignments below
  options = args.last.is_a?(Hash) ? args.pop.dup : {}

  # support either old or Rails 2.2 calling convention:
  unless args.empty?
    options[:length] = args[0] || 30
    options[:omission] = args[1] || "…"
  end

  options = {:length => 30, :omission => "…"}.merge(options)
  options[:length] = options[:length].to_i

  return "" unless text

  chars = mb_charify(text)

  # If we can return it straight away or rstrip it and return it, we do it here
  if chars.length <= options[:length]
    return text
  elsif (chars = rstrip_with_nbsp(chars)).length <= options[:length]
    return chars.to_s
  end

  omission = mb_charify(options[:omission])

  # Here we know we have to remove at least 1 word
  # 1. Get the first l characters
  # 2. Remove the last word if it's a part
  # 3. Add omission
  length_wo_omission = options[:length] - omission.length

  # The omission itself does not fit into the requested length.
  return '' if length_wo_omission < 0

  result = rstrip_with_nbsp(chars[0...length_wo_omission] || "")

  # Remove the last word unless we happened to trim it exactly already
  unless chars[length_wo_omission] =~ WHITESPACE || result.length < length_wo_omission
    len = result.split(WHITESPACES).last
    len &&= len.length
    result = rstrip_with_nbsp(result[0...(result.length - (len || 0))])
  end

  result += options[:omission]
  result.to_s
end
#urlify(string, opts = {}) ⇒ Object
Converts a string to a nicely readable URL. opts: :default_replacement – string to use for unknown characters (Default: “”); :whitespace_replacement – string to use to replace whitespace+ (Default: “-”)
43 44 45 46 47 48 49 50 |
# File 'lib/string_utils.rb', line 43
# Builds a URL-friendly version of +string+ (the argument is not modified;
# the first gsub produces a new string that later steps change in place).
#
# opts:
#   :whitespace_replacement - replaces every match of WHITESPACES (default: "-")
#   :default_replacement    - replaces characters that cannot be mapped
#                             (default: "")
#
# Steps: replace whitespace runs, strip, transliterate every non-ASCII
# character through TRANSLITERATIONS, then replace anything outside
# a-z, 0-9, "-", "+" and "_".
#
# NOTE(review): the final character class has no upper-case range, so ASCII
# capitals are replaced by :default_replacement as well - confirm intended.
def urlify(string, opts = {})
  settings = {:whitespace_replacement => '-', :default_replacement => ""}.merge(opts)
  fallback = settings[:default_replacement]

  slug = string.gsub(WHITESPACES, settings[:whitespace_replacement])
  slug.strip!
  slug.gsub!(/[^\x00-\x7f]/u) { |char| TRANSLITERATIONS[char] || fallback }
  slug.gsub!(/[^a-z0-9\-+_]/, fallback)
  slug
end