Module: AnyStyle::StringUtils
- Included in:
- Document, Document::Page, Feature, ParserCore
- Defined in:
- lib/anystyle/utils.rb
Class Method Summary
- .canonize(string) ⇒ Object
- .count(string, pattern) ⇒ Object
- .display_chars(string) ⇒ Object
- .display_width(string) ⇒ Object
- .indent(token) ⇒ Object
- .page_break?(string) ⇒ Boolean
- .scrub(string, blacklist: /[\p{^Alnum}\p{Lm}]/) ⇒ Object
- .transliterate(string, form: :nfkd) ⇒ Object
Class Method Details
.canonize(string) ⇒ Object
24 25 26 |
# File 'lib/anystyle/utils.rb', line 24
#
# Builds a canonical form of +string+ by passing it through
# +transliterate+ and +scrub+ (both with their default options) and
# lower-casing the result.
#
# @param string [String] the text to canonize
# @return [String] the transliterated, scrubbed, lower-cased text
def canonize(string)
  unmarked = transliterate(string)
  scrub(unmarked).downcase
end
.count(string, pattern) ⇒ Object
42 43 44 |
# File 'lib/anystyle/utils.rb', line 42
#
# Counts how many times +pattern+ matches in +string+, using the same
# matching semantics as String#scan.
#
# @param string [String] the text to search
# @param pattern [Regexp, String] the pattern handed to String#scan
# @return [Integer] the number of matches (0 when there are none)
def count(string, pattern)
  # Enumerating lazily avoids building the intermediate array of matches
  # that a plain `scan(pattern).size` would allocate.
  string.to_enum(:scan, pattern).count
end
.display_chars(string) ⇒ Object
36 37 38 39 40 |
# File 'lib/anystyle/utils.rb', line 36
#
# Returns a copy of +string+ reduced to the characters relevant for
# display: non-spacing marks (Mn), enclosing marks (Me) and control
# characters (Cc) are removed, and every space separator (Zs) is
# replaced by a plain ASCII space.
#
# @param string [String] the text to clean up
# @return [String] a new string; the argument is not modified
def display_chars(string)
  without_invisibles = string.gsub(/\p{Mn}|\p{Me}|\p{Cc}/, '')
  without_invisibles.gsub(/\p{Zs}/, ' ')
end
.display_width(string) ⇒ Object
32 33 34 |
# File 'lib/anystyle/utils.rb', line 32
#
# Measures +string+ in characters after it has been cleaned up by
# +display_chars+ and its trailing whitespace has been stripped.
#
# @param string [String] the text to measure
# @return [Integer] the number of remaining characters
def display_width(string)
  visible = display_chars(string)
  visible.rstrip.size
end
.indent(token) ⇒ Object
46 47 48 |
# File 'lib/anystyle/utils.rb', line 46
#
# Measures the leading whitespace of +token+ after it has been cleaned
# up by +display_chars+ and its trailing whitespace has been stripped.
#
# @param token [String] the text whose indentation is measured
# @return [Integer] the length of the leading whitespace run
def indent(token)
  visible = display_chars(token).rstrip
  # \s* also matches the empty string, so this lookup always yields a
  # String (possibly empty) and never nil.
  leading = visible[/^(\s*)/]
  leading.length
end
.page_break?(string) ⇒ Boolean
28 29 30 |
# File 'lib/anystyle/utils.rb', line 28
#
# Tells whether +string+ contains a form feed character.
#
# @param string [String, nil] the text to inspect
# @return [Boolean] true if a form feed ("\f") occurs anywhere in
#   +string+, false otherwise (including when +string+ is nil)
def page_break?(string)
  # Regexp#match? yields a real boolean (the previous `=~` returned the
  # match index or nil), skips the MatchData allocation, and still
  # answers false instead of raising when given nil.
  /\f/.match?(string)
end
.scrub(string, blacklist: /[\p{^Alnum}\p{Lm}]/) ⇒ Object
14 15 16 |
# File 'lib/anystyle/utils.rb', line 14
#
# Repairs invalid byte sequences in +string+ with String#scrub and then
# deletes every match of +blacklist+.
#
# @param string [String] the text to clean
# @param blacklist [Regexp, String] pattern whose matches are removed;
#   by default anything that is not alphanumeric, plus modifier letters
# @return [String] a new string without the blacklisted characters
def scrub(string, blacklist: /[\p{^Alnum}\p{Lm}]/)
  well_formed = string.scrub
  well_formed.gsub(blacklist, '')
end
.transliterate(string, form: :nfkd) ⇒ Object
18 19 20 21 22 |
# File 'lib/anystyle/utils.rb', line 18
#
# Applies Unicode normalization to +string+ and then removes every
# character in the Mark category, which strips the combining marks that
# a decomposing normalization form separates from their base letters.
#
# @param string [String] the text to transliterate
# @param form [Symbol] normalization form passed on to
#   String#unicode_normalize (:nfkd by default)
# @return [String] a new, normalized string without mark characters
def transliterate(string, form: :nfkd)
  normalized = string.unicode_normalize(form)
  normalized.gsub(/\p{Mark}/, '')
end