Module: Unicode::Categories
- Defined in:
- lib/unicode/categories.rb,
lib/unicode/categories/index.rb,
lib/unicode/categories/constants.rb
Constant Summary collapse
- VERSION =
"1.9.0"
- UNICODE_VERSION =
"15.1.0"
- DATA_DIRECTORY =
File.expand_path(File.dirname(__FILE__) + "/../../../data/").freeze
- INDEX_FILENAME =
(DATA_DIRECTORY + "/categories.marshal.gz").freeze
Class Method Summary collapse
- .categories(string, **options) ⇒ Object (also: of)
- .category(char, format: :short) ⇒ Object
- .names(format: :short) ⇒ Object
Class Method Details
.categories(string, **options) ⇒ Object Also known as: of
5 6 7 8 9 10 11 12 |
# File 'lib/unicode/categories.rb', line 5

# Returns the distinct general categories of the characters in +string+,
# sorted.
#
# Every character is classified with .category; all keyword options are
# passed through to it unchanged.
#
# @param string [#each_char] text whose characters are classified
# @param options [Hash] keyword options forwarded to .category
# @return [Array] unique category names in sorted order (empty for "")
def self.categories(string, **options)
  # The keyword rest is named rather than anonymous: forwarding a bare `**`
  # needs Ruby 3.2+, and doing so from inside a block is reportedly rejected
  # by Ruby 3.3.0 (NOTE(review): confirm against the supported Ruby range).
  string.each_char.map { |char| category(char, **options) }.uniq.sort
end
.category(char, format: :short) ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/unicode/categories.rb', line 15

# Looks up the general category of a single character.
#
# The category index is loaded on first use. INDEX[:CATEGORIES] is walked as
# nested arrays, one level per divisor (0x10000, 0x1000, 0x100, 0x10) and a
# final level indexed by the remaining offset. A non-Array entry found on the
# way down is the answer for the whole sub-range; a missing entry yields "Cn".
#
# @param char [#ord] the character to classify; anything whose +ord+ is a
#   non-negative Integer is accepted (for a multi-character String, +ord+
#   uses the first character)
# @param format [Symbol] +:long+ maps the result through
#   INDEX[:CATEGORY_NAMES]; any other value returns the short name
# @return [Object] the category name stored in the index, or "Cn"
# @raise [ArgumentError] if +char+ is nil, empty, negative, or otherwise has
#   no usable codepoint
def self.category(char, format: :short)
  require_relative 'categories/index' unless defined? ::Unicode::Categories::INDEX

  # `ord` never returns a falsy value, so validity has to be checked
  # explicitly: objects without `ord`, empty strings and negative integers
  # are all rejected with the same descriptive error.
  blank = char.respond_to?(:empty?) && char.empty?
  codepoint = char.ord if char.respond_to?(:ord) && !blank
  unless codepoint.is_a?(Integer) && codepoint >= 0
    raise ArgumentError, "Unicode::Categories.category must be given a valid char"
  end

  node = INDEX[:CATEGORIES]
  offset = codepoint
  [0x10000, 0x1000, 0x100, 0x10].each do |depth|
    node = node[offset / depth]
    offset %= depth
    # A non-Array entry (a value or nil) covers the whole remaining range.
    break unless node.is_a?(Array)
  end
  node = node[offset] if node.is_a?(Array)

  short_name = node || "Cn"
  format == :long ? INDEX[:CATEGORY_NAMES][short_name] : short_name
end
.names(format: :short) ⇒ Object
33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/unicode/categories.rb', line 33

# Lists the category names stored in INDEX[:CATEGORY_NAMES], loading the
# index on first use.
#
# @param format [Symbol] +:short+ returns the sorted keys, +:long+ the sorted
#   values, +:table+ a copy of the whole mapping
# @return [Array, Hash, nil] nil when +format+ is none of the above
def self.names(format: :short)
  require_relative 'categories/index' unless defined? ::Unicode::Categories::INDEX

  name_table = INDEX[:CATEGORY_NAMES]
  if format == :long
    name_table.values.sort
  elsif format == :short
    name_table.keys.sort
  elsif format == :table
    # Hand out a copy so callers cannot modify the shared index.
    name_table.dup
  end
end