Module: Unicode::Categories

Defined in:
lib/unicode/categories.rb,
lib/unicode/categories/index.rb,
lib/unicode/categories/constants.rb

Constant Summary

# Version string of this library.
VERSION = "1.5.0"
# Unicode version string recorded alongside the library version.
UNICODE_VERSION = "12.1.0"
# Absolute, frozen path of the "data" directory located three levels above
# the directory that contains this file.
DATA_DIRECTORY = File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "..", "data")).freeze
# Frozen path of the "categories.marshal.gz" file inside DATA_DIRECTORY.
INDEX_FILENAME = "#{DATA_DIRECTORY}/categories.marshal.gz".freeze

Class Method Summary

Class Method Details

.categories(string, **options) ⇒ Object Also known as: of



5
6
7
8
9
10
11
12
# File 'lib/unicode/categories.rb', line 5

# Collects the distinct categories of every character in +string+.
#
# @param string [String] text that is classified character by character
# @param options [Hash] keyword options handed unchanged to .category
# @return [Array] the category names found, de-duplicated and sorted
def self.categories(string, **options)
  string.each_char.map { |character| category(character, **options) }.uniq.sort
end

.category(char, format: :short) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/unicode/categories.rb', line 15

def self.category(char, format: :short)
  require_relative 'categories/index' unless defined? ::Unicode::Categories::INDEX
  codepoint_depth_offset = char.ord or
      raise(ArgumentError, "Unicode::Categories.category must be given a valid char")
  index_or_value = INDEX[:CATEGORIES]
  [0x10000, 0x1000, 0x100, 0x10].each{ |depth|
    index_or_value         = index_or_value[codepoint_depth_offset / depth]
    codepoint_depth_offset = codepoint_depth_offset % depth
    unless index_or_value.is_a? Array
      res = index_or_value || "Cn"
      return format == :long ? INDEX[:CATEGORY_NAMES][res] : res
    end
  }

  res = index_or_value[codepoint_depth_offset] || "Cn"
  format == :long ? INDEX[:CATEGORY_NAMES][res] : res
end

.names(format: :short) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
# File 'lib/unicode/categories.rb', line 33

# Lists the category names stored in INDEX[:CATEGORY_NAMES].
#
# @param format [Symbol] :short for the sorted keys, :long for the sorted
#   values, :table for a shallow copy of the whole hash
# @return [Array, Hash, nil] the requested listing; nil for any other format
def self.names(format: :short)
  require_relative 'categories/index' unless defined? ::Unicode::Categories::INDEX

  name_table = INDEX[:CATEGORY_NAMES]
  return name_table.values.sort if format == :long
  return name_table.keys.sort if format == :short

  name_table.dup if format == :table
end