Module: Unicode::Types

Defined in:
lib/unicode/types.rb,
lib/unicode/types/index.rb,
lib/unicode/types/constants.rb

Constant Summary collapse

# Version string of this library (presumably the gem release number — TODO confirm).
VERSION =
"1.8.0"
# Presumably the version of the Unicode standard the bundled data was built from — TODO confirm.
UNICODE_VERSION =
"15.0.0"
# Absolute path of the "data" directory, resolved three levels above the
# directory that contains the defining file.
DATA_DIRECTORY =
File.expand_path(File.dirname(__FILE__) + "/../../../data/").freeze
# Full path of the "types.marshal.gz" file inside DATA_DIRECTORY.
INDEX_FILENAME =
(DATA_DIRECTORY + "/types.marshal.gz").freeze

Class Method Summary collapse

Class Method Details

.get_codepoint_value(char) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/unicode/types.rb', line 34

# Resolves a character to its integer codepoint.
#
# Strings with a valid encoding are handled by String#ord. Strings tagged as
# UTF-8 whose bytes are not well-formed get a second attempt through
# String#unpack("U*"), which may still be able to decode them.
#
# @param char [String] the character to inspect; only its first codepoint is used
# @return [Integer] the codepoint
# @raise [ArgumentError] when no codepoint can be determined, including for
#   an empty string
def self.get_codepoint_value(char)
  ord =
    if char.empty?
      # String#ord would raise its own "empty string" ArgumentError here;
      # fall through so the caller gets this library's message instead.
      nil
    elsif char.valid_encoding?
      char.ord
    elsif char.encoding.name == "UTF-8"
      begin
        char.unpack("U*")[0]
      rescue ArgumentError
        # Bytes could not be decoded; reported below with the common message.
        nil
      end
    end

  return ord if ord

  raise(ArgumentError, "Unicode::Types.type must be given a valid char")
end

.names ⇒ Object



29
30
31
32
# File 'lib/unicode/types.rb', line 29

# Returns the list of known type names.
#
# Requires 'types/index' unless the INDEX constant is already defined. The
# caller receives a shallow copy, so changing the returned object does not
# touch INDEX[:TYPE_NAMES] itself.
#
# @return [Object] a +dup+ of INDEX[:TYPE_NAMES]
def self.names
  unless defined?(::Unicode::Types::INDEX)
    require_relative 'types/index'
  end

  type_names = INDEX[:TYPE_NAMES]
  type_names.dup
end

.type(char) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/unicode/types.rb', line 15

# Looks up the type name of a single character.
#
# INDEX[:TYPES] is walked as nested arrays: at each level the codepoint is
# split into a slot number and a remainder. Any entry that is not an Array
# ends the walk early and is used (via +to_i+) as an index into
# INDEX[:TYPE_NAMES]; otherwise the final remainder selects the entry.
#
# @param char [String] the character to classify
# @return [Object] the matching entry of INDEX[:TYPE_NAMES]
# @raise [ArgumentError] propagated from get_codepoint_value for invalid input
def self.type(char)
  require_relative 'types/index' unless defined? ::Unicode::Types::INDEX

  offset = get_codepoint_value(char)
  node   = INDEX[:TYPES]

  [0x10000, 0x1000, 0x100, 0x10].each do |width|
    slot, offset = offset.divmod(width)
    node = node[slot]
    return INDEX[:TYPE_NAMES][node.to_i] unless node.is_a?(Array)
  end

  INDEX[:TYPE_NAMES][node[offset].to_i]
end

.types(string) ⇒ Object Also known as: of



5
6
7
8
9
10
11
12
# File 'lib/unicode/types.rb', line 5

# Lists the distinct type names found among the characters of +string+.
#
# @param string [String] text whose characters are each classified via +type+
# @return [Array] every type name that occurs, once each, in sorted order
def self.types(string)
  string.each_char.map { |char| type(char) }.uniq.sort
end