Module: Forkforge::UnicodeData

Extended by:
UnicodeData
Includes:
UnicodeOrgFileFormat
Included in:
UnicodeData
Defined in:
lib/forkforge/internal/unicode_data.rb

Constant Summary collapse

LOCAL =
'data'
REMOTE =
'Public/UCD/latest/ucd'
FILE =
'UnicodeData.txt'

Constants included from UnicodeOrgFileFormat

Forkforge::UnicodeOrgFileFormat::HOST

Instance Method Summary collapse

Methods included from UnicodeOrgFileFormat

#__to_char, #__to_code_point, #i_grab, #i_hash, #i_load

Instance Method Details

#code_points ⇒ Object



22
23
24
# File 'lib/forkforge/internal/unicode_data.rb', line 22

# Returns a CodePoints object wrapping the value of +hash+.
#
# The object is created on the first call and cached in @codepoints, so
# subsequent calls return the same instance.
def code_points
  @codepoints ||= CodePoints.new(hash)
end

#compose_cp(cp, tag = :font, thorough = true) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
# File 'lib/forkforge/internal/unicode_data.rb', line 60

# Finds the code point(s) whose tagged decomposition mapping ends with +cp+.
#
# cp       - code point in any form accepted by +__to_code_point+.
# tag      - decomposition tag, resolved through
#            CharacterDecompositionMapping::Tag.tag (default :font).
# thorough - when true, every matching entry is returned; when false, only
#            the first one.
#
# Returns:
# * a Forkforge::CodePoint built from +hash[cp]+ when the resolved tag is not
#   +valid?+;
# * an Array of Forkforge::CodePoint (possibly empty) when +thorough+ is true;
# * a single Forkforge::CodePoint when +thorough+ is false, built from the
#   first match or from +hash[cp]+ if nothing matched.
#
# NOTE(review): @cdm is indexed here but never initialised in this method;
# presumably it is set up as a Hash elsewhere in the module - confirm.
def compose_cp cp, tag = :font, thorough = true
  cp = __to_code_point cp
  tag_info = CharacterDecompositionMapping::Tag.tag(tag)
  return Forkforge::CodePoint.new(hash[cp]) unless tag_info.valid?

  # Entries carrying this tag are collected once and cached per tag.
  @cdm[tag] = all_character_decomposition_mapping(/#{tag_info.tag}/).values if @cdm[tag].nil?

  # FIXME: Could we distinguish "<wide> 0ABC" and "0A00 0ABC" in a more elegant way?
  # The code point must end the mapping and be preceded by a non-hex character
  # plus whitespace, i.e. directly follow a tag rather than another code point.
  # Built once here so it is not recompiled for every candidate entry.
  pattern = /[^\dA-Fa-f]\s+#{cp}\Z/
  matches = ->(entry) { entry[:character_decomposition_mapping] =~ pattern }

  if thorough
    @cdm[tag].select(&matches).map { |entry| Forkforge::CodePoint.new(entry) }
  else
    Forkforge::CodePoint.new(@cdm[tag].find(&matches) || hash[cp])
  end
end

#decompose_cp(cp, tags = []) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/forkforge/internal/unicode_data.rb', line 72

# Recursively expands +cp+ through its character decomposition mapping.
#
# cp   - code point; normalised with +__to_code_point+ for the return value
#        and passed as given to +get_character_decomposition_mapping+.
# tags - a tag symbol or list of tag symbols that may be expanded. When
#        empty (the default) any tag is accepted.
#
# Returns the normalised code point itself when there is no mapping, or when
# +tags+ is non-empty and the mapping contains a tag outside of +tags+.
# Otherwise returns an Array with one element per non-tag part of the
# mapping, each being the result of decomposing that part in turn (so the
# Array may be nested).
def decompose_cp cp, tags = []
  normalized = __to_code_point cp
  mapping = get_character_decomposition_mapping cp
  return normalized if mapping.vacant?

  tag_class = CharacterDecompositionMapping::Tag
  parts = mapping.split ' '
  allowed = [*tags]

  unless allowed.vacant?
    # A tag that the caller did not ask for blocks the decomposition.
    foreign = parts.any? do |part|
      tag_class.tag?(part) && !allowed.include?(tag_class.tag(part).sym)
    end
    return normalized if foreign
  end

  parts.reject { |part| tag_class.tag?(part) }
       .map { |part| decompose_cp part, tags }
end

#hash ⇒ Object



18
19
20
# File 'lib/forkforge/internal/unicode_data.rb', line 18

# Returns the result of +i_hash+ for the UnicodeData file, identified by the
# REMOTE, LOCAL and FILE constants, with CodePoint::UNICODE_FIELDS as the
# field list and +false+ as the last argument.
#
# NOTE(review): the name replaces Object#hash on whatever includes or extends
# this module, and the value returned comes from +i_hash+ rather than being
# an Integer hash code - confirm the receiver is never used as a Hash key.
def hash
  i_hash REMOTE, LOCAL, FILE, CodePoint::UNICODE_FIELDS, false
end

#info(cp) ⇒ Object



26
27
28
29
# File 'lib/forkforge/internal/unicode_data.rb', line 26

# Looks up the +hash+ entry for a code point.
#
# cp - either a one-character String (converted to its numeric code point
#      first) or any other value, which is handed to +__to_code_point+ as is.
#
# Returns whatever +hash+ holds under the normalised code point.
def info cp
  single_char = String === cp && cp.length == 1
  key = single_char ? cp.codepoints.first : cp
  hash[__to_code_point(key)]
end

#infos(string) ⇒ Object



31
32
33
# File 'lib/forkforge/internal/unicode_data.rb', line 31

# Looks up the +hash+ entry for every code point of +string+.
#
# Returns an Array with one element per code point, in order; each element is
# whatever +hash+ holds under the normalised code point.
def infos string
  ordinals = string.codepoints
  ordinals.map do |ordinal|
    hash[__to_code_point(ordinal)]
  end
end

#to_char(cp, action = :code_point) ⇒ Object

TODO: Should this also return true/false to indicate whether the normalization was performed?



36
37
38
39
# File 'lib/forkforge/internal/unicode_data.rb', line 36

# Converts a code point to a character via one of its +hash+ fields.
#
# cp     - code point in any form accepted by +__to_code_point+.
# action - key of the field to convert (default :code_point). When that
#          field is +vacant?+ the :code_point field is used instead.
#
# Returns the result of +__to_char+ for the chosen field.
#
# NOTE(review): if +hash+ has no entry for the code point the field lookup is
# attempted on that missing value - confirm callers only pass known code
# points.
def to_char cp, action = :code_point
  record = hash[__to_code_point(cp)]
  source = if record[action].vacant?
             record[:code_point]
           else
             record[action]
           end
  __to_char(source)
end

#to_codepoint(cp) ⇒ Object



41
42
43
# File 'lib/forkforge/internal/unicode_data.rb', line 41

# Wraps the +info+ entry for +cp+ in a Forkforge::CodePoint.
#
# cp - anything accepted by +info+.
def to_codepoint cp
  record = info(cp)
  Forkforge::CodePoint.new(record)
end