Module: Forkforge::UnicodeData

Extended by:: UnicodeData

Includes:: UnicodeOrgFileFormat

Included in:: UnicodeData

Defined in:: lib/forkforge/internal/unicode_data.rb

Constant Summary collapse

LOCAL =

'data'

REMOTE =

'Public/UCD/latest/ucd'

FILE =

'UnicodeData.txt'

Constants included from UnicodeOrgFileFormat

Forkforge::UnicodeOrgFileFormat::HOST

Instance Method Summary collapse

#code_points ⇒ Object
#compose_cp(cp, tag = :font, thorough = true) ⇒ Object
#decompose_cp(cp, tags = []) ⇒ Object
#hash ⇒ Object
#info(cp) ⇒ Object
#infos(string) ⇒ Object
#to_char(cp, action = :code_point) ⇒ Object
TODO: return true/false depending on whether the normalization was done?
#to_codepoint(cp) ⇒ Object

Methods included from UnicodeOrgFileFormat

#__to_char, #__to_code_point, #i_grab, #i_hash, #i_load

Instance Method Details

#code_points ⇒ `Object`



22
23
24

# File 'lib/forkforge/internal/unicode_data.rb', line 22

# Returns the CodePoints wrapper built around +hash+.
#
# The wrapper is created on first use and cached in +@codepoints+, so later
# calls return the very same object without calling +hash+ again.
#
# @return [CodePoints] the cached wrapper
def code_points
  return @codepoints if @codepoints

  @codepoints = CodePoints.new(hash)
end

#compose_cp(cp, tag = :font, thorough = true) ⇒ `Object`

# File 'lib/forkforge/internal/unicode_data.rb', line 60

# Finds the entries whose character decomposition mapping, for the given tag,
# ends with +cp+.
#
# +cp+ is first normalized with +__to_code_point+. If the tag resolved by
# +CharacterDecompositionMapping::Tag.tag+ is not +valid?+, the method simply
# wraps +hash[cp]+ in a Forkforge::CodePoint.
#
# Otherwise the candidate list for the tag is taken from +@cdm[tag]+, which is
# filled on first use with the values of
# +all_character_decomposition_mapping+ filtered by the tag's text.
# NOTE(review): +@cdm+ is assumed to be initialized elsewhere; confirm.
#
# @param cp the code point to compose (anything +__to_code_point+ accepts)
# @param tag the decomposition tag to look for, +:font+ by default
# @param thorough when truthy every match is returned, otherwise only the
#   first one
# @return [Array<Forkforge::CodePoint>] all matches when +thorough+ is truthy
# @return [Forkforge::CodePoint] the first match, or the wrapped +hash[cp]+
#   when nothing matches, when +thorough+ is falsy
def compose_cp cp, tag = :font, thorough = true
  code = __to_code_point cp
  t = CharacterDecompositionMapping::Tag.tag(tag)
  return Forkforge::CodePoint.new(hash[code]) unless t.valid?

  if @cdm[tag].nil?
    @cdm[tag] = all_character_decomposition_mapping(/#{t.tag}/).values
  end

  # FIXME Could we distinguish “<wide> 0ABC” and “0A00 0ABC” in more elegant way?
  # The mapping must end with the code point, preceded by whitespace and a
  # non-hex character (i.e. the closing bracket of a tag, not another code).
  matcher = ->(v) { v[:character_decomposition_mapping] =~ /[^\dA-Fa-f]\s+#{code}\Z/ }

  if thorough
    @cdm[tag].select(&matcher).map { |found| Forkforge::CodePoint.new(found) }
  else
    Forkforge::CodePoint.new(@cdm[tag].find(&matcher) || hash[code])
  end
end

#decompose_cp(cp, tags = []) ⇒ `Object`

# File 'lib/forkforge/internal/unicode_data.rb', line 72

# Recursively expands +cp+ through its character decomposition mapping.
#
# The mapping is fetched with +get_character_decomposition_mapping+ and split
# on spaces. When it is vacant, the normalized code point
# (+__to_code_point cp+) is returned as is.
#
# When +tags+ is not vacant and the mapping carries a tag that is not listed
# in +tags+, the code point is left undecomposed and the normalized value is
# returned. Otherwise tag tokens are dropped and every remaining token is
# decomposed in turn with the same +tags+.
#
# @param cp the code point to decompose
# @param tags a single tag symbol or a list of them; empty means "any tag"
# @return the normalized code point when nothing is decomposed, otherwise an
#   Array holding the (possibly nested) results for each mapped token
def decompose_cp cp, tags = []
  normalized = __to_code_point cp
  mapping = get_character_decomposition_mapping cp
  return normalized if mapping.vacant?

  parts = mapping.split ' '
  wanted = [*tags]

  unless wanted.vacant?
    # A tag outside the requested set blocks decomposition entirely.
    foreign = parts.any? do |part|
      CharacterDecompositionMapping::Tag::tag?(part) &&
        !wanted.include?(CharacterDecompositionMapping::Tag::tag(part).sym)
    end
    return normalized if foreign
  end

  plain = parts.reject { |part| Forkforge::CharacterDecompositionMapping::Tag::tag? part }
  plain.map { |part| decompose_cp part, tags }
end

#hash ⇒ `Object`



18
19
20

# File 'lib/forkforge/internal/unicode_data.rb', line 18

# Delegates to +i_hash+ (mixed in from UnicodeOrgFileFormat), passing this
# module's REMOTE, LOCAL and FILE constants together with
# CodePoint::UNICODE_FIELDS.
#
# NOTE(review): presumably the result is the parsed UnicodeData.txt table,
# keyed by code point (the sibling methods index it with the result of
# +__to_code_point+) — confirm against +i_hash+.
# NOTE(review): the meaning of the trailing +false+ argument is not visible
# from here — confirm against +i_hash+.
# NOTE(review): this name shadows Object#hash on anything that includes or
# extends the module — confirm that is intended.
def hash
  i_hash(REMOTE, LOCAL, FILE, CodePoint::UNICODE_FIELDS, false)
end

#info(cp) ⇒ `Object`

# File 'lib/forkforge/internal/unicode_data.rb', line 26

# Looks up the entry of +hash+ for a single code point.
#
# A one-character String is first turned into that character's integer code
# point; any other argument is used untouched. The value is then passed
# through +__to_code_point+ and used as the key into +hash+.
#
# @param cp a one-character String, or anything +__to_code_point+ accepts
# @return the entry stored in +hash+ under the normalized key
def info cp
  key = String === cp && cp.length == 1 ? cp.codepoints.first : cp
  hash[__to_code_point(key)]
end

#infos(string) ⇒ `Object`



31
32
33

# File 'lib/forkforge/internal/unicode_data.rb', line 31

# Looks up the entry of +hash+ for every code point of +string+.
#
# Each integer code point of the string is passed through
# +__to_code_point+ and used as the key into +hash+.
#
# @param string [String] the text to inspect
# @return [Array] one +hash+ entry per code point, in string order
def infos string
  string.each_codepoint.map { |code| hash[__to_code_point(code)] }
end

#to_char(cp, action = :code_point) ⇒ `Object`

TODO: return true/false depending on whether the normalization was done?

# File 'lib/forkforge/internal/unicode_data.rb', line 36

# Converts a code point to a character via one field of its +hash+ entry.
#
# The entry is fetched with the key produced by +__to_code_point+. The value
# of the +action+ field is handed to +__to_char+; when that field is vacant,
# the entry's +:code_point+ field is used instead.
#
# @param cp the code point to convert
# @param action [Symbol] the entry field to read, +:code_point+ by default
# @return whatever +__to_char+ returns for the chosen field value
def to_char cp, action = :code_point
  record = hash[__to_code_point(cp)]
  chosen = record[action]
  chosen = record[:code_point] if chosen.vacant?
  __to_char(chosen)
end

#to_codepoint(cp) ⇒ `Object`



41
42
43

# File 'lib/forkforge/internal/unicode_data.rb', line 41

# Wraps the entry returned by +info+ for +cp+ in a Forkforge::CodePoint.
#
# @param cp anything +info+ accepts
# @return [Forkforge::CodePoint] the wrapped entry
def to_codepoint cp
  record = info(cp)
  Forkforge::CodePoint.new(record)
end

Module: Forkforge::UnicodeData

Constant Summary collapse

Constants included from UnicodeOrgFileFormat

Instance Method Summary collapse

Methods included from UnicodeOrgFileFormat

Instance Method Details

#code_points ⇒ Object

#compose_cp(cp, tag = :font, thorough = true) ⇒ Object

#decompose_cp(cp, tags = []) ⇒ Object

#hash ⇒ Object

#info(cp) ⇒ Object

#infos(string) ⇒ Object

#to_char(cp, action = :code_point) ⇒ Object

#to_codepoint(cp) ⇒ Object

#code_points ⇒ `Object`

#compose_cp(cp, tag = :font, thorough = true) ⇒ `Object`

#decompose_cp(cp, tags = []) ⇒ `Object`

#hash ⇒ `Object`

#info(cp) ⇒ `Object`

#infos(string) ⇒ `Object`

#to_char(cp, action = :code_point) ⇒ `Object`

#to_codepoint(cp) ⇒ `Object`