Class: Unicoder::Builder::Name

Inherits:
Object
  • Object
show all
Includes:
Unicoder::Builder
Defined in:
lib/unicoder/builders/name.rb

Constant Summary collapse

JAMO_INITIAL =
4352
JAMO_MEDIAL =
4449
JAMO_FINAL =
4520
JAMO_END =
4697

Instance Attribute Summary

Attributes included from Unicoder::Builder

#formats, #index, #option

Instance Method Summary collapse

Methods included from Unicoder::Builder

#assign, #assign_codepoint, build, #export, #initialize, #meta, #parse_file

Instance Method Details

#initialize_index ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/unicoder/builders/name.rb', line 11

# Resets the builder state: installs an empty @index skeleton and clears the
# pending range start.
#
# @index layout after this call:
#   NAMES   - empty Hash
#   ALIASES - empty Hash
#   CJK     - empty Array
#   HANGUL  - empty Array
#   JAMO    - Hash holding the INITIAL, MEDIAL and FINAL arrays
def initialize_index
  # see https://en.wikipedia.org/wiki/Korean_language_and_computers#Hangul_Syllables_Area
  # FINAL is seeded with one empty string, unlike INITIAL and MEDIAL
  # (presumably the "no final consonant" slot -- confirm against consumers).
  jamo_short_names = { INITIAL: [], MEDIAL: [], FINAL: [""] }

  @index = { NAMES: {}, ALIASES: {}, CJK: [], HANGUL: [], JAMO: jamo_short_names }
  @range_start = nil
end

#parse! ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/unicoder/builders/name.rb', line 27

# Fills @index from three data files read through parse_file:
#
# * :unicode_data - ordinary names are stored via assign under :NAMES;
#   "<..., First>" / "<..., Last>" pairs are recorded as [first, last]
#   codepoint pairs under :HANGUL or :CJK (other ranges are not recorded).
# * :name_aliases - aliases are grouped per codepoint key and per alias type
#   under :ALIASES.
# * :jamo - short names are appended to the :INITIAL, :MEDIAL or :FINAL list
#   under :JAMO, selected by the line's codepoint.
#
# When option matches /charkeys/, alias keys are the codepoint packed with
# "U*" (a one-character string) instead of the integer codepoint.
#
# @raise [ArgumentError] for a bracketed name other than "<control>" that is
#   neither a "First" marker nor a "Last" marker with a pending "First"
def parse!
  get_key =
    if option =~ /charkeys/
      ->(value) { [value].pack("U*") }
    else
      ->(value) { value }
    end

  # Checked in insertion order; a "Last" marker matching neither is dropped.
  range_buckets = { "Hangul" => :HANGUL, "CJK" => :CJK }

  parse_file :unicode_data, :line, regex: /^(?<codepoint>.+?);(?<name>.+?);.*$/ do |row|
    name = row["name"]
    number = row["codepoint"].to_i(16)
    bracketed = name.start_with?("<") && name.end_with?(">")

    if !bracketed
      assign :NAMES, number, name
    elsif name.include?("First")
      # Remember where the range opens until its "Last" line shows up.
      @range_start = number
    elsif name.include?("Last") && @range_start
      _marker, bucket = range_buckets.find { |marker, _| name.include?(marker) }
      @index[bucket] << [@range_start, number] if bucket
      @range_start = nil
    elsif name != "<control>"
      raise ArgumentError, "inconsistent range found in data, don't know what to do"
    end
  end

  parse_file :name_aliases, :line, regex: /^(?<codepoint>.+?);(?<alias>.+?);(?<type>.*)$/ do |row|
    key = get_key[row["codepoint"].to_i(16)]
    aliases_by_type = (@index[:ALIASES][key] ||= {})
    (aliases_by_type[row["type"].to_sym] ||= []) << row["alias"]
  end

  parse_file :jamo, :line, regex: /^(?<codepoint>.+?); (?<short_name>.*?) +#.*$/ do |row|
    position =
      case row["codepoint"].to_i(16)
      when JAMO_INITIAL...JAMO_MEDIAL then :INITIAL
      when JAMO_MEDIAL...JAMO_FINAL then :MEDIAL
      when JAMO_FINAL..JAMO_END then :FINAL
      end

    # Codepoints outside all three ranges are skipped.
    @index[:JAMO][position] << row["short_name"] if position
  end
end