Class: Unicoder::Builder::Confusable

Inherits:
Object
  • Object
show all
Includes:
Unicoder::Builder
Defined in:
lib/unicoder/builders/confusable.rb

Instance Attribute Summary

Attributes included from Unicoder::Builder

#formats, #index, #option

Instance Method Summary collapse

Methods included from Unicoder::Builder

#assign, #assign_codepoint, build, #export, #initialize, #meta, #parse_file

Instance Method Details

#initialize_index ⇒ Object



6
7
8
9
10
11
# File 'lib/unicoder/builders/confusable.rb', line 6

# Resets @index to its empty starting shape.
#
# :CONFUSABLE starts as an empty Hash and :IGNORABLE as an empty Array;
# parse! is the method in this class that fills both.
#
# Returns the freshly assigned index Hash.
def initialize_index
  confusable_table = {}
  ignorable_list = []
  @index = { CONFUSABLE: confusable_table, IGNORABLE: ignorable_list }
end

#parse! ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/unicoder/builders/confusable.rb', line 13

# Fills the index from the two inputs handed to parse_file.
#
# 1. :confusables - every matching line yields a "from" field and a "to"
#    field. "from" is read as a hexadecimal integer. "to" is either a single
#    hex value or several separated by spaces; it becomes one value or an
#    Array of values respectively. When `option` matches /charvalues/ each
#    value is packed into a UTF-8 string, otherwise it stays an Integer.
#    The pair is recorded with `assign :CONFUSABLE, from, to`.
# 2. :core_properties - only lines between the Default_Ignorable_Code_Point
#    header and the next "# ====" separator are requested. Each "codepoints"
#    field is either one hex value (stored as an Integer) or "A..B" (stored
#    as a two-element Array [A, B]) and is appended to @index[:IGNORABLE].
#
# NOTE(review): the yielded line object is presumably a MatchData (or
# similar) indexed by capture name - confirm against Unicoder::Builder#parse_file.
#
# Returns whatever the second parse_file call returns.
def parse!
  confusable_pattern = /^(?<from>\S+)\s+;\s+(?<to>.+?)\s+;.*$/
  parse_file :confusables, :line, regex: confusable_pattern do |entry|
    origin = entry["from"].to_i(16)
    target = entry["to"]

    # Converts one hex field into either a character or a plain codepoint,
    # depending on the builder option.
    decode = lambda do |hex|
      number = hex.to_i(16)
      if option =~ /charvalues/
        [number].pack("U")
      else
        number
      end
    end

    # A space inside the field means a multi-codepoint replacement.
    replacement = target.include?(" ") ? target.split(" ").map(&decode) : decode.call(target)
    assign :CONFUSABLE, origin, replacement
  end

  section_start = /^# Derived Property: Default_Ignorable_Code_Point$/
  section_stop = /^# ================================================$/
  ignorable_pattern = /^(?<codepoints>\S+)\s+; Default_Ignorable_Code_Point.*$/
  parse_file :core_properties, :line, begin: section_start, end: section_stop, regex: ignorable_pattern do |entry|
    field = entry["codepoints"]

    # "A..B" denotes a range and is kept as its two endpoints.
    codepoints = field['..'] ? field.split('..').map { |hex| hex.to_i(16) } : field.to_i(16)
    @index[:IGNORABLE] << codepoints
  end
end