Class: Unicoder::Builder::Confusable
- Inherits: Object
- Inheritance chain: Object < Unicoder::Builder::Confusable
- Includes:
- Unicoder::Builder
- Defined in:
- lib/unicoder/builders/confusable.rb
Instance Attribute Summary
#formats, #index, #option
Instance Method Summary
collapse
#assign, #assign_codepoint, build, #export, #initialize, #meta, #parse_file
Instance Method Details
#initialize_index ⇒ Object
6
7
8
9
10
11
|
# File 'lib/unicoder/builders/confusable.rb', line 6
# Replaces @index with a fresh, empty structure:
# an empty Hash under :CONFUSABLE and an empty Array under :IGNORABLE.
#
# @return [Hash] the newly assigned index
def initialize_index
  @index = { CONFUSABLE: {}, IGNORABLE: [] }
end
|
#parse! ⇒ Object
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
# File 'lib/unicoder/builders/confusable.rb', line 13
# Fills the index from two data sources via parse_file.
#
# 1. :confusables — every matching line contributes one source codepoint and
#    its replacement, passed to `assign` under :CONFUSABLE. A replacement made
#    of several space-separated codepoints becomes an Array; a single one
#    stays a scalar. When `option` matches /charvalues/, each replacement
#    codepoint is packed into a character ("U" pack directive) instead of
#    being kept as an Integer.
# 2. :core_properties — every Default_Ignorable_Code_Point line is appended to
#    @index[:IGNORABLE], either as a single Integer or, for an "A..B" entry,
#    as a two-element Array [A, B].
#
# NOTE(review): parse_file is defined outside this view; it is presumed to
# yield one match object (indexable by capture name) per matching line.
def parse!
  parse_file :confusables, :line, regex: /^(?<from>\S+)\s+;\s+(?<to>.+?)\s+;.*$/ do |line|
    origin = line["from"].to_i(16)

    # Converts one hex token into the stored form (character or Integer).
    to_stored = lambda do |hex|
      value = hex.to_i(16)
      option =~ /charvalues/ ? [value].pack("U") : value
    end

    target = line["to"]
    replacement = target.include?(" ") ? target.split(" ").map(&to_stored) : to_stored.call(target)

    assign :CONFUSABLE, origin, replacement
  end

  parse_file :core_properties, :line,
             begin: /^# Derived Property: Default_Ignorable_Code_Point$/,
             end: /^# ================================================$/,
             regex: /^(?<codepoints>\S+)\s+; Default_Ignorable_Code_Point.*$/ do |line|
    field = line["codepoints"]
    # A ".." inside the field marks a range, kept as its two endpoints.
    entry = field['..'] ? field.split('..').map { |hex| hex.to_i(16) } : field.to_i(16)
    @index[:IGNORABLE] << entry
  end
end
|