39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
# File 'lib/unicoder/builders/sequence_name.rb', line 39
# Walks every sequence data source and hands each matched codepoint sequence,
# together with a name built from the matched line, to assign_codepoint.
#
# Sources are processed in this order: :named_sequences,
# :named_sequences_prov, :standardized_variants (two passes, one per line
# format), :ivd_sequences, :emoji_variation_sequences, :emoji_sequences
# (entries whose type is "Basic_Emoji" are skipped) and :emoji_zwj_sequences.
#
# For ZWJ sequences that contain U+FE0F, every variant with one or more of
# the U+FE0F codepoints removed is also stored in
# @index[:EMOJI_NOT_QUALIFIED], keyed by the variant string and pointing at
# the full sequence string.
#
# Finishes by calling replace_common_words! for :SEQUENCES.
#
# NOTE(review): parse_file, assign_codepoint and replace_common_words! live
# outside this method; the meaning of `combine: true` should be confirmed
# against assign_codepoint.
def parse!
  # Turns a whitespace-separated list of hex numbers into an Integer array.
  hex_codepoints = ->(field) { field.split.map { |hex| hex.to_i(16) } }

  # Replaces literal "\x{HEX}" escapes with the character they denote and
  # upcases the resulting name.
  emoji_name = lambda do |raw|
    raw.gsub(/\\x{(\h+)}/) { [Regexp.last_match(1).to_i(16)].pack("U") }.upcase
  end

  # Both named-sequence sources share the same "name;codepoints" line format.
  [:named_sequences, :named_sequences_prov].each do |source|
    parse_file(source, :line, regex: /^(?!#)(?<name>.+?);(?<codepoints>.+?)$/) do |line|
      assign_codepoint(hex_codepoints.call(line["codepoints"]), line["name"])
    end
  end

  # First pass: lines carrying a trailing "# NAME" comment, optionally with a
  # context field between the variant description and the comment.
  parse_file(:standardized_variants, :line, regex: /^(?<codepoints>.+?);\s*(?<variant>.+?)\s*;\s*(?<context>.*?)\s*# (?<name>.+)$/) do |line|
    context = line["context"]
    label = "#{line["name"].strip} (#{line["variant"]})"
    label << " [#{context}]" if context && !context.empty?
    assign_codepoint(hex_codepoints.call(line["codepoints"]), label, combine: true)
  end

  # Second pass: lines that end right after the description field.
  parse_file(:standardized_variants, :line, regex: /^(?<codepoints>.+?); (?<name>.+?)\s*;$/) do |line|
    assign_codepoint(hex_codepoints.call(line["codepoints"]), line["name"])
  end

  parse_file(:ivd_sequences, :line, regex: /^(?<codepoints>.+?);.*?; (?<name>.+?)$/) do |line|
    assign_codepoint(hex_codepoints.call(line["codepoints"]), line["name"], combine: true)
  end

  parse_file(:emoji_variation_sequences, :line, regex: /^(?<codepoints>.+?)\s*;\s*(?<variant>.+?)\s*;\s*# \(.*\)\s*(?<name>.+?)\s*$/) do |line|
    label = "#{line["name"].strip} (#{line["variant"]})"
    assign_codepoint(hex_codepoints.call(line["codepoints"]), label)
  end

  parse_file(:emoji_sequences, :line, regex: /^(?<codepoints>.+?)\s*;\s*(?<type>.+?)\s*; (?<name>.+?)\s*#/) do |line|
    unless line["type"] == "Basic_Emoji"
      label = emoji_name.call(line["name"])
      assign_codepoint(hex_codepoints.call(line["codepoints"]), label)
    end
  end

  parse_file(:emoji_zwj_sequences, :line, regex: /^(?!#)(?<codepoints>.+?)\s*;.*?; (?<name>.+?)\s*#/) do |line|
    label = emoji_name.call(line["name"])
    codepoints = hex_codepoints.call(line["codepoints"])
    assign_codepoint(codepoints, label)

    if codepoints.include?(0xFE0F)
      qualified = codepoints.pack("U*")

      # Each chunk ends at a U+FE0F (except possibly the last one). Every
      # prefix built so far is continued both with and without that U+FE0F,
      # which enumerates all keep/drop combinations.
      expansions = codepoints.slice_after(0xFE0F).inject([[]]) do |prefixes, chunk|
        if chunk.include?(0xFE0F)
          bare = chunk - [0xFE0F]
          prefixes.flat_map { |prefix| [prefix + bare, prefix + chunk] }
        else
          prefixes.map { |prefix| prefix + chunk }
        end
      end

      # The combination that keeps every U+FE0F is the sequence itself.
      expansions.reject { |candidate| candidate == codepoints }.each do |candidate|
        @index[:EMOJI_NOT_QUALIFIED][candidate.pack("U*")] = qualified
      end
    end
  end

  replace_common_words!(:SEQUENCES, @words, REPLACE_COUNT, REPLACE_BASE, REPLACE_MIN_WORD_LENGTH)
end
|