Module: Unicoder::Builder
- Included in:
- Blocks, Categories, Confusable, DisplayWidth, Emoji, Name, NumericValue, Scripts, SequenceName, Types
- Defined in:
- lib/unicoder/builder.rb,
lib/unicoder/builders/name.rb,
lib/unicoder/builders/emoji.rb,
lib/unicoder/builders/types.rb,
lib/unicoder/builders/blocks.rb,
lib/unicoder/builders/scripts.rb,
lib/unicoder/builders/categories.rb,
lib/unicoder/builders/confusable.rb,
lib/unicoder/builders/display_width.rb,
lib/unicoder/builders/numeric_value.rb,
lib/unicoder/builders/sequence_name.rb
Overview
A builder defines a parse function which translates one (or more) Unicode data files into an index hash
Defined Under Namespace
Classes: Blocks, Categories, Confusable, DisplayWidth, Emoji, Name, NumericValue, Scripts, SequenceName, Types
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
Instance Attribute Details
Returns the value of attribute formats.
8
9
10
|
# File 'lib/unicoder/builder.rb', line 8
def formats
@formats
end
|
#index ⇒ Object
Returns the value of attribute index.
8
9
10
|
# File 'lib/unicoder/builder.rb', line 8
def index
@index
end
|
#option ⇒ Object
Returns the value of attribute option.
8
9
10
|
# File 'lib/unicoder/builder.rb', line 8
def option
@option
end
|
Class Method Details
.build(identifier, **options) ⇒ Object
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
|
# File 'lib/unicoder/builder.rb', line 123
def self.build(identifier, **options)
format = options[:format] || :marshal
require_relative "builders/#{identifier}"
builder_class = self.const_get(identifier.to_s.gsub(/(?:^|_)([a-z])/){ $1.upcase })
builder = builder_class.new(
options[:unicode_version],
options[:emoji_version],
format
)
puts "Building index for #{identifier}…"
if options[:option]
builder.option = options[:option]
end
builder.parse!
index_file = builder.export(**options)
destination ||= options[:destination] || identifier.to_s
destination += "#{builder.formats.dig(format.to_sym, :ext)}"
destination += ".gz" if options[:gzip]
bytes = File.write destination, index_file
puts "Index created at: #{destination} (#{bytes} bytes written)"
end
|
Instance Method Details
#assign(sub_index_name, codepoint, value) ⇒ Object
55
56
57
|
# File 'lib/unicoder/builder.rb', line 55
def assign(sub_index_name, codepoint, value)
assign_codepoint(codepoint, value, index[sub_index_name])
end
|
#assign_codepoint(codepoint, value, idx = @index) ⇒ Object
47
48
49
50
51
52
53
|
# File 'lib/unicoder/builder.rb', line 47
def assign_codepoint(codepoint, value, idx = @index)
if option =~ /charkeys/
idx[[codepoint].pack("U*")] = value
else
idx[codepoint] = value
end
end
|
#export(format: :marshal, **options) ⇒ Object
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
# File 'lib/unicoder/builder.rb', line 97
def export(format: :marshal, **options)
p index if options[:verbose]
if options[:meta]
idx = meta.merge(index)
else
idx = index
end
case format.to_sym
when :marshal
index_file = Marshal.dump(idx)
when :json
index_file = JSON.dump(idx)
when :esm
index_file = "export default " + JSON.dump(idx)
end
if options[:gzip]
Gem::Util.gzip(index_file)
else
index_file
end
end
|
#initialize(unicode_version = nil, emoji_version = nil, format = nil) ⇒ Object
36
37
38
39
40
41
|
# File 'lib/unicoder/builder.rb', line 36
def initialize(unicode_version = nil, emoji_version = nil, format = nil)
@unicode_version = unicode_version || CURRENT_UNICODE_VERSION
@emoji_version = emoji_version || CURRENT_EMOJI_VERSION
@option = formats[format.to_sym] ? formats[format.to_sym][:option] || "" : ""
initialize_index
end
|
#initialize_index ⇒ Object
43
44
45
|
# File 'lib/unicoder/builder.rb', line 43
def initialize_index
@index = {}
end
|
27
28
29
30
31
32
33
34
|
# File 'lib/unicoder/builder.rb', line 27
def meta
{
META: {
generator: "unicoder v#{Unicoder::VERSION}",
unicodeVersion: @unicode_version,
},
}
end
|
#parse! ⇒ Object
59
60
61
|
# File 'lib/unicoder/builder.rb', line 59
def parse!
raise ArgumentError, "abstract"
end
|
#parse_file(identifier, parse_mode, **parse_options) ⇒ Object
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
# File 'lib/unicoder/builder.rb', line 63
def parse_file(identifier, parse_mode, **parse_options)
filename = UNICODE_FILES[identifier.to_sym] || filename
raise ArgumentError, "No valid file identifier or filename given" if !filename
filename = filename.dup
filename.sub! 'UNICODE_VERSION', @unicode_version
filename.sub! 'EMOJI_VERSION', @emoji_version
filename.sub! 'EMOJI_RELATED_VERSION', EMOJI_RELATED_UNICODE_VERSIONS[@emoji_version]
filename.sub! '.zip', ''
filename.sub! /\A(https?|ftp):\//, ""
Downloader.fetch(identifier) unless File.exist?(LOCAL_DATA_DIRECTORY + filename)
file = File.read(LOCAL_DATA_DIRECTORY + filename)
if parse_mode == :line
active = !parse_options[:begin]
file.each_line{ |line|
if !active && parse_options[:begin] && line.match?(parse_options[:begin])
active = true
elsif active && parse_options[:end] && line.match?(parse_options[:end])
active = false
end
if active
yield Hash[ $~.names.zip( $~.captures ) ] if line =~ parse_options[:regex]
end
}
elsif parse_mode == :xml
require "oga"
yield Oga.parse_xml(file)
else
yield file
end
end
|