Module: Unicoder::Builder
- Included in:
- Blocks, Categories, Confusable, DisplayWidth, Emoji, Name, NumericValue, Scripts, SequenceName, Types
- Defined in:
- lib/unicoder/builder.rb,
lib/unicoder/builders/name.rb,
lib/unicoder/builders/emoji.rb,
lib/unicoder/builders/types.rb,
lib/unicoder/builders/blocks.rb,
lib/unicoder/builders/scripts.rb,
lib/unicoder/builders/categories.rb,
lib/unicoder/builders/confusable.rb,
lib/unicoder/builders/display_width.rb,
lib/unicoder/builders/numeric_value.rb,
lib/unicoder/builders/sequence_name.rb
Overview
A builder defines a parse function which translates one (or more) Unicode data files into an index hash.
Defined Under Namespace
Classes: Blocks, Categories, Confusable, DisplayWidth, Emoji, Name, NumericValue, Scripts, SequenceName, Types
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
Instance Attribute Details
Returns the value of attribute formats.
8
9
10
|
# File 'lib/unicoder/builder.rb', line 8
# Reader for the +formats+ attribute.
#
# @return [Object] whatever is currently stored in @formats
def formats
  @formats
end
|
#index ⇒ Object
Returns the value of attribute index.
8
9
10
|
# File 'lib/unicoder/builder.rb', line 8
# Reader for the +index+ attribute.
#
# @return [Object] whatever is currently stored in @index
def index
  @index
end
|
#option ⇒ Object
Returns the value of attribute option.
8
9
10
|
# File 'lib/unicoder/builder.rb', line 8
# Reader for the +option+ attribute.
#
# @return [Object] whatever is currently stored in @option
def option
  @option
end
|
Class Method Details
.build(identifier, **options) ⇒ Object
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
|
# File 'lib/unicoder/builder.rb', line 113
# Builds the index named by +identifier+ and writes it to a file.
#
# Loads "builders/<identifier>", instantiates the matching builder class
# (snake_case identifier -> CamelCase constant), runs its #parse!, exports
# the result and writes it to the destination path.
#
# @param identifier [String, Symbol] snake_case builder name
# @param options [Hash] recognised keys used here: :format (defaults to
#   :marshal), :unicode_version, :emoji_version, :option, :destination
#   (defaults to the identifier) and :gzip; all options are forwarded to
#   the builder's #export
# @return [nil] the result of the final +puts+
def self.build(identifier, **options)
  format = options[:format] || :marshal
  require_relative "builders/#{identifier}"
  builder_class = self.const_get(identifier.to_s.gsub(/(?:^|_)([a-z])/){ $1.upcase })
  builder = builder_class.new(
    options[:unicode_version],
    options[:emoji_version],
    format
  )
  puts "Building index for #{identifier}…"
  builder.option = options[:option] if options[:option]
  builder.parse!

  # Forward the resolved format: a caller passing `format: nil` would
  # otherwise bypass the :marshal default chosen above.
  index_file = builder.export(**options.merge(format: format))

  destination = options[:destination] || identifier.to_s
  destination += builder.formats.dig(format.to_sym, :ext).to_s
  destination += ".gz" if options[:gzip]

  # The payload may be Marshal or gzip data, so write it in binary mode to
  # avoid newline/encoding translation.
  bytes = File.binwrite(destination, index_file)
  puts "Index created at: #{destination} (#{bytes} bytes written)"
end
|
Instance Method Details
#assign(sub_index_name, codepoint, value) ⇒ Object
55
56
57
|
# File 'lib/unicoder/builder.rb', line 55
# Stores +value+ for +codepoint+ inside the sub-index found at
# index[sub_index_name], delegating the key handling to #assign_codepoint.
#
# @param sub_index_name [Object] key of the sub-index within #index
# @param codepoint [Object] codepoint passed on to #assign_codepoint
# @param value [Object] value to store
# @return [Object] whatever #assign_codepoint returns
def assign(sub_index_name, codepoint, value)
  sub_index = index[sub_index_name]
  assign_codepoint(codepoint, value, sub_index)
end
|
#assign_codepoint(codepoint, value, idx = @index) ⇒ Object
47
48
49
50
51
52
53
|
# File 'lib/unicoder/builder.rb', line 47
# Stores +value+ in +idx+ under a key derived from +codepoint+.
#
# When #option matches /charkeys/ the key is the string produced by
# packing the codepoint with "U*"; otherwise the codepoint itself is the key.
#
# @param codepoint [Integer] codepoint to store the value for
# @param value [Object] value to store
# @param idx [Hash] target index, defaults to @index
# @return [Object] the stored value
def assign_codepoint(codepoint, value, idx = @index)
  key = (option =~ /charkeys/) ? [codepoint].pack("U*") : codepoint
  idx[key] = value
end
|
#export(format: :marshal, **options) ⇒ Object
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
# File 'lib/unicoder/builder.rb', line 87
# Serializes the index into the requested format.
#
# @param format [Symbol, String, nil] :marshal, :json or :esm; nil is
#   treated as :marshal
# @param options [Hash] recognised keys:
#   :verbose - print the index with +p+ before exporting
#   :meta    - merge #meta with the index (index entries take precedence)
#   :gzip    - compress the serialized data with Gem::Util.gzip
# @return [String] the serialized (and optionally gzipped) index
# @raise [ArgumentError] if +format+ is not one of the supported formats
def export(format: :marshal, **options)
  p index if options[:verbose]

  idx = options[:meta] ? meta.merge(index) : index

  index_file =
    case (format || :marshal).to_sym
    when :marshal
      Marshal.dump(idx)
    when :json
      JSON.dump(idx)
    when :esm
      "export default " + JSON.dump(idx)
    else
      # Without this branch an unknown format silently produced nil.
      raise ArgumentError, "Unsupported export format: #{format.inspect}"
    end

  options[:gzip] ? Gem::Util.gzip(index_file) : index_file
end
|
#initialize(unicode_version = nil, emoji_version = nil, format = nil) ⇒ Object
36
37
38
39
40
41
|
# File 'lib/unicoder/builder.rb', line 36
# Sets up versions, the format-specific option string and an empty index.
#
# @param unicode_version [String, nil] falls back to CURRENT_UNICODE_VERSION
# @param emoji_version [String, nil] falls back to CURRENT_EMOJI_VERSION
# @param format [Symbol, String, nil] key into #formats; its :option entry
#   becomes @option, or "" when the format is nil, unknown or has no :option
def initialize(unicode_version = nil, emoji_version = nil, format = nil)
  @unicode_version = unicode_version || CURRENT_UNICODE_VERSION
  @emoji_version = emoji_version || CURRENT_EMOJI_VERSION

  # format defaults to nil, so it must be guarded before calling #to_sym;
  # otherwise constructing a builder without a format raises NoMethodError.
  format_key = format && format.to_sym
  @option = (format_key && formats.dig(format_key, :option)) || ""

  initialize_index
end
|
#initialize_index ⇒ Object
43
44
45
|
# File 'lib/unicoder/builder.rb', line 43
# Resets @index to a fresh, empty Hash.
#
# @return [Hash] the new empty index
def initialize_index
  @index = Hash.new
end
|
27
28
29
30
31
32
33
34
|
# File 'lib/unicoder/builder.rb', line 27
# Metadata describing how the index was generated.
#
# @return [Hash] a single :META entry holding the generator string
#   (built from Unicoder::VERSION) and the value of @unicode_version
def meta
  details = {}
  details[:generator] = "unicoder v#{Unicoder::VERSION}"
  details[:unicodeVersion] = @unicode_version
  { META: details }
end
|
#parse! ⇒ Object
59
60
61
|
# File 'lib/unicoder/builder.rb', line 59
# Placeholder implementation: always raises.
#
# @raise [ArgumentError] always, with the message "abstract"
def parse!
  raise(ArgumentError, "abstract")
end
|
#parse_file(identifier, parse_mode, **parse_options) ⇒ Object
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
# File 'lib/unicoder/builder.rb', line 63
# Reads the data file registered under +identifier+ and yields its contents.
#
# The file name template comes from UNICODE_FILES; the placeholders
# UNICODE_VERSION, EMOJI_VERSION and EMOJI_RELATED_VERSION are substituted,
# a ".zip" suffix and a leading URL scheme are stripped, and the result is
# appended to LOCAL_DATA_DIRECTORY. If that file does not exist,
# Downloader.fetch(identifier) is called before reading it.
#
# @param identifier [Symbol, String] key into UNICODE_FILES
# @param parse_mode [Symbol] :line yields one Hash of named captures per
#   line matching parse_options[:regex]; :xml yields the document parsed by
#   Oga; anything else yields the raw file contents
# @param parse_options [Hash] :regex is used in :line mode
# @yield [Hash, Object, String] see +parse_mode+
# @raise [ArgumentError] if +identifier+ is not registered, or the file name
#   needs EMOJI_RELATED_VERSION and none is known for @emoji_version
def parse_file(identifier, parse_mode, **parse_options)
  template = UNICODE_FILES[identifier.to_sym]
  raise ArgumentError, "No valid file identifier or filename given" unless template

  filename = template.dup
  filename.sub!('UNICODE_VERSION', @unicode_version)
  filename.sub!('EMOJI_VERSION', @emoji_version)

  # String#sub! rejects a nil replacement even when the pattern is absent, so
  # only look up the related version for file names that actually need it.
  if filename.include?('EMOJI_RELATED_VERSION')
    related_version = EMOJI_RELATED_UNICODE_VERSIONS[@emoji_version]
    unless related_version
      raise ArgumentError, "No related Unicode version known for emoji version #{@emoji_version.inspect}"
    end
    filename.sub!('EMOJI_RELATED_VERSION', related_version)
  end

  filename.sub!('.zip', '')
  filename.sub!(/\A(https?|ftp):\//, "")

  local_path = LOCAL_DATA_DIRECTORY + filename
  Downloader.fetch(identifier) unless File.exist?(local_path)
  file = File.read(local_path)

  if parse_mode == :line
    file.each_line do |line|
      yield Hash[$~.names.zip($~.captures)] if line =~ parse_options[:regex]
    end
  elsif parse_mode == :xml
    # Loaded lazily so the dependency is only needed for XML sources.
    require "oga"
    yield Oga.parse_xml(file)
  else
    yield file
  end
end
|