175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
|
# File 'lib/okura/serializer.rb', line 175
# Builds a word dictionary from +inputs+ and serializes it to +output+.
#
# Every input yielded by Okura::Serializer::WordDic.each_input is parsed with
# Okura::Parser::Word; each parsed entry becomes an Okura::Word that is
# registered with an Okura::WordDic::DoubleArray::Builder. The builder's
# serialization data is then written through an
# Okura::Serializer::BinaryWriter in a fixed order: the word table fields,
# the surface table fields, and finally the base and check arrays.
#
# @param features_l [#from_id] looked up with each entry's +lid+
# @param features_r [#from_id] looked up with each entry's +rid+
# @param inputs forwarded to Okura::Serializer::WordDic.each_input
# @param encoding forwarded to Okura::Serializer::WordDic.each_input
# @param output handed to Okura::Serializer::BinaryWriter.new
# @return the result of the final +write_int32_array+ call
# @raise [RuntimeError] if the base and check arrays differ in length
def compile(features_l, features_r, inputs, encoding, output)
  puts 'loading...'
  builder = Okura::WordDic::DoubleArray::Builder.new
  Okura::Serializer::WordDic.each_input(inputs, encoding) do |input|
    Okura::Parser::Word.new(input).each do |surface, lid, rid, cost|
      builder.define(
        Okura::Word.new(
          surface,
          features_l.from_id(lid),
          features_r.from_id(rid),
          cost
        )
      )
    end
  end

  writer = Okura::Serializer::BinaryWriter.new(output)
  words, base, check = builder.data_for_serialize
  # Checked before anything is written, so a mismatch emits no data.
  raise 'base.length!=check.length' if base.length != check.length

  puts 'writing words...'
  # Fields are read straight from instance variables, one at a time, and
  # written in exactly this order.
  %i[@groups @left_features @right_features].each do |ivar|
    writer.write_object(words.instance_variable_get(ivar))
  end
  %i[@left_ids @right_ids @costs @surface_ids].each do |ivar|
    writer.write_int32_array(words.instance_variable_get(ivar))
  end
  surfaces = words.instance_variable_get(:@surfaces)
  writer.write_object(surfaces.instance_variable_get(:@str))
  writer.write_int32_array(surfaces.instance_variable_get(:@indices))

  puts 'writing word index...'
  writer.write_int32_array(base)
  writer.write_int32_array(check)
end
|