Class: Okura::Serializer::WordDic::DoubleArray

Inherits:
Object
  • Object
show all
Defined in:
lib/okura/serializer.rb

Instance Method Summary collapse

Instance Method Details

#compile(features_l, features_r, inputs, encoding, output) ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# File 'lib/okura/serializer.rb', line 175

# Builds a double-array word dictionary from +inputs+ and serializes it to
# +output+.
#
# Every input yielded by Okura::Serializer::WordDic.each_input is parsed with
# Okura::Parser::Word; each parsed entry becomes an Okura::Word whose left and
# right features are looked up with +from_id+, and is registered with the
# dictionary builder. The builder's serializable data is then written through
# an Okura::Serializer::BinaryWriter in a fixed order: the word table fields,
# the surface string table, and finally the +base+ and +check+ arrays.
#
# @param features_l [#from_id] resolves a left id to a feature
# @param features_r [#from_id] resolves a right id to a feature
# @param inputs passed through to Okura::Serializer::WordDic.each_input
# @param encoding passed through to Okura::Serializer::WordDic.each_input
# @param output destination handed to Okura::Serializer::BinaryWriter.new
# @return [Object] whatever the final +write_int32_array+ call returns
# @raise [RuntimeError] if the builder's +base+ and +check+ differ in length
def compile(features_l, features_r, inputs, encoding, output)
  puts 'loading...'
  builder = Okura::WordDic::DoubleArray::Builder.new
  Okura::Serializer::WordDic.each_input(inputs, encoding) do |input|
    Okura::Parser::Word.new(input).each do |surface, lid, rid, cost|
      builder.define(
        Okura::Word.new(surface, features_l.from_id(lid), features_r.from_id(rid), cost)
      )
    end
  end

  writer = Okura::Serializer::BinaryWriter.new(output)
  words, base, check = builder.data_for_serialize
  # Checked before anything is written, so a mismatch emits no output at all.
  raise 'base.length!=check.length' if base.length != check.length

  puts 'writing words...'
  # The word table keeps its state in instance variables; they are read
  # directly and written in this exact order.
  [:@groups, :@left_features, :@right_features].each do |ivar|
    writer.write_object(words.instance_variable_get(ivar))
  end
  [:@left_ids, :@right_ids, :@costs, :@surface_ids].each do |ivar|
    writer.write_int32_array(words.instance_variable_get(ivar))
  end
  surfaces = words.instance_variable_get(:@surfaces)
  writer.write_object(surfaces.instance_variable_get(:@str))
  writer.write_int32_array(surfaces.instance_variable_get(:@indices))

  puts 'writing word index...'
  writer.write_int32_array(base)
  writer.write_int32_array(check)
end

#load(io) ⇒ Object



212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/okura/serializer.rb', line 212

# Reads a serialized double-array word dictionary from +io+.
#
# The fields are consumed through an Okura::Serializer::BinaryReader in a
# fixed order: three objects (groups, left features, right features), four
# int32 arrays (left ids, right ids, costs, surface ids), the surface string
# table (one object plus one int32 array), and finally the +base+ and +check+
# int32 arrays.
#
# @param io source handed to Okura::Serializer::BinaryReader.new
# @return [Object] the result of
#   Okura::WordDic::DoubleArray::Builder.build_from_serialized
def load(io)
  reader = Okura::Serializer::BinaryReader.new(io)

  # Word table fields, in stream order.
  groups         = reader.read_object
  left_features  = reader.read_object
  right_features = reader.read_object
  left_ids       = reader.read_int32_array
  right_ids      = reader.read_int32_array
  costs          = reader.read_int32_array
  surface_ids    = reader.read_int32_array

  # Surface string table: backing string followed by its index array.
  surface_str     = reader.read_object
  surface_indices = reader.read_int32_array
  surfaces = Okura::Words::CompactStringArray.new(surface_str, surface_indices)

  # Constructor argument order differs from the stream order above.
  words = Okura::Words.new(
    groups, surfaces, left_features, right_features,
    surface_ids, left_ids, right_ids, costs
  )

  base = reader.read_int32_array
  check = reader.read_int32_array
  Okura::WordDic::DoubleArray::Builder.build_from_serialized([words, base, check])
end