Class: Okura::UnkDic

Inherits:
Object
  • Object
show all
Defined in:
lib/okura.rb

Instance Method Summary collapse

Constructor Details

#initialize(char_types) ⇒ UnkDic

CharTypes -> Features ->



289
290
291
292
293
# File 'lib/okura.rb', line 289

# Sets up an unknown-word dictionary with no templates registered yet.
#
# char_types - the character-type table; stored unchanged and consulted
#              later by #define (via +named+) and #possible_words
#              (via +type_for+).
def initialize(char_types)
  @char_types = char_types
  # Maps CharType.name => [Word]; entries are appended by #define.
  @templates = {}
end

Instance Method Details

#define(type_name, left, right, cost) ⇒ Object

String -> Feature -> Feature -> Integer ->



320
321
322
323
# File 'lib/okura.rb', line 320

# String -> Feature -> Feature -> Integer ->
#
# Registers a word template for the character type called +type_name+.
#
# type_name - name of the character type the template belongs to
# left      - left feature handed to Word.new
# right     - right feature handed to Word.new
# cost      - cost handed to Word.new
#
# Returns the array of templates stored for +type_name+ (the same array that
# is kept internally, now including the new template).
def define(type_name, left, right, cost)
  # The lookup result is not needed here; the call is kept so that whatever
  # @char_types.named does for this name still happens before the template is
  # stored. NOTE(review): presumably it rejects unknown type names - confirm.
  @char_types.named(type_name)

  # Templates carry an empty surface string; the list is created on first use.
  (@templates[type_name] ||= []).push(Word.new('', left, right, cost))
end

#possible_words(str, i, found_in_normal_dic) ⇒ Object

-> [Word]



295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# File 'lib/okura.rb', line 295

# -> [Word]
#
# Gathers unknown-word candidates that start at character index +i+ of +str+.
#
# str                 - the text being analysed
# i                   - index of the first character of the candidates
# found_in_normal_dic - when true, candidates are produced only if the
#                       character type of str[i] answers true to +invoke?+
#
# Returns the array that was filled through +collect_result+ (an empty array
# when candidate generation is skipped).
def possible_words(str, i, found_in_normal_dic)
  char_type = @char_types.type_for(str[i].ord)
  return [] if found_in_normal_dic && !char_type.invoke?

  words = []

  # The one-character candidate is offered whenever the type's length is positive.
  collect_result(words, char_type, str[i..i]) if char_type.length > 0

  # Walk forward while the following characters are accepted by the type of
  # the first character; +run+ is the number of characters matched so far.
  run = 1
  str[(i + 1)..-1].each_codepoint do |codepoint|
    break unless char_type.accept?(codepoint)

    run += 1
    # Prefixes are offered only up to the type's configured length.
    collect_result(words, char_type, str[i...(i + run)]) if char_type.length >= run
  end

  # For grouping types the whole run is offered too, unless the loop above
  # already offered it (i.e. only when the run is longer than the length limit).
  if char_type.group? && char_type.length < run
    collect_result(words, char_type, str[i...(i + run)])
  end

  words
end

#rule_size ⇒ Object



327
328
329
# File 'lib/okura.rb', line 327

# Counts every word template registered so far, across all character types.
#
# Returns the sum of the sizes of all template lists (0 when none exist).
def rule_size
  total = 0
  @templates.each_value { |words| total += words.size }
  total
end

#word_templates_for(type_name) ⇒ Object



324
325
326
# File 'lib/okura.rb', line 324

# Returns a copy of the word templates registered for +type_name+.
#
# type_name - character-type name, as passed to #define
#
# The result is a shallow copy, so adding to or removing from it does not
# touch the stored list. A type with no templates yields an empty array:
# calling +dup+ on the missing (nil) entry used to raise TypeError on
# Ruby < 2.4 and return nil on later versions.
def word_templates_for(type_name)
  (@templates[type_name] || []).dup
end