Class: Okura::UnkDic
- Inherits:
-
Object
- Object
- Okura::UnkDic
- Defined in:
- lib/okura.rb
Instance Method Summary
-
#define(type_name, left, right, cost) ⇒ Object
Registers a word template for a character type: String (type_name) -> Feature (left) -> Feature (right) -> Integer (cost).
-
#initialize(char_types) ⇒ UnkDic
constructor
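CharTypes -> UnkDic (the constructor takes only char_types; the Features argument listed previously is not part of the signature).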
-
#possible_words(str, i, found_in_normal_dic) ⇒ Object
Returns the candidate words starting at index i of str: -> [Word].
- #rule_size ⇒ Object
- #word_templates_for(type_name) ⇒ Object
Constructor Details
#initialize(char_types) ⇒ UnkDic
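CharTypes -> UnkDic (the constructor takes only char_types; the Features argument listed previously is not part of the signature)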
289 290 291 292 293 |
# File 'lib/okura.rb', line 289
# Creates a dictionary with no word templates registered yet.
#
# @param char_types stored unchanged in @char_types; the other methods of
#   this class call +type_for+ and +named+ on it.
def initialize(char_types)
  @char_types = char_types
  # Lookup table: CharType.name => [Word]
  @templates = {}
end
Instance Method Details
#define(type_name, left, right, cost) ⇒ Object
Registers a word template for a character type: String (type_name) -> Feature (left) -> Feature (right) -> Integer (cost)
320 321 322 323 |
# File 'lib/okura.rb', line 320
# Registers a word template under +type_name+.
#
# String -> Feature -> Feature -> Integer ->
#
# @param type_name key under which the template is stored in @templates;
#   also passed to @char_types.named
# @param left  forwarded to Word.new as the second argument
# @param right forwarded to Word.new as the third argument
# @param cost  forwarded to Word.new as the fourth argument
# @return [Array] the list of templates stored for +type_name+, including
#   the one just added
def define(type_name, left, right, cost)
  # The looked-up type itself is not needed here. The call is kept because it
  # may reject unknown names -- NOTE(review): confirm against CharTypes#named.
  @char_types.named(type_name)
  # Templates carry an empty surface string.
  (@templates[type_name] ||= []).push(Word.new('', left, right, cost))
end
#possible_words(str, i, found_in_normal_dic) ⇒ Object
Returns the candidate words starting at index i of str: -> [Word]
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 |
# File 'lib/okura.rb', line 295
# Gathers candidate words for the text starting at index +i+ of +str+.
#
# -> [Word]
#
# The character type of str[i] (from @char_types.type_for) drives everything:
# * if +found_in_normal_dic+ is truthy and the type's +invoke?+ is falsy, an
#   empty array is returned immediately;
# * otherwise collect_result is called for each prefix whose length does not
#   exceed the type's +length+, and, for a +group?+ type, once more for the
#   whole run of accepted characters when that run is longer than +length+.
#
# collect_result is defined elsewhere in this file; presumably it appends
# words for the given surface to the array passed in -- confirm there.
#
# @param str [String] text being analysed
# @param i [Integer] index of the first character of the candidate
# @param found_in_normal_dic truthy when the regular dictionary already
#   produced a match at this position
# @return [Array] the array handed to collect_result
def possible_words(str, i, found_in_normal_dic)
  char_type = @char_types.type_for(str[i].ord)
  return [] if found_in_normal_dic && !char_type.invoke?

  words = []
  # Single-character candidate.
  collect_result(words, char_type, str[i..i]) if char_type.length > 0

  # Extend the run while the following code points are accepted by the type
  # of the first character.
  run_length = 1
  str[(i + 1)..-1].each_codepoint do |codepoint|
    break unless char_type.accept?(codepoint)

    run_length += 1
    if char_type.length >= run_length
      collect_result(words, char_type, str[i...(i + run_length)])
    end
  end

  # Grouping types also yield the full run when it exceeds the length limit.
  if char_type.group? && char_type.length < run_length
    collect_result(words, char_type, str[i...(i + run_length)])
  end

  words
end
#rule_size ⇒ Object
327 328 329 |
# File 'lib/okura.rb', line 327
# Counts the registered word templates across every character type.
#
# @return [Integer] sum of the sizes of all lists stored in @templates
def rule_size
  total = 0
  @templates.each_value { |words| total += words.size }
  total
end
#word_templates_for(type_name) ⇒ Object
324 325 326 |
# File 'lib/okura.rb', line 324
# Returns the word templates registered under +type_name+.
#
# The result is a shallow copy, so adding to or removing from it does not
# touch the list held in @templates. A name with no registered templates
# yields an empty array rather than nil.
#
# @param type_name key used when the templates were stored in @templates
# @return [Array] copy of the template list (empty when none are registered)
def word_templates_for(type_name)
  (@templates[type_name] || []).dup
end