Class: Tsukiko
- Inherits:
-
Object
- Object
- Tsukiko
- Defined in:
- lib/tsukiko.rb
Overview
encoding:utf-8
Instance Method Summary collapse
- #convert(str) ⇒ Object
-
#convert_han(str) ⇒ Object
call after convert_words.
- #convert_words(str) ⇒ Object
-
#initialize(dic_path = File.dirname(__FILE__)) ⇒ Tsukiko
constructor
A new instance of Tsukiko.
- #ref(str) ⇒ Object
-
#use_bigram(str, i) ⇒ Object
when there is a 1:n conversion.
Constructor Details
#initialize(dic_path = File.dirname(__FILE__)) ⇒ Tsukiko
Returns a new instance of Tsukiko.
5 6 7 8 9 10 11 12 |
# File 'lib/tsukiko.rb', line 5
#
# Loads the three Marshal-serialized dictionaries the converter works with.
#
# Files are resolved relative to +dic_path+:
#   <dic_path>/../data/words.data   -> @words  (iterated as cn => tw pairs by convert_words)
#   <dic_path>/../data/cn_tw.data   -> @han    (per-character candidates, read by convert_han/use_bigram)
#   <dic_path>/../data/bigram.data  -> @bigram (looked up by ref)
#
# @param dic_path [String] directory from which "../data" is resolved;
#   defaults to the directory containing this source file.
# @raise [SystemCallError] (e.g. Errno::ENOENT) if a dictionary file cannot be read.
def initialize(dic_path = File.dirname(__FILE__))
  data_dir = "#{dic_path}/../data"

  # File.binread opens, reads and closes the file in a single call, so no
  # file handle is left open waiting for the garbage collector.
  #
  # SECURITY NOTE: Marshal.load can instantiate arbitrary objects and must
  # only be fed trusted data. These files are expected to be the ones shipped
  # next to the library; never point dic_path at untrusted content.
  @words  = Marshal.load(File.binread("#{data_dir}/words.data"))
  @han    = Marshal.load(File.binread("#{data_dir}/cn_tw.data"))
  @bigram = Marshal.load(File.binread("#{data_dir}/bigram.data"))
end
Instance Method Details
#convert(str) ⇒ Object
70 71 72 73 74 75 |
# File 'lib/tsukiko.rb', line 70
#
# Runs the full conversion: the word-level pass (convert_words), then the
# per-character pass (convert_han), and finally removes every " " character
# from the result. That removes the separators convert_words inserts around
# replaced words, and also any space that was already present in the input.
#
# The string is edited in place by the passes and by the final space
# removal, so the caller's object is modified; pass a copy if the original
# must be kept.
#
# @param str [String] text to convert (must not be frozen).
# @return [String] the converted text without spaces; also stored in @result.
def convert(str)
  segmented = convert_words(str)

  # convert_han edits its argument in place. Should it hand back nil instead
  # of the string, the in-place-edited string is still the right value to
  # continue with, so fall back to it rather than failing on nil.
  @result = convert_han(segmented) || segmented

  @result.delete!(" ")
  @result
end
#convert_han(str) ⇒ Object
call after convert_words
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/tsukiko.rb', line 20
#
# Character-level pass; call after convert_words.
#
# Every whitespace-delimited token of +str+ is converted character by
# character, left to right, using the @han table:
#   * no entry for the character   -> the character is kept,
#   * exactly one candidate        -> that candidate is used,
#   * several candidates           -> use_bigram picks one, seeing the
#                                     already-converted part of the token as
#                                     context.
#
# Each token is rewritten where it stands, so a token's conversion never
# leaks into other tokens that merely contain the same text, and a token that
# occurs several times is handled each time.
#
# Assumes @han maps a character to a list of replacement strings (it is
# indexed with [0] here and iterated in use_bigram), and that each
# replacement is a single character so positions stay aligned.
# NOTE(review): confirm both against the dictionary data.
#
# @param str [String] text to convert; modified in place (must not be frozen).
# @return [String] +str+ itself after conversion; also stored in @result.
def convert_han(str)
  str.gsub!(/\S+/) do |token|
    converted = token.dup
    token.each_char.with_index do |char, index|
      candidates = @han[char]
      next if candidates.nil? # unknown character: leave as is

      converted[index] =
        if candidates.length == 1
          candidates[0]
        else
          # converted[index] still holds the source character here, while
          # everything before it is already converted context.
          use_bigram(converted, index)
        end
    end
    converted
  end

  # gsub! returns nil when the text has no token at all; always hand back the
  # string itself.
  @result = str
end
#convert_words(str) ⇒ Object
13 14 15 16 17 18 |
# File 'lib/tsukiko.rb', line 13
#
# Word-level pass: for every (cn, tw) pair of the @words table, in the
# table's iteration order, each occurrence of +cn+ in +str+ is replaced by
# +tw+ surrounded by one space on each side. The spaces delimit the replaced
# word as a separate token.
#
# @param str [String] text to convert; modified in place (must not be frozen).
# @return [String] +str+ itself after the replacements; also stored in @result.
def convert_words(str)
  @words.each do |cn, tw|
    # Block form on purpose: a String replacement would have backslash
    # sequences such as "\0" or "\1" expanded by gsub!, corrupting dictionary
    # values that contain a backslash. The block's value is inserted literally.
    str.gsub!(cn) { " #{tw} " }
  end
  @result = str
end
#ref(str) ⇒ Object
63 64 65 66 67 68 |
# File 'lib/tsukiko.rb', line 63
#
# Looks +str+ up in the @bigram table.
#
# use_bigram builds the key as the preceding character (or "$" for the first
# character of a token) followed by a candidate character.
#
# @param str [String] lookup key.
# @return [Object, nil] whatever @bigram[str] yields; nil for a key that is
#   not present (unless the table defines its own default).
def ref(str)
  @bigram[str]
end
#use_bigram(str, i) ⇒ Object
when there is a 1:n conversion
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/tsukiko.rb', line 42
#
# Chooses a replacement for the character at position +i+ of +str+ when the
# @han table offers several candidates (a 1:n conversion).
#
# Each candidate is scored with ref(context + candidate), where the context
# is the character right before position +i+, or "$" when +i+ is 0. The
# candidate with the strictly greatest score wins; on ties the earliest
# candidate is kept. A candidate without a score (ref returns nil) never
# replaces the current choice, and any scored candidate beats an unscored
# one, so a missing bigram no longer raises NoMethodError on the comparison.
# If no candidate has a score, the first candidate is returned.
#
# @param str [String] token being converted; str[i] is the source character,
#   str[i - 1] the context character.
# @param i [Integer] position of the character to convert.
# @return [String, nil] the chosen candidate (nil only if the candidate list
#   is empty).
def use_bigram(str, i)
  candidates = @han[str[i]]
  context = i.zero? ? "$" : str[i - 1]

  best = candidates[0]
  best_score = best && ref(context + best)

  candidates.each do |candidate|
    score = ref(context + candidate)
    next if score.nil?
    next unless best_score.nil? || score > best_score

    best = candidate
    best_score = score
  end

  best
end