Class: Tsukiko

Inherits:
Object
  • Object
show all
Defined in:
lib/tsukiko.rb

Overview

encoding:utf-8

Instance Method Summary collapse

Constructor Details

#initialize(dic_path = File.dirname(__FILE__)) ⇒ Tsukiko

Returns a new instance of Tsukiko.



5
6
7
8
9
10
11
12
# File 'lib/tsukiko.rb', line 5

# Loads the three dictionaries used for conversion from Marshal dumps.
#
# Reads words.data into @words, cn_tw.data into @han and bigram.data into
# @bigram, all from the "data" directory that sits next to +dic_path+.
#
# @param dic_path [String] directory whose sibling "data" directory holds
#   the dictionary files; defaults to the directory of this source file.
# @raise [SystemCallError] (e.g. Errno::ENOENT) if a dictionary file cannot
#   be read.
def initialize(dic_path=File.dirname(__FILE__))
	data_dir=dic_path+"/../data"
	# File.binread opens, reads and closes the file in a single call, so no
	# file handle is left open after loading.
	# NOTE(review): Marshal.load can instantiate arbitrary objects; only point
	# dic_path at dictionary files you trust.
	@words=Marshal.load(File.binread(data_dir+"/words.data"))
	@han=Marshal.load(File.binread(data_dir+"/cn_tw.data"))
	@bigram=Marshal.load(File.binread(data_dir+"/bigram.data"))
end

Instance Method Details

#convert(str) ⇒ Object



70
71
72
73
74
75
# File 'lib/tsukiko.rb', line 70

# Converts +str+ by running the word-level pass (convert_words) followed by
# the per-character pass (convert_han), then strips the three-space markers
# that convert_words puts around replaced words.
#
# Note that every run of three spaces is removed, and that +str+ itself is
# edited in place because both passes use gsub! on it.
#
# @param str [String] text to convert (modified in place)
# @return [String] the converted text, also stored in @result
def convert(str)
	# Both passes edit str in place, so when convert_han hands back nil (its
	# gsub! returns nil if nothing was replaced) str still holds the text
	# converted so far; fall back to it instead of crashing on nil.gsub!.
	@result=convert_han(convert_words(str)) || str
	@result.gsub!("   ","")
	@result
end

#convert_han(str) ⇒ Object

call after convert_words



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/tsukiko.rb', line 20

# Converts +str+ character by character using the @han table. Intended to be
# called after convert_words.
#
# Each whitespace-delimited token is handled on its own. For every character
# of a token:
# * no @han entry       -> the character is kept as is
# * exactly one candidate -> that candidate replaces it
# * several candidates  -> use_bigram chooses, given the token converted so
#   far and the character's index
#
# +str+ is modified in place and is also stored in @result.
#
# @param str [String] text to convert (modified in place)
# @return [String] the same string object, converted
def convert_han(str)
	# Block-form gsub! rewrites each token exactly where it stands, so a token
	# that also occurs inside another token cannot corrupt it, and the
	# replacement text is never parsed for backslash escapes.
	str.gsub!(/\S+/) do |token|
		converted=token.clone
		token.each_char.with_index do |char,i|
			candidates=@han[char]
			next if candidates.nil?
			# use_bigram receives the partially converted token: position i still
			# holds the source character, earlier positions are already converted.
			converted[i]=(candidates.length==1) ? candidates[0] : use_bigram(converted,i)
		end
		converted
	end
	# gsub! returns nil when there is no token at all, so return str itself.
	@result=str
end

#convert_words(str) ⇒ Object



13
14
15
16
17
18
# File 'lib/tsukiko.rb', line 13

# Replaces every key of @words found in +str+ with its mapped value, padded
# with three spaces on each side. The replacement happens in place on +str+.
#
# @param str [String] text to convert (modified in place)
# @return [String] the same string object, also stored in @result
def convert_words(str)
	@words.each do |source_word,target_word|
		padded="   "+target_word+"   "
		str.gsub!(source_word,padded)
	end
	@result=str
end

#ref(str) ⇒ Object



63
64
65
66
67
68
# File 'lib/tsukiko.rb', line 63

# Looks up +str+ in the @bigram table.
#
# @param str [String] key to look up
# @return [Object] whatever @bigram[str] yields for that key
def ref(str)
	@bigram[str]
end

#use_bigram(str, i) ⇒ Object

Used when there is a 1:n conversion, i.e. one source character has several candidate replacements.



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/tsukiko.rb', line 42

# Chooses the replacement for the character at str[i] when @han lists
# several candidates for it.
#
# Each candidate is scored by looking up (context + candidate) through ref,
# where the context is the preceding character str[i-1], or "$" when i is 0
# and there is no preceding character. The candidate with the highest score
# wins; on a tie the earliest candidate is kept.
#
# @param str [String] token being converted; str[i] is the source character
# @param i [Integer] index of the character to convert
# @return [String] the chosen candidate
def use_bigram(str,i)
	candidates=@han[str[i]]
	prefix=(i==0) ? "$" : str[i-1]
	max=candidates[0]
	# A bigram missing from the table makes ref return nil; score it as 0 so
	# the comparison below cannot raise on nil.
	# NOTE(review): assumes scores are non-negative counts -- confirm.
	max_score=ref(prefix+max) || 0
	candidates.each{|h|
		score=ref(prefix+h) || 0
		if score>max_score
			max=h
			max_score=score
		end
	}
	max
end