24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
# File 'lib/language_detector.rb', line 24
# Builds one LanguageDetector::Profile per supported language and writes the
# whole list, serialized as YAML, to "model.yml" in the same directory as this
# source file. Any existing model file at that path is overwritten.
#
# Prints "saving model..." to standard output before writing.
#
# NOTE(review): how Profile#init_with_file locates the training file from the
# bare file name is not visible here; confirm against Profile.
#
# @return [Object] the result of the final File.open block; incidental, do not
#   rely on it
def self.train
  # Columns: language code, training file, file encoding, language name.
  # Only the first two columns are used below; the encoding and the name are
  # kept purely as human-readable reference.
  training_data = [
    [ "ar", "ar-utf8.txt", "utf8", "arabic" ],
    [ "bg", "bg-utf8.txt", "utf8", "bulgarian" ],
    [ "cs", "cs-utf8.txt", "utf8", "czech" ],
    [ "da", "da-iso-8859-1.txt", "iso-8859-1", "danish" ],
    [ "de", "de-utf8.txt", "utf8", "german" ],
    [ "el", "el-utf8.txt", "utf8", "greek" ],
    [ "en", "en-iso-8859-1.txt", "iso-8859-1", "english" ],
    [ "et", "et-utf8.txt", "utf8", "estonian" ],
    [ "es", "es-utf8.txt", "utf8", "spanish" ],
    [ "fa", "fa-utf8.txt", "utf8", "farsi" ],
    [ "fi", "fi-utf8.txt", "utf8", "finnish" ],
    [ "fr", "fr-utf8.txt", "utf8", "french" ],
    [ "fy", "fy-utf8.txt", "utf8", "frisian" ],
    [ "ga", "ga-utf8.txt", "utf8", "irish" ],
    [ "he", "he-utf8.txt", "utf8", "hebrew" ],
    [ "hi", "hi-utf8.txt", "utf8", "hindi" ],
    [ "hr", "hr-utf8.txt", "utf8", "croatian" ],
    [ "io", "io-utf8.txt", "utf8", "ido" ],
    [ "is", "is-utf8.txt", "utf8", "icelandic" ],
    [ "it", "it-utf8.txt", "utf8", "italian" ],
    [ "ja", "ja-utf8.txt", "utf8", "japanese" ],
    [ "ko", "ko-utf8.txt", "utf8", "korean" ],
    [ "hu", "hu-utf8.txt", "utf8", "hungarian" ],
    [ "nl", "nl-iso-8859-1.txt", "iso-8859-1", "dutch" ],
    [ "no", "no-utf8.txt", "utf8", "norwegian" ],
    [ "pl", "pl-utf8.txt", "utf8", "polish" ],
    [ "pt", "pt-utf8.txt", "utf8", "portuguese" ],
    [ "ro", "ro-utf8.txt", "utf8", "romanian" ],
    [ "ru", "ru-utf8.txt", "utf8", "russian" ],
    [ "sl", "sl-utf8.txt", "utf8", "slovenian" ],
    [ "sv", "sv-iso-8859-1.txt", "iso-8859-1", "swedish" ],
    [ "th", "th-utf8.txt", "utf8", "thai" ],
    [ "uk", "uk-utf8.txt", "utf8", "ukrainian" ],
    [ "vi", "vi-utf8.txt", "utf8", "vietnamese" ],
    [ "zh-CN", "zh-utf8.txt", "utf8", "chinese simplified" ],
    [ "zh-TW", "zh-TW-utf8.txt", "utf8", "chinese traditional" ]
  ]

  # One profile per row, in table order. The local is deliberately not named
  # `p`, which would shadow Kernel#p.
  profiles = training_data.map do |code, corpus_file, _encoding, _language|
    profile = LanguageDetector::Profile.new(code)
    profile.init_with_file(corpus_file)
    profile
  end

  puts 'saving model...'
  filename = File.expand_path("model.yml", File.dirname(__FILE__))
  # The block form of File.open closes the file even if YAML.dump raises.
  File.open(filename, 'w') do |f|
    YAML.dump(profiles, f)
  end
end
|