Class: Langusta::Detector
- Inherits: Object
  - Object
    - Langusta::Detector
- Defined in:
- lib/langusta/detector.rb
Constant Summary collapse
- ALPHA_DEFAULT = 0.5
- ALPHA_WIDTH = 0.05
- ITERATION_LIMIT = 1000
- PROB_THRESHOLD = 0.1
- CONV_THRESHOLD = 0.99999
- BASE_FREQ = 10000
- UNKNOWN_LANG = "unknown"
Instance Attribute Summary collapse
- #alpha ⇒ Object
  Returns the value of attribute alpha.
- #max_text_length ⇒ Object
  Returns the value of attribute max_text_length.
- #verbose ⇒ Object
  Returns the value of attribute verbose.
Instance Method Summary collapse
- #append(text) ⇒ Object
  Append more text to be recognized.
- #detect ⇒ String
  Detect the language.
- #initialize(factory) ⇒ Detector (constructor)
  A new instance of Detector.
Constructor Details
#initialize(factory) ⇒ Detector
Returns a new instance of Detector.
13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/langusta/detector.rb', line 13 def initialize(factory) @word_lang_prob_map = factory.word_lang_prob_map @lang_list = factory.lang_list @text = [] @langprob = nil @alpha = ALPHA_DEFAULT @n_trial = 7 @max_text_length = 10000 @prior_map = nil @verbose = false end |
Instance Attribute Details
#alpha ⇒ Object
Returns the value of attribute alpha.
3 4 5 |
# File 'lib/langusta/detector.rb', line 3 def alpha @alpha end |
#max_text_length ⇒ Object
Returns the value of attribute max_text_length.
3 4 5 |
# File 'lib/langusta/detector.rb', line 3 def max_text_length @max_text_length end |
#verbose ⇒ Object
Returns the value of attribute verbose.
3 4 5 |
# File 'lib/langusta/detector.rb', line 3 def verbose @verbose end |
Instance Method Details
#append(text) ⇒ Object
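Append more text to be recognized. Note that, per the source listing below, each call assigns the normalized text to @text, replacing (rather than extending) any text stored by an earlier call.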
27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/langusta/detector.rb', line 27 def append(text) Guard.klass(text, Array, __method__) text = Codepoints.gsub!(text, RegexHelper::URL_REGEX, "\x00\x20") text = Codepoints.gsub!(text, RegexHelper::MAIL_REGEX, "\x00\x20") text = text.map do |c| NGram.normalize(c) end @text = Codepoints.gsub!(text, RegexHelper::SPACE_REGEX, "\x00\x20") end |
#detect ⇒ String
Detect the language.
41 42 43 44 |
# File 'lib/langusta/detector.rb', line 41 def detect probabilities = get_probabilities() (probabilities.length > 0) ? probabilities.first.lang : UNKNOWN_LANG end |