68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
# File 'lib/SingleByteCharSetProber.rb', line 68
# Feeds one chunk of input to the prober, updating its character and
# sequence statistics, and returns the prober state afterwards.
#
# Unless the model sets 'keepEnglishLetter', the buffer is first passed
# through filter_without_english_letters. Every element of the buffer is
# then mapped to an "order" via the model's 'charToOrderMap':
#
# * orders below SYMBOL_CAT_ORDER bump @_mTotalChar;
# * orders below SAMPLE_SIZE bump @_mFreqChar and, when the previous order
#   was also below SAMPLE_SIZE, bump @_mTotalSeqs and the sequence counter
#   selected by the model's 'precedenceMatrix' (the pair is looked up in
#   swapped order when @_mReversed is set).
#
# Once more than SB_ENOUGH_REL_THRESHOLD sequences have been seen while the
# state is :Detecting, the confidence is compared against the positive and
# negative shortcut thresholds and @_mState may become :FoundIt or :NotMe.
#
# @param aBuf [#length, #[]] input chunk, read element by element by index.
#   NOTE(review): each element is used directly as the key into
#   'charToOrderMap' -- confirm callers pass byte values rather than
#   one-character strings.
# @return the value of get_state after the chunk has been processed
def feed(aBuf)
  aBuf = filter_without_english_letters(aBuf) unless @_mModel['keepEnglishLetter']

  aLen = aBuf.length
  # 0 is truthy in Ruby, so a bare `unless aLen` can never fire. Test for
  # emptiness explicitly so an empty chunk leaves the state untouched
  # instead of re-running the shortcut checks below.
  return get_state if aLen.zero?

  char_to_order = @_mModel['charToOrderMap']
  precedence = @_mModel['precedenceMatrix']

  (0...aLen).each do |i|
    order = char_to_order[aBuf[i]]
    @_mTotalChar += 1 if order < SYMBOL_CAT_ORDER

    if order < SAMPLE_SIZE
      @_mFreqChar += 1
      # A sequence only counts when both neighbours are sampled characters.
      if @_mLastOrder < SAMPLE_SIZE
        @_mTotalSeqs += 1
        # The matrix is flattened row-major; reversed models swap the pair.
        index = if @_mReversed
                  (order * SAMPLE_SIZE) + @_mLastOrder
                else
                  (@_mLastOrder * SAMPLE_SIZE) + order
                end
        @_mSeqCounters[precedence[index]] += 1
      end
    end

    @_mLastOrder = order
  end

  # Shortcut the detection once enough sequences have been collected.
  if get_state == :Detecting && @_mTotalSeqs > SB_ENOUGH_REL_THRESHOLD
    cf = get_confidence
    if cf > POSITIVE_SHORTCUT_THRESHOLD
      p('%s confidence = %s, we have a winner\n' % [@_mModel['charsetName'], cf]) if DEBUG
      @_mState = :FoundIt
    elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
      p('%s confidence = %s, below negative shortcut threshhold %s\n' % [@_mModel['charsetName'], cf, NEGATIVE_SHORTCUT_THRESHOLD]) if DEBUG
      @_mState = :NotMe
    end
  end

  get_state
end
|