Class: GIGO::CharDet::Latin1Prober

Inherits:
CharSetProber show all
Defined in:
lib/gigo/rchardet/latin1prober.rb

Instance Attribute Summary

Attributes inherited from CharSetProber

#active

Instance Method Summary collapse

Methods inherited from CharSetProber

#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state

Constructor Details

#initialize ⇒ Latin1Prober

Returns a new instance of Latin1Prober.



95
96
97
98
# File 'lib/gigo/rchardet/latin1prober.rb', line 95

# Builds a new prober: runs the parent constructor, then puts the prober
# into its starting state by calling +reset+.
def initialize
  super
  reset
end

Instance Method Details

#feed(aBuf) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/gigo/rchardet/latin1prober.rb', line 110

# Feeds a chunk of input to the prober and updates its frequency counters.
#
# The buffer is first passed through +filter_with_english_letters+, then
# walked one byte at a time. Each byte is mapped to a character class via
# +Latin1_CharToClass+, and the (previous class, current class) pair is
# looked up in +Latin1ClassModel+. A looked-up value of 0 marks the prober
# as +ENotMe+ and stops processing; any other value increments the matching
# slot of <tt>@_mFreqCounter</tt>.
#
# @param aBuf [String] chunk of data to analyse
# @return [Object] the value of +get_state+ after the chunk was consumed
def feed(aBuf)
  aBuf = filter_with_english_letters(aBuf)

  # Iterate over raw bytes rather than characters: splitting into characters
  # would collapse a valid multibyte sequence (e.g. in a UTF-8 tagged string)
  # into a single element and only its first byte would be classified.
  aBuf.each_byte do |byte|
    char_class = Latin1_CharToClass[byte]
    freq = Latin1ClassModel[(@_mLastCharClass * CLASS_NUM) + char_class]
    if freq == 0
      # This class transition is not allowed by the model: give up.
      @_mState = ENotMe
      break
    end
    @_mFreqCounter[freq] += 1
    @_mLastCharClass = char_class
  end

  get_state
end

#get_charset_name ⇒ Object



106
107
108
# File 'lib/gigo/rchardet/latin1prober.rb', line 106

# Name of the charset this prober reports.
#
# @return [String] always "windows-1252"
def get_charset_name
  "windows-1252"
end

#get_confidence ⇒ Object



127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/gigo/rchardet/latin1prober.rb', line 127

# Computes how confident the prober is that the data fed so far is
# windows-1252.
#
# The score is the share of counted bytes that landed in frequency slot 3,
# minus a 20x penalty for the share that landed in slot 1, clamped at zero
# and then halved. (Slots 3 and 1 are presumably the "very likely" and
# "very unlikely" categories of the class model -- confirm against the
# model's definition.)
#
# @return [Float] 0.01 when the prober state is +ENotMe+, otherwise a value
#   between 0.0 and 0.5
def get_confidence
  return 0.01 if get_state == ENotMe

  # Seed with 0 so an empty counter array sums to 0 instead of nil.
  total = @_mFreqCounter.inject(0) { |sum, count| sum + count }
  return 0.0 if total < 1

  # The 20.0 factor forces floating-point arithmetic for the whole numerator;
  # dividing the integer counters directly would truncate the ratio to 0 or 1.
  confidence = (@_mFreqCounter[3] - (@_mFreqCounter[1] * 20.0)) / total
  confidence = 0.0 if confidence < 0.0

  # lower the confidence of latin1 so that other more accurate detector
  # can take priority.
  confidence * 0.5
end

#reset ⇒ Object



100
101
102
103
104
# File 'lib/gigo/rchardet/latin1prober.rb', line 100

# Puts the prober back into its starting condition: zeroes every frequency
# slot, sets the last seen character class to +OTH+, then lets the parent
# class perform its own reset.
def reset
  @_mFreqCounter = Array.new(FREQ_CAT_NUM, 0)
  @_mLastCharClass = OTH
  super
end