Class: CharDet::UTF8Prober

Inherits:
CharSetProber show all
Defined in:
lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb

Instance Attribute Summary

Attributes inherited from CharSetProber

#active

Instance Method Summary collapse

Methods inherited from CharSetProber

#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state

Constructor Details

#initialize ⇒ UTF8Prober

Returns a new instance of UTF8Prober.


33
34
35
36
37
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb', line 33

# Sets up the prober: runs the superclass initializer, creates the coding
# state machine from UTF8SMModel, then calls #reset to put the prober in
# its starting state.
def initialize
  super()
  @_mCodingSM = CodingStateMachine.new(UTF8SMModel)
  reset
end

Instance Method Details

#feed(aBuf) ⇒ Object


49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb', line 49

# Feeds the bytes of +aBuf+ through the coding state machine, one at a time.
#
# Scanning stops early when the state machine reports EError (the prober
# state becomes ENotMe) or EItsMe (the prober state becomes EFoundIt).
# Every time the machine comes back to EStart after a character of two or
# more bytes, the multi-byte character counter is incremented.
#
# After the scan, if the prober is still in EDetecting and the confidence
# exceeds SHORTCUT_THRESHOLD, the state is promoted to EFoundIt.
#
# Returns the prober state as reported by get_state.
def feed(aBuf)
  aBuf.each_byte do |byte|
    sm_state = @_mCodingSM.next_state(byte.chr)

    if sm_state == EError
      @_mState = ENotMe
      break
    end

    if sm_state == EItsMe
      @_mState = EFoundIt
      break
    end

    # Only a return to the start state marks a completed character.
    next unless sm_state == EStart

    @_mNumOfMBChar += 1 if @_mCodingSM.get_current_charlen >= 2
  end

  # Shortcut: enough evidence gathered to declare a positive match.
  @_mState = EFoundIt if get_state == EDetecting && get_confidence > SHORTCUT_THRESHOLD

  get_state
end

#get_charset_name ⇒ Object


45
46
47
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb', line 45

# Returns the name of the charset this prober reports: "utf-8".
def get_charset_name
  "utf-8"
end

#get_confidence ⇒ Object


75
76
77
78
79
80
81
82
83
84
85
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb', line 75

# Returns a confidence value derived from the number of multi-byte
# characters counted so far (@_mNumOfMBChar).
#
# With six or more such characters the result is the fixed value 0.99.
# With fewer, 0.99 is multiplied by ONE_CHAR_PROB once per counted
# character and the product is subtracted from 1.0.
def get_confidence
  ceiling = 0.99
  return ceiling unless @_mNumOfMBChar < 6

  # Same left-to-right multiplication order as a plain counting loop.
  doubt = (0...@_mNumOfMBChar).inject(ceiling) { |acc, _| acc * ONE_CHAR_PROB }
  1.0 - doubt
end

#reset ⇒ Object


39
40
41
42
43
# File 'lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb', line 39

# Returns the prober to its starting state: runs the superclass reset,
# resets the coding state machine, and zeroes the multi-byte character
# counter.
def reset
  super()
  @_mCodingSM.reset
  @_mNumOfMBChar = 0
end