Class: CharDet::MultiByteCharSetProber

Inherits:
CharSetProber show all
Defined in:
actionmailer/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb

Direct Known Subclasses

Big5Prober, EUCJPProber, EUCKRProber, EUCTWProber, GB2312Prober, SJISProber

Instance Attribute Summary

Attributes inherited from CharSetProber

#active

Instance Method Summary (collapse)

Methods inherited from CharSetProber

#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters, #get_state

Constructor Details

- (MultiByteCharSetProber) initialize



33
34
35
36
37
38
# File 'actionmailer/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb', line 33

# Sets up a prober with no distribution analyzer and no coding state
# machine assigned, and a two-element "last char" scratch buffer filled
# with NUL characters. (Presumably the concrete subclasses assign the
# analyzer and state machine -- that is not visible from this method.)
def initialize
  super
  @_mDistributionAnalyzer, @_mCodingSM = nil, nil
  @_mLastChar = "\x00\x00"
end

Instance Method Details

- (Object) feed(aBuf)



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'actionmailer/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb', line 54

# Runs one chunk of input through the coding state machine and the
# distribution analyzer, then reports the prober's state.
#
# aBuf - the chunk to examine. It is walked one element at a time
#        (aBuf[i..i]); an empty chunk is accepted and leaves the
#        carried-over "last char" buffer untouched.
#
# Returns the value of get_state() after the chunk has been processed.
def feed(aBuf)
  aLen = aBuf.length
  (0...aLen).each do |i|
    codingState = @_mCodingSM.next_state(aBuf[i..i])
    if codingState == EError
      # NOTE(review): this checks the global `$debug`, not Ruby's built-in
      # `$DEBUG`; presumably it is set elsewhere in the library -- confirm.
      $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
      @_mState = ENotMe
      break
    elsif codingState == EItsMe
      @_mState = EFoundIt
      break
    elsif codingState == EStart
      charLen = @_mCodingSM.get_current_charlen()
      if i == 0
        # At the very start of a chunk, pair the first element with the
        # element carried over from the end of the previous chunk.
        @_mLastChar[1] = aBuf[0..0]
        @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
      else
        @_mDistributionAnalyzer.feed(aBuf[i - 1...i + 1], charLen)
      end
    end
  end

  # Remember the final element for the next call. For an empty chunk the
  # slice below would be nil, and assigning nil into a String raises
  # TypeError, so the carry-over is only updated when there is data.
  @_mLastChar[0] = aBuf[aLen - 1..aLen - 1] if aLen > 0

  # Shortcut: declare a match once the analyzer reports enough data and
  # the confidence exceeds SHORTCUT_THRESHOLD.
  if get_state() == EDetecting &&
     @_mDistributionAnalyzer.got_enough_data() &&
     (get_confidence() > SHORTCUT_THRESHOLD)
    @_mState = EFoundIt
  end
  get_state()
end

- (Object) get_charset_name



51
52
# File 'actionmailer/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb', line 51

# Placeholder for the charset name: this implementation has no body of
# its own and simply yields nil. (Presumably each concrete prober
# overrides it with a real name -- not visible from here.)
def get_charset_name
  nil
end

- (Object) get_confidence



85
86
87
# File 'actionmailer/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb', line 85

# Reports this prober's confidence by delegating to the distribution
# analyzer; the analyzer's get_confidence result is returned as-is.
def get_confidence
  @_mDistributionAnalyzer.get_confidence
end

- (Object) reset



40
41
42
43
44
45
46
47
48
49
# File 'actionmailer/lib/action_mailer/vendor/tmail-1.2.7/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb', line 40

# Returns the prober to its starting condition: runs the parent reset,
# resets the coding state machine and then the distribution analyzer
# (each only if it has been assigned), and refills the "last char"
# scratch buffer with two NUL characters.
def reset
  super
  [@_mCodingSM, @_mDistributionAnalyzer].each do |component|
    component.reset if component
  end
  @_mLastChar = "\x00\x00"
end