Class: CharDet::UTF1632Prober
Instance Attribute Summary
#active
Instance Method Summary
collapse
#filter_high_bit_only, #filter_with_english_letters, #filter_without_english_letters
Constructor Details
Returns a new instance of UTF1632Prober.
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
# File 'lib/rchardet/utf1632prober.rb', line 34
# Creates a prober with its byte position, per-offset zero/non-zero
# tallies, four-byte window and validity flags all cleared.
#
# After the parent initializer runs, every field is assigned here and
# #reset is then invoked as the final step.
def initialize
  super()
  @position = 0
  @zeros_at_mod = Array.new(4, 0)
  @nonzeros_at_mod = Array.new(4, 0)
  @state = EDetecting
  @quad = Array.new(4, 0)
  @invalid_utf16be = @invalid_utf16le = false
  @invalid_utf32be = @invalid_utf32le = false
  @first_half_surrogate_pair_detected_16be = false
  @first_half_surrogate_pair_detected_16le = false
  reset
end
|
Instance Method Details
#feed(aBuf) ⇒ Object
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
# File 'lib/rchardet/utf1632prober.rb', line 82
# Feeds a chunk of input to the prober.
#
# Each byte is written into the four-slot window @quad at index
# (@position % 4). Whenever the fourth slot has just been filled, the whole
# window is handed to validate_utf32_characters and each two-byte half is
# handed to validate_utf16_characters. Every byte is also tallied as zero
# or non-zero under its mod-4 offset, and @position advances by one.
#
# @param aBuf [#each_byte] the bytes to examine
# @return [Object] whatever get_state returns once the chunk is consumed
def feed(aBuf)
  aBuf.each_byte do |byte|
    slot = @position % 4
    @quad[slot] = byte
    if slot == 3
      validate_utf32_characters(@quad)
      # Array#[start, length] hands over exactly two bytes per call; an
      # inclusive range such as 0..2 would select three elements.
      validate_utf16_characters(@quad[0, 2])
      validate_utf16_characters(@quad[2, 2])
    end
    tally = byte.zero? ? @zeros_at_mod : @nonzeros_at_mod
    tally[slot] += 1
    @position += 1
  end
  get_state
end
|
#get_charset_name ⇒ Object
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
# File 'lib/rchardet/utf1632prober.rb', line 65
# Names the encoding this prober currently favours.
#
# The candidates are checked in a fixed order (UTF-32BE, UTF-32LE,
# UTF-16BE, UTF-16LE) and the first one whose is_likely_* check is truthy
# wins. "UTF-16" is the answer when none of them is.
#
# @return [String] the charset name
def get_charset_name
  return "UTF-32BE" if is_likely_utf32be
  return "UTF-32LE" if is_likely_utf32le
  return "UTF-16BE" if is_likely_utf16be
  return "UTF-16LE" if is_likely_utf16le

  "UTF-16"
end
|
#get_confidence ⇒ Object
117
118
119
120
121
122
123
|
# File 'lib/rchardet/utf1632prober.rb', line 117
# Confidence that the input is one of the UTF-16/UTF-32 variants.
#
# The is_likely_* checks are consulted in the order UTF-16LE, UTF-16BE,
# UTF-32LE, UTF-32BE, stopping at the first truthy one.
#
# @return [Float] 0.85 when any check is truthy, otherwise 0.0
def get_confidence
  return 0.85 if is_likely_utf16le || is_likely_utf16be
  return 0.85 if is_likely_utf32le || is_likely_utf32be

  0.0
end
|
#get_state ⇒ Object
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
# File 'lib/rchardet/utf1632prober.rb', line 102
# Reports the prober state, updating it first if it is still undecided.
#
# A state of ENotMe or EFoundIt is returned unchanged. Otherwise the state
# becomes EFoundIt when get_confidence exceeds 0.80, or ENotMe once more
# than 4 * 1024 bytes have been consumed; failing both, it stays as it is.
#
# @return [Object] the current value of @state
def get_state
  decided = [ENotMe, EFoundIt]
  unless decided.include?(@state)
    if get_confidence > 0.80
      @state = EFoundIt
    elsif @position > 4 * 1024
      @state = ENotMe
    end
  end
  @state
end
|
#reset ⇒ Object
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
# File 'lib/rchardet/utf1632prober.rb', line 50
# Returns the prober to its pristine state so it can be fed new input.
#
# The parent's reset runs first. Then the byte position goes back to 0,
# the state to EDetecting, the per-offset tallies and the four-byte window
# to zeros, and every invalid/surrogate flag to false.
#
# @return [Array<Integer>] the fresh @quad (value of the last assignment)
def reset
  super()
  @position = 0
  @state = EDetecting
  @zeros_at_mod = Array.new(4, 0)
  @nonzeros_at_mod = Array.new(4, 0)
  @invalid_utf16be = @invalid_utf16le = false
  @invalid_utf32be = @invalid_utf32le = false
  @first_half_surrogate_pair_detected_16be = false
  @first_half_surrogate_pair_detected_16le = false
  @quad = Array.new(4, 0)
end
|