Class: UTF8Utils::Codepoints

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/utf8_utils.rb

Constant Summary collapse

# Lookup table keyed by byte value (128..159). Each value is either an Array
# holding the bytes of a UTF-8 encoded replacement character, or nil.
# For example, 128 maps to [226, 130, 172], the UTF-8 encoding of U+20AC.
#
# NOTE(review): despite the constant's name, these entries look like the
# Windows-1252 mapping (e.g. 128 => EURO SIGN) rather than Windows-1251 —
# confirm before relying on the name. The nil entries (129, 141, 143, 144,
# 157) presumably mark byte values that have no character assigned.
#
# The Hash itself is frozen so the shared table cannot be modified by
# accident; the inner Arrays are left as they were.
CP1251 =
{
  128 => [226, 130, 172],
  129 => nil,
  130 => [226, 128, 154],
  131 => [198, 146],
  132 => [226, 128, 158],
  133 => [226, 128, 166],
  134 => [226, 128, 160],
  135 => [226, 128, 161],
  136 => [203, 134],
  137 => [226, 128, 176],
  138 => [197, 160],
  139 => [226, 128, 185],
  140 => [197, 146],
  141 => nil,
  142 => [197, 189],
  143 => nil,
  144 => nil,
  145 => [226, 128, 152],
  146 => [226, 128, 153],
  147 => [226, 128, 156],
  148 => [226, 128, 157],
  149 => [226, 128, 162],
  150 => [226, 128, 147],
  151 => [226, 128, 148],
  152 => [203, 156],
  153 => [226, 132, 162],
  154 => [197, 161],
  155 => [226, 128, 186],
  156 => [197, 147],
  157 => nil,
  158 => [197, 190],
  159 => [197, 184]
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(string) ⇒ Codepoints

Returns a new instance of Codepoints.



48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/utf8_utils.rb', line 48

# Builds a Codepoints instance from the raw bytes of +string+.
#
# The bytes are gathered with the block form of `each_byte`, which behaves
# the same on every Ruby version (1.8.6's `each_byte` does not return an
# Enumerable when called without a block). Reading bytes rather than
# characters also avoids the ArgumentError that 1.9.x raises for strings
# containing invalid UTF-8.
#
# @param string [#each_byte] the source whose bytes are collected
def initialize(string)
  @position = 0
  @chars = []
  string.each_byte { |byte| @chars << byte }
end

Instance Attribute Details

#chars ⇒ Object

Returns the value of attribute chars.



8
9
10
# File 'lib/utf8_utils.rb', line 8

# Reader for the @chars instance variable.
#
# #initialize fills @chars with the byte values of the string passed to the
# constructor, so unless it has been reassigned elsewhere this is an Array
# of bytes.
#
# @return [Object] the current value of @chars
def chars
  @chars
end

#position ⇒ Object (readonly)

Returns the value of attribute position.



9
10
11
# File 'lib/utf8_utils.rb', line 9

# Reader for the @position instance variable, which #initialize sets to 0.
#
# NOTE(review): presumably an index into #chars that is advanced while
# iterating — the code that updates it is not visible here; confirm.
#
# @return [Object] the current value of @position
def position
  @position
end

Instance Method Details

#tidy_bytes ⇒ Object

Attempt to clean up malformed characters.



62
63
64
# File 'lib/utf8_utils.rb', line 62

# Attempt to clean up malformed characters.
#
# Every entry is tidied and converted back to a character with `to_char`;
# entries whose conversion yields nil are dropped, and the remaining
# characters are joined into one string that is wrapped in a new Codepoints.
#
# @return [Codepoints] a new instance built from the tidied string
def tidy_bytes
  tidied_chars = entries.map do |codepoint|
    codepoint.tidy.to_char
  end
  tidied_string = tidied_chars.compact.join
  Codepoints.new(tidied_string)
end

#to_s ⇒ Object

Cast to string.



67
68
69
# File 'lib/utf8_utils.rb', line 67

# Cast to string.
#
# Converts every entry to a character with `to_char` and concatenates the
# results in order.
#
# @return [String] the joined characters
def to_s
  characters = entries.map do |codepoint|
    codepoint.to_char
  end
  characters.join
end