Module: UTF8Utils

Defined in:
lib/utf8_utils.rb,
lib/utf8_utils/version.rb

Overview

Utilities for cleaning up UTF-8 strings with invalid characters.

Defined Under Namespace

Modules: StringExt, Version

Constant Summary

CP1252 =

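Maps each CP1252 byte in the range 128–159 (given as a decimal integer) to its UTF-8 approximation, expressed as an array of decimal bytes. Bytes without a replacement map to nil.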

{
  # Keys are CP1252 byte values (decimal); each value is the UTF-8 byte
  # sequence (decimal) for the corresponding character, or nil when the
  # table provides no replacement for that byte.
  128 => [226, 130, 172], # U+20AC EURO SIGN
  129 => nil,             # no mapping (presumably unassigned in CP1252)
  130 => [226, 128, 154], # U+201A SINGLE LOW-9 QUOTATION MARK
  131 => [198, 146],      # U+0192 LATIN SMALL LETTER F WITH HOOK
  132 => [226, 128, 158], # U+201E DOUBLE LOW-9 QUOTATION MARK
  133 => [226, 128, 166], # U+2026 HORIZONTAL ELLIPSIS
  134 => [226, 128, 160], # U+2020 DAGGER
  135 => [226, 128, 161], # U+2021 DOUBLE DAGGER
  136 => [203, 134],      # U+02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
  137 => [226, 128, 176], # U+2030 PER MILLE SIGN
  138 => [197, 160],      # U+0160 LATIN CAPITAL LETTER S WITH CARON
  139 => [226, 128, 185], # U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  140 => [197, 146],      # U+0152 LATIN CAPITAL LIGATURE OE
  141 => nil,             # no mapping (presumably unassigned in CP1252)
  142 => [197, 189],      # U+017D LATIN CAPITAL LETTER Z WITH CARON
  143 => nil,             # no mapping (presumably unassigned in CP1252)
  144 => nil,             # no mapping (presumably unassigned in CP1252)
  145 => [226, 128, 152], # U+2018 LEFT SINGLE QUOTATION MARK
  146 => [226, 128, 153], # U+2019 RIGHT SINGLE QUOTATION MARK
  147 => [226, 128, 156], # U+201C LEFT DOUBLE QUOTATION MARK
  148 => [226, 128, 157], # U+201D RIGHT DOUBLE QUOTATION MARK
  149 => [226, 128, 162], # U+2022 BULLET
  150 => [226, 128, 147], # U+2013 EN DASH
  151 => [226, 128, 148], # U+2014 EM DASH
  152 => [203, 156],      # U+02DC SMALL TILDE
  153 => [226, 132, 162], # U+2122 TRADE MARK SIGN
  154 => [197, 161],      # U+0161 LATIN SMALL LETTER S WITH CARON
  155 => [226, 128, 186], # U+203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  156 => [197, 147],      # U+0153 LATIN SMALL LIGATURE OE
  157 => nil,             # no mapping (presumably unassigned in CP1252)
  158 => [197, 190],      # U+017E LATIN SMALL LETTER Z WITH CARON
  159 => [197, 184]       # U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
}