Class: Invoca::Utils::GuaranteedUTF8String

Inherits:
Object
  • Object
show all
Defined in:
lib/invoca/utils/guaranteed_utf8_string.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(string) ⇒ GuaranteedUTF8String



12
13
14
# File 'lib/invoca/utils/guaranteed_utf8_string.rb', line 12

# Wraps +string+ by storing the result of the class-level normalize_string
# in @to_string.
#
# @param string [Object] value passed straight through to
#   self.class.normalize_string; any error raised there propagates
def initialize(string)
  normalized = self.class.normalize_string(string)
  @to_string = normalized
end

Instance Attribute Details

#to_string ⇒ Object (readonly) Also known as: to_s

Returns the value of attribute to_string.



10
11
12
# File 'lib/invoca/utils/guaranteed_utf8_string.rb', line 10

# Read-only accessor for the @to_string instance variable (the value the
# constructor stores after calling normalize_string). Listed on this page
# as also available under the name to_s.
#
# @return [Object] the current value of @to_string
def to_string
  @to_string
end

Class Method Details

.normalize_string(orig_string, normalize_utf16: true, normalize_cp1252: true, normalize_newlines: true, remove_utf8_bom: true, replace_unicode_beyond_ffff: true) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/invoca/utils/guaranteed_utf8_string.rb', line 24

# Coerces +orig_string+ to a String, labels a private copy of it as UTF-8, and
# passes that copy to normalize_string_from_utf8 together with the option flags.
#
# @param orig_string [String, #to_s] a String, or any object whose +to_s+ is
#   defined somewhere other than Kernel
# @param normalize_utf16 [Boolean] forwarded unchanged to normalize_string_from_utf8
# @param normalize_cp1252 [Boolean] forwarded unchanged to normalize_string_from_utf8
# @param normalize_newlines [Boolean] forwarded unchanged to normalize_string_from_utf8
# @param remove_utf8_bom [Boolean] forwarded unchanged to normalize_string_from_utf8
# @param replace_unicode_beyond_ffff [Boolean] forwarded unchanged to normalize_string_from_utf8
# @return [Object] whatever normalize_string_from_utf8 returns
# @raise [ArgumentError] when +orig_string+ is not a String and its +to_s+ is
#   the one owned by Kernel (or it does not respond to +to_s+ at all)
def normalize_string(orig_string,
                     normalize_utf16:              true,
                     normalize_cp1252:             true,
                     normalize_newlines:           true,
                     remove_utf8_bom:              true,
                     replace_unicode_beyond_ffff:  true)
  # Kernel's generic to_s carries no meaningful text for the object, so an
  # object that only has that one is treated as "not convertible".
  convertible = orig_string.is_a?(String) ||
                (orig_string.respond_to?(:to_s) && orig_string.method(:to_s).owner != Kernel)
  unless convertible
    raise ArgumentError, "must be passed a string or an object with a non-Kernel .to_s method but instead was #{orig_string.class} #{orig_string.inspect}"
  end

  # Work on a dup so the caller's object is never mutated; force_encoding only
  # changes the encoding label on that copy and returns the same copy.
  utf8_copy = orig_string.to_s.dup.force_encoding('UTF-8')

  normalize_string_from_utf8(utf8_copy,
                             normalize_utf16: normalize_utf16,
                             normalize_cp1252: normalize_cp1252,
                             normalize_newlines: normalize_newlines,
                             remove_utf8_bom: remove_utf8_bom,
                             replace_unicode_beyond_ffff: replace_unicode_beyond_ffff)
end