Module: Eco::Data::Files::Encoding

Includes:
Language::AuxiliarLogger
Included in:
ClassMethods, Content
Defined in:
lib/eco/data/files/encoding.rb

Constant Summary collapse

# Byte values of the UTF-8 byte order mark (EF BB BF).
BOM_BYTES = [0xEF, 0xBB, 0xBF].freeze

Instance Attribute Summary

Attributes included from Language::AuxiliarLogger

#logger

Instance Method Summary collapse

Methods included from Language::AuxiliarLogger

#log

Instance Method Details

#bom?(path) ⇒ Boolean

Returns:

  • (Boolean)


9
10
11
12
13
14
15
16
17
18
# File 'lib/eco/data/files/encoding.rb', line 9

# Tells whether the file at `path` starts with the bytes in `BOM_BYTES`.
#
# @param path [String, nil] location of the file to inspect.
# @return [Boolean] `true` when the first three bytes of the file equal
#   `BOM_BYTES`; `false` when `path` is falsy, is not a regular file,
#   the file is empty, or the leading bytes differ.
def bom?(path)
  inspectable = path && File.file?(path) && !File.empty?(path)
  return false unless inspectable

  # Binary mode so the leading bytes are read untouched.
  leading = File.open(path, 'rb') { |io| io.read(3) }
  leading.unpack('C*') == BOM_BYTES
end

#encoding(path) ⇒ Object



55
56
57
# File 'lib/eco/data/files/encoding.rb', line 55

# Names the encoding detected for the file at `path`.
#
# @param path [String, nil] location of the file to inspect.
# @return [String] `'bom'` when `bom?(path)` is truthy, `'utf-8'` otherwise.
def encoding(path)
  return 'bom' if bom?(path)

  'utf-8'
end

#get_file_content_with_encoding(file, encoding: nil) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/eco/data/files/encoding.rb', line 20

# Reads `file` and returns its content as UTF-8.
#
# When no `encoding` is given it is resolved with `scoped_encoding`.
# Any encoding other than `'utf-8'` is reported through the logger
# (debug level) and printed to standard output.
#
# @param file [String] path of the file to read.
# @param encoding [String, nil] encoding to read the file with; a value
#   whose part before `|` is `'bom'` requests BOM removal.
# @return [String, nil] the file content in UTF-8.
def get_file_content_with_encoding(file, encoding: nil)
  encoding ||= scoped_encoding(file)

  if encoding && encoding != 'utf-8'
    notice = "File encoding: '#{encoding}'"
    log(:debug) { notice }
    puts notice
  end

  # The encoding may come as 'bom' or 'bom|<name>'.
  bom_requested = encoding ? encoding.split('|').first == 'bom' : false

  if bom?(file) || bom_requested
    # BOM files are read as UTF-8 and the mark is stripped afterwards.
    text     = remove_bom(File.read(file, encoding: 'utf-8'))
    encoding = 'utf-8'
  else
    text = File.read(file, encoding: encoding)
  end

  return unless text
  return text if encoding.include?('utf-8')

  # Anything that was not read as UTF-8 gets transcoded.
  text.encode('utf-8')
end

#remove_bom(content) ⇒ Object

Removes a leading UTF-8 byte order mark (BOM) from the content and returns the result as UTF-8. See https://stackoverflow.com/a/24916365/4352306



45
46
47
48
49
50
51
52
53
# File 'lib/eco/data/files/encoding.rb', line 45

# Strips a leading byte order mark (the bytes in `BOM_BYTES`) from `content`.
#
# The check and the removal are done bytewise, so the method works
# regardless of the encoding the string is tagged with (a binary string
# read with `File.binread`, for instance) and only the first bytes of
# the content are inspected.
#
# @param content [String] text that may start with the BOM bytes.
# @return [String] a new UTF-8 tagged string without the leading BOM when
#   one was present; otherwise `content` itself, untouched.
def remove_bom(content)
  bom_size = BOM_BYTES.size
  return content unless content.byteslice(0, bom_size).bytes == BOM_BYTES

  # `byteslice` returns a new string, so the caller's object is not mutated.
  content.byteslice(bom_size..-1).force_encoding('utf-8')
end

#scoped_encoding(path) ⇒ Object

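Returns the encoding to use for the file at `path`: `bom|utf-8` when the file starts with a BOM, `utf-8` otherwise. Logs an error and returns `nil` if the file does not exist.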



60
61
62
63
64
65
66
67
68
69
# File 'lib/eco/data/files/encoding.rb', line 60

# Resolves the encoding string for the file at `path`.
#
# @param path [String] location of the file to inspect.
# @return [String, nil] the value of `encoding(path)`, with `'|utf-8'`
#   appended when that value is `'bom'`; `nil` (after logging an error)
#   when the file does not exist.
def scoped_encoding(path)
  if File.exist?(path)
    detected = encoding(path)
    detected == 'bom' ? "#{detected}|utf-8" : detected
  else
    log(:error) { "File does not exist: #{path}" }
    nil
  end
end