Class: CSVUtils::CSVOptions

Inherits:
Object
  • Object
show all
Defined in:
lib/csv_utils/csv_options.rb

Constant Summary collapse

# Maps each recognised byte order mark (as a binary string) to the name of the
# encoding it announces. Both byte orders of a family share one name.
#
# Order matters for any first-match scan over the keys: the UTF-32
# little-endian mark begins with the same two bytes as the UTF-16
# little-endian mark, so the four-byte marks are listed ahead of the two-byte
# ones.
BYTE_ORDER_MARKS = {
  "\xEF\xBB\xBF".b => 'UTF-8',
  "\x00\x00\xFE\xFF".b => 'UTF-32',
  "\xFF\xFE\x00\x00".b => 'UTF-32',
  "\xFE\xFF".b => 'UTF-16',
  "\xFF\xFE".b => 'UTF-16'
}.freeze
# Candidate column separators, listed in the order they are tried: STX
# ("\x02"), tab, pipe, then comma. The container is frozen so the candidate
# list cannot be altered at runtime.
COL_SEPARATORS = [
  "\x02",
  "\t",
  '|',
  ','
].freeze
# Candidate row separators, listed in the order they are tried. "\r\n" comes
# first so a CRLF terminator is not reported as a bare "\n" or "\r". The
# container is frozen so the candidate list cannot be altered at runtime.
ROW_SEPARATORS = [
  "\r\n",
  "\n",
  "\r"
].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(io) ⇒ CSVOptions

Returns a new instance of CSVOptions.



34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/csv_utils/csv_options.rb', line 34

# Inspects the first line of a CSV source and records what it reveals.
#
# @param io [String, #readline] a String is treated as a file path and opened
#   in binary mode; anything else is asked for its next line via +readline+
#   (which consumes that line from the object)
#
# Errors raised by +File.open+ or +readline+ are not rescued here.
def initialize(io)
  first_line = io.is_a?(String) ? File.open(io, 'rb') { |file| file.readline } : io.readline

  @col_separator   = auto_detect_col_sep(first_line)
  @row_separator   = auto_detect_row_sep(first_line)
  @byte_order_mark = get_byte_order_mark(first_line)
  @encoding        = get_character_encoding(@byte_order_mark)

  # Without a column separator there is nothing to split on, so the column
  # count is left unset.
  return unless @col_separator

  @columns = get_number_of_columns(first_line)
end

Instance Attribute Details

#byte_order_mark ⇒ Object (readonly)

Returns the value of attribute byte_order_mark.



27
28
29
# File 'lib/csv_utils/csv_options.rb', line 27

# Byte order mark found at the start of the first line read by the constructor.
#
# @return [String, nil] the matching key of BYTE_ORDER_MARKS, or nil when the
#   line does not begin with any of them
def byte_order_mark
  @byte_order_mark
end

#col_separator ⇒ Object (readonly)

Returns the value of attribute col_separator.



27
28
29
# File 'lib/csv_utils/csv_options.rb', line 27

# Column separator detected in the first line read by the constructor.
#
# @return [String, nil] the first entry of COL_SEPARATORS present in that
#   line, or nil when none of them occurs
def col_separator
  @col_separator
end

#columns ⇒ Object (readonly)

Returns the value of attribute columns.



27
28
29
# File 'lib/csv_utils/csv_options.rb', line 27

# Number of header fields counted on the first line read by the constructor.
#
# @return [Integer, nil] the header count; nil when no column separator was
#   detected, because the constructor only assigns it in that case
def columns
  @columns
end

#encoding ⇒ Object (readonly)

Returns the value of attribute encoding.



27
28
29
# File 'lib/csv_utils/csv_options.rb', line 27

# Encoding name derived from the detected byte order mark.
#
# @return [String] the BYTE_ORDER_MARKS value for the detected mark, or
#   'UTF-8' when no mark was detected
def encoding
  @encoding
end

#row_separator ⇒ Object (readonly)

Returns the value of attribute row_separator.



27
28
29
# File 'lib/csv_utils/csv_options.rb', line 27

# Row separator detected in the first line read by the constructor.
#
# @return [String, nil] the first entry of ROW_SEPARATORS present in that
#   line, or nil when none of them occurs
def row_separator
  @row_separator
end

Instance Method Details

#auto_detect_col_sep(line) ⇒ Object



55
56
57
# File 'lib/csv_utils/csv_options.rb', line 55

# Picks the column separator used by a line.
#
# Candidates are tried in COL_SEPARATORS order and the first one that occurs
# anywhere in the line is returned.
#
# @param line [String] line to inspect
# @return [String, nil] the matching candidate, or nil when none occurs
def auto_detect_col_sep(line)
  COL_SEPARATORS.each do |candidate|
    return candidate if line.include?(candidate)
  end

  nil
end

#auto_detect_row_sep(line) ⇒ Object



59
60
61
# File 'lib/csv_utils/csv_options.rb', line 59

# Picks the row separator used by a line.
#
# Candidates are tried in ROW_SEPARATORS order and the first one that occurs
# anywhere in the line is returned.
#
# @param line [String] line to inspect
# @return [String, nil] the matching candidate, or nil when none occurs
def auto_detect_row_sep(line)
  ROW_SEPARATORS.each do |terminator|
    return terminator if line.include?(terminator)
  end

  nil
end

#get_byte_order_mark(line) ⇒ Object



73
74
75
76
77
# File 'lib/csv_utils/csv_options.rb', line 73

# Finds the byte order mark a line starts with, if any.
#
# The comparison is done on raw bytes, so it works whatever encoding the line
# is tagged with. When several marks prefix the data (the UTF-32 little-endian
# mark starts with the UTF-16 little-endian one) the longest is chosen.
#
# @param line [String] first line of the data, BOM included
# @return [String, nil] the matching key of BYTE_ORDER_MARKS, or nil when the
#   line does not begin with a known mark
def get_byte_order_mark(line)
  bytes = line.to_s.b

  BYTE_ORDER_MARKS.keys
                  .select { |bom| bytes.start_with?(bom) }
                  .max_by(&:bytesize)
end

#get_character_encoding(bom) ⇒ Object



79
80
81
# File 'lib/csv_utils/csv_options.rb', line 79

# Translates a byte order mark into an encoding name.
#
# @param bom [String, nil] a key of BYTE_ORDER_MARKS, or nil when no mark was found
# @return [String] the mapped encoding name, falling back to 'UTF-8' when the
#   mark is unknown or absent
def get_character_encoding(bom)
  announced = BYTE_ORDER_MARKS[bom]
  return 'UTF-8' unless announced

  announced
end

#get_headers(line) ⇒ Object



63
64
65
66
67
# File 'lib/csv_utils/csv_options.rb', line 63

# Splits a header line into its column names.
#
# The trailing line terminator is removed first so it does not stay attached
# to the last header, and the split keeps trailing empty fields so a line that
# ends in separators still reports every column.
#
# @param line [String] first line of the CSV data
# @return [Array<String>] the header names, with any leading byte order mark
#   removed from the first one; empty when the line has no content
def get_headers(line)
  headers = line.chomp.split(col_separator, -1)
  # Nothing to strip from; avoids planting a nil as the first header.
  return headers if headers.empty?

  headers[0] = strip_byte_order_marks(headers[0])
  headers
end

#get_number_of_columns(line) ⇒ Object



69
70
71
# File 'lib/csv_utils/csv_options.rb', line 69

# Counts the columns on a header line.
#
# @param line [String] first line of the CSV data
# @return [Integer] how many entries get_headers returns for the line
def get_number_of_columns(line)
  header_fields = get_headers(line)
  header_fields.length
end

#strip_byte_order_marks(header) ⇒ Object



83
84
85
# File 'lib/csv_utils/csv_options.rb', line 83

# Removes the detected byte order mark from the front of a header.
#
# Reads @byte_order_mark, the variable the constructor assigns. The check and
# the removal work on raw bytes, so the header keeps its own encoding and only
# a mark at the very start is dropped.
#
# @param header [String, nil] the first header field
# @return [String, nil] the header without its leading mark; the header
#   unchanged when no mark was detected or it does not start with one
def strip_byte_order_marks(header)
  bom = @byte_order_mark
  return header unless bom && header && header.b.start_with?(bom)

  header.byteslice(bom.bytesize, header.bytesize - bom.bytesize)
end

#valid? ⇒ Boolean

Returns:

  • (Boolean)


49
50
51
52
53
# File 'lib/csv_utils/csv_options.rb', line 49

# Reports whether both separators were detected.
#
# @return [Boolean] true when neither the column separator nor the row
#   separator is nil, false otherwise
def valid?
  detected = [@col_separator, @row_separator]
  detected.none?(&:nil?)
end