Class: RedshiftConnector::Reader::RedshiftCSV

Inherits:
Abstract
  • Object
show all
Defined in:
lib/redshift-connector/data_file/reader/redshift_csv.rb

Overview

Reads CSV files generated by the Redshift UNLOAD statement (with the ADDQUOTES and ESCAPE options). Because UNLOAD escapes data with '\' (the backslash character), the standard CSV class cannot be used.

Constant Summary collapse

# Maps each two-character escape sequence (backslash + character) found in
# an unloaded column to the text it stands for. Frozen so that the table
# shared by every reader cannot be modified by accident at runtime.
UNESCAPE_MAP = {
  '\\"' => '"',
  "\\'" => "'",
  '\\,' => ',',
  '\\r' => "\r",
  '\\n' => "\n",
  '\\\\' => '\\'
}.freeze

Constants inherited from Abstract

Abstract::READER_CLASSES

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Abstract

declare_reader, get_reader_class

Constructor Details

#initialize(f) ⇒ RedshiftCSV

f

IO



15
16
17
# File 'lib/redshift-connector/data_file/reader/redshift_csv.rb', line 15

# Stores the input stream that #each later reads line by line.
#
# @param f [IO] source of the unloaded CSV data; #each calls
#   +each_line+ and +lineno+ on it.
def initialize(f)
  @f = f
end

Class Method Details

.data_object?(key) ⇒ Boolean

Returns:

  • (Boolean)


10
11
12
# File 'lib/redshift-connector/data_file/reader/redshift_csv.rb', line 10

# Tells whether an object key names a CSV data file.
#
# Only the base name of +key+ is examined; it qualifies when it contains
# ".csv" either at its very end or followed by another dot
# (e.g. "part.csv" or "part.csv.gz").
#
# @param key [String] object key or path
# @return [Boolean] true or false (never a match offset or nil)
def self.data_object?(key)
  # match? yields a real boolean and does not touch the regexp globals.
  File.basename(key).match?(/\.csv(?:\.|\z)/)
end

Instance Method Details

#each ⇒ Object



19
20
21
22
23
24
25
# File 'lib/redshift-connector/data_file/reader/redshift_csv.rb', line 19

# Yields every row of the underlying stream as parsed by #parse_row.
#
# @yieldparam row [Object] the value returned by #parse_row for one line
# @return [Enumerator] when called without a block; otherwise whatever
#   +@f.each_line+ returns
def each
  # Without a block, hand back an Enumerator instead of failing on yield.
  return enum_for(__method__) unless block_given?

  # We can use simple #each_line to read single row
  # because line terminators are always escaped by UNLOAD.
  @f.each_line do |line|
    yield parse_row(line, @f.lineno)
  end
end

#parse_row(line, lineno = nil) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/redshift-connector/data_file/reader/redshift_csv.rb', line 27

# Splits one line of UNLOAD output into its unescaped column values.
#
# Leading whitespace is skipped, then double-quoted fields are read one
# after another; whitespace and an optional comma after each field are
# consumed. Each field is passed through #unescape_column.
#
# @param line [String] one line of input, with or without its terminator
# @param lineno [Integer, nil] line number, used only in the error message
# @return [Array] one entry per quoted field (empty for a blank line)
# @raise [MalformedCSVException] when the text at the current position is
#   not a complete double-quoted field
def parse_row(line, lineno = nil)
  row = []
  s = StringScanner.new(line)
  s.skip(/\s+/)
  until s.eos?
    # Consume exactly one character (or one escape pair) per repetition.
    # A nested "+" inside the "*" group accepts the same fields but lets
    # the engine backtrack exponentially on an unterminated quote.
    col = s.scan(/"(?:\\.|[^"\\])*"/)
    raise MalformedCSVException, "CSV parse error at line #{lineno}" unless col

    row.push unescape_column(col)
    s.skip(/\s*/)    # skip line terminator on line ends
    s.skip(/,\s*/)
  end
  row
end

#unescape_column(col) ⇒ Object



49
50
51
52
# File 'lib/redshift-connector/data_file/reader/redshift_csv.rb', line 49

# Removes the surrounding quotes from a raw column and resolves its
# backslash escape sequences.
#
# Sequences listed in UNESCAPE_MAP are replaced by their mapped text. Any
# other escaped character is kept as-is with the backslash removed, rather
# than being deleted together with the backslash.
#
# @param col [String] raw field including its first and last quote character
# @return [String] the unescaped field content
def unescape_column(col)
  charmap = UNESCAPE_MAP
  # seq is always backslash + one character, so seq[1] is the escaped char.
  col[1...-1].gsub(/\\./) { |seq| charmap.fetch(seq) { seq[1] } }
end