Class: RedshiftConnector::Reader::RedshiftCSV

Inherits:
Abstract
  • Object
show all
Defined in:
lib/redshift_connector/reader/redshift_csv.rb

Overview

Reads CSV files generated by the Redshift UNLOAD statement (with the ADDQUOTES and ESCAPE options). UNLOAD escapes data with '\' (the backslash character), so the standard CSV class cannot be used.

Constant Summary collapse

# Maps each two-character escape sequence (a backslash followed by one
# character) to the text it stands for once unescaped.
# Frozen so the shared lookup table cannot be modified at run time.
UNESCAPE_MAP =
{
  '\\"' => '"',
  "\\'" => "'",
  '\\,' => ',',
  '\\r' => "\r",
  '\\n' => "\n",
  '\\\\' => '\\'
}.freeze

Constants inherited from Abstract

Abstract::READER_CLASSES

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Abstract

declare_reader, get_reader_class

Constructor Details

#initialize(f) ⇒ RedshiftCSV

f

IO



16
17
18
# File 'lib/redshift_connector/reader/redshift_csv.rb', line 16

# @param f [IO] stream to read rows from; kept in @f
def initialize(f)
  @f = f
end

Class Method Details

.data_object?(key) ⇒ Boolean



11
12
13
# File 'lib/redshift_connector/reader/redshift_csv.rb', line 11

# Tells whether an object key names a CSV data file.
#
# Only the last path component is examined: it must contain ".csv" either
# at its very end or immediately followed by a dot (e.g. "part.csv.gz").
#
# @param key [String] object key / path
# @return [Integer, nil] offset of the match within the basename (truthy),
#   or nil when the basename does not look like a CSV object
def self.data_object?(key)
  basename = File.basename(key)
  basename =~ /\.csv(?:\.|\z)/
end

Instance Method Details

#each ⇒ Object



20
21
22
23
24
25
26
# File 'lib/redshift_connector/reader/redshift_csv.rb', line 20

# Yields every row of the underlying stream, one parsed line at a time.
#
# @yieldparam row [Array] the result of #parse_row for one line
# @return [Enumerator] when called without a block
# @return [Object] otherwise, whatever @f.each_line returns
def each
  # Without a block there is nothing to yield to; hand back an Enumerator
  # instead of failing with LocalJumpError on the first row.
  return enum_for(:each) unless block_given?

  # A plain #each_line is enough to read one row at a time
  # because line terminators are always escaped by UNLOAD.
  @f.each_line do |line|
    yield parse_row(line, @f.lineno)
  end
end

#parse_row(line, lineno = nil) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/redshift_connector/reader/redshift_csv.rb', line 28

# Splits one line into its unescaped column values.
#
# Every column must be wrapped in double quotes; inside the quotes a
# backslash escapes the character that follows it. Columns may be separated
# by a comma and/or whitespace, and leading/trailing whitespace (including
# the line terminator) is ignored.
#
# @param line [String] a single row of text
# @param lineno [Integer, nil] line number, used only in the error message
# @return [Array] the columns, each passed through #unescape_column
# @raise [Reader::MalformedCSVException] when the text at the current scan
#   position is not a complete double-quoted column
def parse_row(line, lineno = nil)
  row = []
  s = StringScanner.new(line)
  s.skip(/\s+/)
  until s.eos?
    # The group is atomic (?>...) so that, when the closing quote is missing,
    # the engine does not retry every possible split of a long run of plain
    # characters. The set of accepted columns is the same as with a plain group.
    col = s.scan(/"(?>\\.|[^"\\]+)*"/)
    raise Reader::MalformedCSVException, "CSV parse error at line #{lineno}" unless col

    row.push unescape_column(col)
    s.skip(/\s*/)    # skip line terminator on line ends
    s.skip(/,\s*/)
  end
  row
end

#unescape_column(col) ⇒ Object



50
51
52
53
# File 'lib/redshift_connector/reader/redshift_csv.rb', line 50

# Removes the surrounding quotes from a column and resolves its
# backslash escape sequences.
#
# Sequences listed in UNESCAPE_MAP are replaced by their mapped text. Any
# other backslash + character pair is reduced to the character itself
# rather than being deleted from the value.
# NOTE(review): the fallback assumes "backslash means take the next character
# literally" — confirm against the UNLOAD ESCAPE output you expect.
#
# @param col [String] a column including its opening and closing quote
# @return [String] the unescaped column content
def unescape_column(col)
  # col[1...-1] drops the first and last character (the quotes).
  col[1...-1].gsub(/\\./) { |seq| UNESCAPE_MAP.fetch(seq) { seq[1] } }
end