Class: ArcFurnace::CSVSource

Inherits:
EnumeratorSource show all
Includes:
CSVToHashWithDuplicateHeaders
Defined in:
lib/arc-furnace/csv_source.rb

Constant Summary collapse

COMMA =
','.freeze

Instance Attribute Summary collapse

Attributes inherited from Node

#error_handler, #node_id, #params

Instance Method Summary collapse

Methods included from CSVToHashWithDuplicateHeaders

#csv_to_hash_with_duplicates

Methods inherited from EnumeratorSource

#advance, #empty?

Methods inherited from Source

#advance, #close, #empty?, #prepare, #row

Constructor Details

#initialize(filename: nil, csv: nil, encoding: 'UTF-8', delimiter: COMMA, group_by: false, key_column: nil) ⇒ CSVSource

Returns a new instance of CSVSource.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/arc-furnace/csv_source.rb', line 14

# Sets up the source's inputs and grouping options, then defers to the
# superclass initializer with no arguments.
#
# @param filename [String, nil] path opened with the given encoding; when
#   falsy no file is opened and @file is left unassigned
# @param csv [Object, nil] stored as-is in @csv
#   (presumably an already-built CSV-like reader — confirm against callers)
# @param encoding [String] encoding handed to File.open for +filename+
# @param delimiter [String] stored in @delimiter; defaults to COMMA
# @param group_by [Boolean] stored in @group_by
# @param key_column [Object, nil] stored in @key_column
def initialize(
  filename: nil,
  csv: nil,
  encoding: 'UTF-8',
  delimiter: COMMA,
  group_by: false,
  key_column: nil
)
  # Open the file first so a bad path fails before any other state is set.
  if filename
    @file = File.open(filename, encoding: encoding)
  end

  # Input configuration.
  @csv = csv
  @delimiter = delimiter

  # Grouping configuration; the row buffer starts out as an empty Array.
  @group_by = group_by
  @key_column = key_column
  @preprocessed_csv = []

  super()
end

Instance Attribute Details

#csv ⇒ Object (readonly)

Returns the value of attribute csv.



9
10
11
# File 'lib/arc-furnace/csv_source.rb', line 9

# Reader for @csv, which #initialize assigns from its +csv:+ keyword
# (nil by default). When this is truthy, #parse_file iterates it with #each
# instead of building a CSV reader over #file.
#
# @return [Object, nil] whatever was passed as +csv:+
#   (presumably a CSV-like object responding to #each — confirm against callers)
def csv
  @csv
end

#delimiter ⇒ Object (readonly)

Returns the value of attribute delimiter.



9
10
11
# File 'lib/arc-furnace/csv_source.rb', line 9

# Reader for @delimiter, which #initialize assigns from its +delimiter:+
# keyword (default COMMA, i.e. ','). #parse_file passes it to CSV.new as
# the +col_sep+ option.
#
# @return [String] the column separator
def delimiter
  @delimiter
end

#file ⇒ Object (readonly)

Returns the value of attribute file.



9
10
11
# File 'lib/arc-furnace/csv_source.rb', line 9

# Reader for @file. #initialize assigns it the result of File.open only when
# a +filename:+ was given; otherwise it is never assigned and reads as nil.
# #finalize closes it when present.
#
# @return [File, nil] the opened file handle, or nil if no filename was given
def file
  @file
end

#group_by ⇒ Object (readonly) Also known as: group_by?

Returns the value of attribute group_by.



9
10
11
# File 'lib/arc-furnace/csv_source.rb', line 9

# Reader for @group_by, which #initialize assigns from its +group_by:+
# keyword (default false). #preprocess and #build_enumerator branch on it
# through the +group_by?+ form of this reader.
#
# @return [Boolean] the grouping flag as passed to the constructor
def group_by
  @group_by
end

#key_column ⇒ Object (readonly)

Returns the value of attribute key_column.



9
10
11
# File 'lib/arc-furnace/csv_source.rb', line 9

# Reader for @key_column, which #initialize assigns from its +key_column:+
# keyword (nil by default). #preprocess uses it as the key looked up in each
# converted row to decide which group the row belongs to.
#
# @return [Object, nil] the grouping key
#   (presumably a header name String — confirm against callers)
def key_column
  @key_column
end

#preprocessed_csv ⇒ Object (readonly)

Returns the value of attribute preprocessed_csv.



9
10
11
# File 'lib/arc-furnace/csv_source.rb', line 9

# Reader for @preprocessed_csv. #initialize sets it to an empty Array; when
# grouping is enabled, #preprocess replaces it with the Hash produced by
# grouping the converted rows on their +key_column+ value.
#
# @return [Array, Hash] an Array initially; a Hash mapping each key value to
#   its Array of rows once #preprocess has run with grouping enabled
def preprocessed_csv
  @preprocessed_csv
end

#value ⇒ Object (readonly)

Returns the value of attribute value.



9
10
11
# File 'lib/arc-furnace/csv_source.rb', line 9

# Reader for @value.
#
# NOTE(review): nothing in this class's visible code assigns @value;
# presumably it is maintained by a superclass — confirm.
#
# @return [Object, nil] the current contents of @value
def value
  @value
end

Instance Method Details

#build_enumerator ⇒ Object



48
49
50
51
52
53
54
55
56
# File 'lib/arc-furnace/csv_source.rb', line 48

# Builds the Enumerator that produces this source's output.
#
# With grouping enabled it walks +preprocessed_csv+ as key/value pairs and
# emits each value, discarding the key. Otherwise it emits the result of
# +csv_to_hash_with_duplicates+ for every row handed over by +parse_file+.
#
# @return [Enumerator]
def build_enumerator
  Enumerator.new do |sink|
    if group_by?
      preprocessed_csv.each do |_key, grouped_rows|
        sink << grouped_rows
      end
    else
      parse_file do |record|
        sink << csv_to_hash_with_duplicates(record)
      end
    end
  end
end

#finalize ⇒ Object



44
45
46
# File 'lib/arc-furnace/csv_source.rb', line 44

# Closes the handle returned by +file+, if there is one; does nothing when
# no file was opened.
#
# @return [nil]
def finalize
  handle = file
  return unless handle

  handle.close
end

#parse_file ⇒ Object



58
59
60
# File 'lib/arc-furnace/csv_source.rb', line 58

# Yields every row of the input to the given block.
#
# Rows come from +csv+ when it is set; otherwise a CSV reader is built over
# +file+ with header parsing enabled and +delimiter+ as the column separator.
#
# The CSV options are passed as keyword arguments. Ruby 3 no longer converts
# a positional Hash into keywords, so CSV.new(io, { ... }) raises
# ArgumentError there; the keyword form works on Ruby 2.x and 3.x alike.
#
# @yieldparam row [Object] one row from the reader (a CSV::Row when the
#   reader is built here)
# @return [Object] whatever the reader's #each returns
def parse_file
  reader = csv || CSV.new(file, headers: true, col_sep: delimiter)
  reader.each { |row| yield row }
end

#preprocess ⇒ Object

Note that group_by requires the entire file to be read into memory, because every row is collected before the rows are grouped by key_column.



37
38
39
40
41
42
# File 'lib/arc-furnace/csv_source.rb', line 37

# Reads and groups the whole input up front when grouping is enabled;
# otherwise does nothing.
#
# Every row from +parse_file+ is converted with
# +csv_to_hash_with_duplicates+ and appended to @preprocessed_csv, which is
# then replaced by a Hash grouping those rows on their +key_column+ value.
#
# @return [Hash, nil] the grouped rows, or nil when grouping is disabled
def preprocess
  return unless group_by?

  parse_file do |record|
    @preprocessed_csv << csv_to_hash_with_duplicates(record)
  end
  @preprocessed_csv = @preprocessed_csv.group_by { |entry| entry[key_column] }
end