Class: RedshiftConnector::AbstractDataFileBundle

Inherits:
Object
  • Object
show all
Defined in:
lib/redshift_connector/abstract_data_file_bundle.rb

Direct Known Subclasses

S3DataFileBundle, UrlDataFileBundle

Constant Summary collapse

REPORT_SIZE =

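Row-count interval at which #each_batch logs a progress message. (Original note attached here: "abstract data_files" — the data_files method is abstract and is expected to be provided by subclasses.)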

10_0000

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(filter: nil, batch_size: 1000, logger: RedshiftConnector.logger) ⇒ AbstractDataFileBundle

Returns a new instance of AbstractDataFileBundle.



3
4
5
6
7
# File 'lib/redshift_connector/abstract_data_file_bundle.rb', line 3

# Stores the row filter, batch size, and logger for this bundle.
#
# @param filter [Object, nil] stored in @filter; when nil (or false) a lambda
#   that returns its arguments as an array is stored instead
# @param batch_size [Integer, nil] stored in @batch_size; nil falls back to 1000
# @param logger [#info] stored in @logger; defaults to RedshiftConnector.logger
def initialize(filter: nil, batch_size: 1000, logger: RedshiftConnector.logger)
  # Normalise the optional arguments first so an explicit nil still gets a default.
  filter ||= ->(*row) { row }
  batch_size ||= 1000

  @filter = filter
  @batch_size = batch_size
  @logger = logger
end

Instance Attribute Details

#batch_size ⇒ Object (readonly)

Returns the value of attribute batch_size.



9
10
11
# File 'lib/redshift_connector/abstract_data_file_bundle.rb', line 9

# Reader for @batch_size, the value each_batch hands to do_each_batch.
#
# @return [Object] the current value of @batch_size
def batch_size
  @batch_size
end

#logger ⇒ Object (readonly)

Returns the value of attribute logger.



10
11
12
# File 'lib/redshift_connector/abstract_data_file_bundle.rb', line 10

# Reader for @logger, the object that receives the `info` progress messages.
#
# @return [Object] the current value of @logger
def logger
  @logger
end

Instance Method Details

#all_data_objects ⇒ Object



27
28
29
# File 'lib/redshift_connector/abstract_data_file_bundle.rb', line 27

# Selects the entries of data_files whose data_object? predicate is truthy.
#
# @return [Object] whatever data_files.select returns (an Array when
#   data_files is an Array)
def all_data_objects
  data_files.select(&:data_object?)
end

#each_batch(report: true) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/redshift_connector/abstract_data_file_bundle.rb', line 35

# Yields each batch produced by do_each_batch(@batch_size) to the caller's
# block and keeps a running row count.
#
# When +report+ is truthy, a progress line is logged each time the running
# count reaches a new multiple of REPORT_SIZE, and a final total line is
# logged after the last batch.
#
# @param report [Boolean] whether to write progress messages to @logger
# @yieldparam rows [#size] one batch of rows
def each_batch(report: true)
  processed = 0
  milestones_logged = 0
  do_each_batch(@batch_size) do |batch|
    yield batch
    processed += batch.size
    # Integer division: the number of REPORT_SIZE multiples reached so far.
    milestone = processed / REPORT_SIZE
    if milestone > milestones_logged
      @logger.info "#{processed} rows processed" if report
      milestones_logged = milestone
    end
  end
  @logger.info "total #{processed} rows processed" if report
end

#each_object(&block) ⇒ Object



20
21
22
23
24
25
# File 'lib/redshift_connector/abstract_data_file_bundle.rb', line 20

# Walks every entry returned by all_data_objects, logging its key to @logger
# and then yielding the entry to the caller's block.
#
# @yieldparam obj [#key] the entry currently being processed
# @return [Object] the return value of all_data_objects.each
def each_object(&block)
  all_data_objects.each do |data_object|
    key = data_object.key
    @logger.info("processing s3 object: #{key}")
    yield data_object
  end
end

#each_row(&block) ⇒ Object Also known as: each



12
13
14
15
16
# File 'lib/redshift_connector/abstract_data_file_bundle.rb', line 12

# Passes the caller's block to each_row on every object yielded by
# each_object, so the block is invoked by each object's own each_row in turn.
#
# @return [Object] the return value of each_object
def each_row(&block)
  each_object { |data_object| data_object.each_row(&block) }
end