Class: Remi::DataSource::CsvFile

Inherits:
Remi::DataSubject show all
Includes:
DataStub, Remi::DataSubject::CsvFile, Remi::DataSubject::DataSource
Defined in:
lib/remi/cucumber/data_source.rb,
lib/remi/data_subject/csv_file.rb

Instance Attribute Summary collapse

Attributes inherited from Remi::DataSubject

#fields

Instance Method Summary collapse

Methods included from Remi::DataSubject::CsvFile

#field_symbolizer, included

Methods included from Remi::DataSubject::DataSource

#df, #extract

Methods included from DataStub

#empty_stub_df, #stub_df, #stub_row_array, #stub_values

Methods inherited from Remi::DataSubject

#df, #df=, #field_symbolizer

Constructor Details

#initialize(*args, **kargs, &block) ⇒ CsvFile

Returns a new instance of CsvFile.



33
34
35
36
# File 'lib/remi/data_subject/csv_file.rb', line 33

# Builds the CSV file data source.
#
# Every positional, keyword and block argument is first handed to the parent
# initializer (the bare `super` forwards them unchanged) and then passed to
# init_csv_file, which is defined outside this excerpt.
#
# Returns a new instance of CsvFile.
def initialize(*args, **kargs, &block)
  super
  init_csv_file(*args, **kargs, &block)
end

Instance Attribute Details

#csv_options ⇒ Object (readonly)

Returns the value of attribute csv_options.



39
40
41
# File 'lib/remi/data_subject/csv_file.rb', line 39

# Returns the value of the @csv_options instance variable. Other methods on
# this page pass it to Daru::DataFrame.from_csv and read its :col_sep entry.
def csv_options
  @csv_options
end

#extractor ⇒ Object

Returns the value of attribute extractor.



38
39
40
# File 'lib/remi/data_subject/csv_file.rb', line 38

# Returns the value of the @extractor instance variable, i.e. the object
# whose #extract method is invoked by extract!.
def extractor
  @extractor
end

Instance Method Details

#extract! ⇒ Object

Public: Called to extract data from the source.

Returns data in a format that can be used to create a dataframe.



44
45
46
# File 'lib/remi/data_subject/csv_file.rb', line 44

# Public: Runs the configured extractor and caches what it returns.
#
# The extractor result is coerced with Kernel#Array, so nil becomes an empty
# array and a single value becomes a one-element array.
#
# Returns the Array that was stored in @extract.
def extract!
  extracted = @extractor.extract
  @extract = Array(extracted)
end

#first_line ⇒ Object



92
93
94
95
96
97
# File 'lib/remi/data_subject/csv_file.rb', line 92

# Reads the first line of the source file with carriage returns removed.
#
# The value is memoized in @first_line, so the file is not reopened once a
# line has been read successfully.
#
# Returns the first line as a String (including its trailing "\n", if any).
def first_line
  @first_line ||= begin
    # readline splits on \n only, so a DOS-formatted file leaves a stray \r
    # behind; remove every \r from the line.
    leading_line = File.open(source_filename, &:readline)
    leading_line.gsub(/\r/, '')
  end
end

#headers ⇒ Object



99
100
101
# File 'lib/remi/data_subject/csv_file.rb', line 99

# Reads the header names from the source CSV file.
#
# The file is opened with the options returned by source_csv_options, the
# first parsed row is read, and that row's headers are memoized in @headers.
#
# The options are splatted as keyword arguments: since Ruby 3.0 a trailing
# positional Hash is no longer converted to keywords, so handing it to
# CSV.open as a third positional argument raises ArgumentError. The splat
# form is also accepted by older Rubies.
#
# NOTE(review): this relies on source_csv_options enabling CSV headers so that
# the first row responds to #headers, and on the file holding at least one
# data row (otherwise `csv.first` is presumably nil) - confirm against callers.
#
# Returns the headers of the first row.
def headers
  @headers ||= CSV.open(source_filename, 'r', **source_csv_options) { |csv| csv.first }.headers
end

#source_filename ⇒ Object

Only a single source file is supported for now.



87
88
89
90
# File 'lib/remi/data_subject/csv_file.rb', line 87

# Resolves the one file this source reads from.
#
# Only a single extracted file is supported: the size check runs on every
# call, before the memoized value in @source_filename is consulted.
#
# Returns the first extracted filename.
# Raises RuntimeError if more than one file was extracted.
def source_filename
  too_many_files = extract.size > 1
  raise "Multiple source files detected" if too_many_files

  @source_filename ||= extract.first
end

#stub_header ⇒ Object



48
49
50
# File 'lib/remi/cucumber/data_source.rb', line 48

# Builds the header line for a stub CSV file.
#
# Returns a String made of the keys of @fields joined with the :col_sep
# entry of @csv_options. No quoting or escaping is applied.
def stub_header
  column_names = @fields.keys
  separator = @csv_options[:col_sep]
  column_names.join(separator)
end

#stub_row_csv ⇒ Object



52
53
54
# File 'lib/remi/cucumber/data_source.rb', line 52

# Builds one data line for a stub CSV file.
#
# Returns a String made of the values from stub_row_array joined with the
# :col_sep entry of @csv_options. No quoting or escaping is applied.
def stub_row_csv
  row_values = stub_row_array
  row_values.join(@csv_options[:col_sep])
end

#stub_tmp_file ⇒ Object



35
36
37
# File 'lib/remi/cucumber/data_source.rb', line 35

# Creates (once) a temporary file for stub data and returns its path.
#
# The Tempfile object itself is kept in @stub_tmp_file_handle: a Tempfile
# that becomes unreachable is deleted from disk when it is garbage collected,
# which would leave the memoized path pointing at a file that can vanish at
# any time. The handle is closed straight away (without unlinking) because
# callers only work with the path.
#
# Returns the path of the temporary file as a String.
def stub_tmp_file
  @stub_tmp_file ||= begin
    @stub_tmp_file_handle = Tempfile.new('stub_tmp_file.csv')
    @stub_tmp_file_handle.close
    @stub_tmp_file_handle.path
  end
end

#to_dataframe ⇒ Object

Public: Converts extracted data to a dataframe. Currently only supports Daru DataFrames.

Returns a Remi::DataFrame



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/remi/data_subject/csv_file.rb', line 52

# Public: Converts the extracted files to a single dataframe. Currently only
# Daru DataFrames are supported.
#
# Each extracted file is logged, passed through preprocess, and loaded with
# Daru::DataFrame.from_csv using @csv_options. When @filename_field is set, a
# column of that name holding the original filename is added to the file's
# frame. Frames are concatenated in extraction order, which assumes that
# every file has exactly the same structure.
#
# Returns the result of Remi::DataFrame.create(:daru, ...) for the combined
# frame (the frame is nil when nothing was extracted).
def to_dataframe
  combined_df = nil

  extract.each_with_index do |source_file, position|
    @logger.info "Converting #{source_file} to a dataframe"
    file_df = Daru::DataFrame.from_csv(preprocess(source_file), @csv_options)

    if @filename_field
      # Tag every row with the file it was read from.
      file_df[@filename_field] = Daru::Vector.new([source_file] * file_df.size, index: file_df.index)
    end

    combined_df = position.zero? ? file_df : combined_df.concat(file_df)
  end

  Remi::DataFrame.create(:daru, combined_df)
end

#valid_headers? ⇒ Boolean

Returns:

  • (Boolean)


103
104
105
# File 'lib/remi/data_subject/csv_file.rb', line 103

# Checks that every declared field appears among the file's headers.
#
# Headers that have no matching field are ignored; only fields missing from
# the headers make the check fail.
#
# Returns true when no key of `fields` is absent from `headers`.
def valid_headers?
  missing_fields = fields.keys - headers
  missing_fields.empty?
end

#write_stub_tmp_file ⇒ Object



39
40
41
42
43
44
45
46
# File 'lib/remi/cucumber/data_source.rb', line 39

# Writes a two-line stub CSV (header line, then one data line) to the stub
# temporary file, replacing any previous content.
#
# The file is opened in binary write mode and each line is written with
# `puts`.
#
# Returns the path of the stub temporary file.
def write_stub_tmp_file
  target_path = stub_tmp_file

  File.open(target_path, "wb") do |out|
    out.puts(stub_header)
    out.puts(stub_row_csv)
  end

  target_path
end