Module: DaruLite::IO

Defined in:
lib/daru_lite/io/io.rb,
lib/daru_lite/io/csv/converters.rb,
lib/daru_lite/io/sql_data_source.rb

Defined Under Namespace

Modules: CSV Classes: SqlDataSource

Constant Summary collapse

DARU_OPT_KEYS =
%i[clone order index name].freeze

Class Method Summary collapse

Class Method Details

.dataframe_write_csv(dataframe, path, opts = {}) ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/daru_lite/io/io.rb', line 109

def dataframe_write_csv(dataframe, path, opts = {})
  options = {
    converters: :numeric
  }.merge(opts)

  writer = ::CSV.open(path, 'w', **options)
  writer << dataframe.vectors.to_a unless options[:headers] == false

  dataframe.each_row do |row|
    writer << if options[:convert_comma]
                row.map { |v| v.to_s.tr('.', ',') }
              else
                row.to_a
              end
  end

  writer.close
end

.dataframe_write_excel(dataframe, path, _opts = {}) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/daru_lite/io/io.rb', line 78

def dataframe_write_excel(dataframe, path, _opts = {})
  book   = Spreadsheet::Workbook.new
  sheet  = book.create_worksheet
  format = Spreadsheet::Format.new color: :blue, weight: :bold

  sheet.row(0).concat(dataframe.vectors.to_a.map(&:to_s)) # Unfreeze strings
  sheet.row(0).default_format = format
  i = 1
  dataframe.each_row do |row|
    sheet.row(i).concat(row.to_a)
    i += 1
  end

  book.write(path)
end

.dataframe_write_sql(ds, dbh, table) ⇒ Object



140
141
142
143
144
145
# File 'lib/daru_lite/io/io.rb', line 140

def dataframe_write_sql(ds, dbh, table)
  query = "INSERT INTO #{table} (#{ds.vectors.to_a.join(',')}) VALUES (#{(['?'] * ds.vectors.size).join(',')})"
  sth   = dbh.prepare(query)
  ds.each_row { |c| sth.execute(*c.to_a) }
  true
end

.from_activerecord(relation, *fields) ⇒ Object

Load dataframe from AR::Relation

Parameters:

  • relation (ActiveRecord::Relation)

    A relation to be used to load the contents of dataframe

Returns:

  • A dataframe containing the data in the given relation



152
153
154
155
156
157
158
# File 'lib/daru_lite/io/io.rb', line 152

def from_activerecord(relation, *fields)
  fields = relation.klass.column_names if fields.empty?
  fields = fields.map(&:to_sym)

  result = relation.pluck(*fields).transpose
  DaruLite::DataFrame.new(result, order: fields).tap(&:update)
end

.from_csv(path, opts = {}) ⇒ Object

Functions for loading/writing CSV files



95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/daru_lite/io/io.rb', line 95

def from_csv(path, opts = {})
  daru_options, opts = from_csv_prepare_opts opts
  # Preprocess headers for detecting and correcting repetition in
  # case the :headers option is not specified.
  hsh =
    if opts[:headers]
      from_csv_hash_with_headers(path, opts)
    else
      from_csv_hash(path, opts)
        .tap { |hash| daru_options[:order] = hash.keys }
    end
  DaruLite::DataFrame.new(hsh, daru_options)
end

.from_excel(path, opts = {}) ⇒ Object

Functions for loading/writing Excel files.



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/daru_lite/io/io.rb', line 49

def from_excel(path, opts = {})
  opts = {
    worksheet_id: 0,
    row_id: 0
  }.merge opts

  worksheet, headers = read_from_excel(path, opts)
  df = DaruLite::DataFrame.new({})
  headers.each_with_index do |h, i|
    col = worksheet.column(i).to_a
    col.delete_at 0
    df[h] = col
  end

  df
end

.from_plaintext(filename, fields) ⇒ Object

Loading data from plain text files



162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/daru_lite/io/io.rb', line 162

def from_plaintext(filename, fields)
  ds = DaruLite::DataFrame.new({}, order: fields)
  fp = File.open(filename, 'r')
  fp.each_line do |line|
    row = DaruLite::IOHelpers.process_row(line.strip.split(/\s+/), [''])
    next if row == ["\x1A"]

    ds.add_row(row)
  end
  ds.update
  fields.each { |f| ds[f].rename f }
  ds
end

.from_sql(db, query) ⇒ Object

Execute a query and create a data frame from the result

OR Path to a SQlite3 database.

Parameters:

  • db (ActiveRecord::ConnectionAdapters::AbstractAdapter, String)

    An ActiveRecord connection

  • query (String)

    The query to be executed

Returns:

  • A dataframe containing the data resulting from the query



135
136
137
138
# File 'lib/daru_lite/io/io.rb', line 135

def from_sql(db, query)
  require 'daru_lite/io/sql_data_source'
  SqlDataSource.make_dataframe(db, query)
end

.load(filename) ⇒ Object



183
184
185
186
187
188
189
190
191
# File 'lib/daru_lite/io/io.rb', line 183

def load(filename)
  if File.exist? filename
    o = false
    File.open(filename, 'r') { |fp| o = Marshal.load(fp) }
    o
  else
    false
  end
end

.read_from_excel(path, opts) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
# File 'lib/daru_lite/io/io.rb', line 66

def read_from_excel(path, opts)
  optional_gem 'spreadsheet', '~>1.3.0'

  worksheet_id = opts[:worksheet_id]
  row_id       = opts[:row_id]
  book         = Spreadsheet.open path
  worksheet    = book.worksheet worksheet_id
  headers      = ArrayHelper.recode_repeated(worksheet.row(row_id)).map(&:to_sym)

  [worksheet, headers]
end

.save(klass, filename) ⇒ Object

Loading and writing Marshalled DataFrame/Vector



177
178
179
180
181
# File 'lib/daru_lite/io/io.rb', line 177

def save(klass, filename)
  fp = File.open(filename, 'w')
  Marshal.dump(klass, fp)
  fp.close
end