Module: Daru::IO

Defined in:
lib/daru/io/io.rb,
lib/daru/io/csv/converters.rb,
lib/daru/io/sql_data_source.rb

Defined Under Namespace

Modules: CSV
Classes: SqlDataSource

Class Method Summary

Class Method Details

.dataframe_write_csv(dataframe, path, opts = {}) ⇒ Object



# File 'lib/daru/io/io.rb', line 101

def dataframe_write_csv dataframe, path, opts={}
  options = {
    converters: :numeric
  }.merge(opts)

  writer = ::CSV.open(path, 'w', options)
  writer << dataframe.vectors.to_a unless options[:headers] == false

  dataframe.each_row do |row|
    writer << if options[:convert_comma]
                row.map { |v| v.to_s.tr('.', ',') }
              else
                row.to_a
              end
  end

  writer.close
end
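
Example (a minimal usage sketch; the data and the 'out.csv' path are illustrative, and in daru this is typically reached through DataFrame#write_csv, which delegates here):

  require 'daru'

  df = Daru::DataFrame.new(price: [1.5, 2.25], qty: [3, 4])
  Daru::IO.dataframe_write_csv(df, 'out.csv')
  Daru::IO.dataframe_write_csv(df, 'out.csv', headers: false) # skip the header row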

.dataframe_write_excel(dataframe, path, _opts = {}) ⇒ Object



# File 'lib/daru/io/io.rb', line 70

def dataframe_write_excel dataframe, path, _opts={}
  book   = Spreadsheet::Workbook.new
  sheet  = book.create_worksheet
  format = Spreadsheet::Format.new color: :blue, weight: :bold

  sheet.row(0).concat(dataframe.vectors.to_a.map(&:to_s)) # Unfreeze strings
  sheet.row(0).default_format = format
  i = 1
  dataframe.each_row do |row|
    sheet.row(i).concat(row.to_a)
    i += 1
  end

  book.write(path)
end
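
Example (a sketch with an illustrative 'report.xls' path; the spreadsheet gem writes the legacy .xls format, and the header row receives the bold blue format set up above):

  df = Daru::DataFrame.new(name: %w[alice bob], score: [91, 78])
  Daru::IO.dataframe_write_excel(df, 'report.xls')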

.dataframe_write_sql(ds, dbh, table) ⇒ Object



# File 'lib/daru/io/io.rb', line 131

def dataframe_write_sql ds, dbh, table
  require 'dbi'
  query = "INSERT INTO #{table} ("+ds.vectors.to_a.join(',')+') VALUES ('+(['?']*ds.vectors.size).join(',')+')'
  sth   = dbh.prepare(query)
  ds.each_row { |c| sth.execute(*c.to_a) }
  true
end
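
Example (a sketch of writing into an existing table over DBI; the connection string and the 'orders' table are assumptions, and the table's columns must match the DataFrame's vector names):

  require 'dbi'

  df  = Daru::DataFrame.new(id: [1, 2], total: [9.99, 4.50])
  dbh = DBI.connect('DBI:SQLite3:example.db') # hypothetical SQLite3 database
  Daru::IO.dataframe_write_sql(df, dbh, 'orders')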

.from_activerecord(relation, *fields) ⇒ Object

Loads a DataFrame from an ActiveRecord::Relation

Parameters:

  • relation (ActiveRecord::Relation)

    A relation used to load the contents of the DataFrame

Returns:

  • A dataframe containing the data in the given relation



# File 'lib/daru/io/io.rb', line 144

def from_activerecord(relation, *fields)
  fields = relation.klass.column_names if fields.empty?
  fields = fields.map(&:to_sym)

  result = relation.pluck(*fields).transpose
  Daru::DataFrame.new(result, order: fields).tap(&:update)
end
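
Example (a sketch assuming a hypothetical ActiveRecord model User with name and age columns and an established connection; with no fields given, every column of the relation's model is loaded):

  df  = Daru::IO.from_activerecord(User.where('age >= 18'), :name, :age)
  all = Daru::IO.from_activerecord(User.all) # pulls every column of User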

.from_csv(path, opts = {}) ⇒ Object

Loads a CSV file into a Daru::DataFrame



# File 'lib/daru/io/io.rb', line 87

def from_csv path, opts={}
  daru_options, opts = from_csv_prepare_opts opts
  # Preprocess headers for detecting and correcting repetition in
  # case the :headers option is not specified.
  hsh =
    if opts[:headers]
      from_csv_hash_with_headers(path, opts)
    else
      from_csv_hash(path, opts)
        .tap { |hash| daru_options[:order] = hash.keys }
    end
  Daru::DataFrame.new(hsh,daru_options)
end
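
Example (a minimal sketch; 'data.csv' is an illustrative path, options after the path are forwarded to Ruby's CSV parser, and this is commonly invoked through Daru::DataFrame.from_csv, which delegates here):

  df = Daru::IO.from_csv('data.csv')
  df = Daru::IO.from_csv('data.csv', col_sep: ';', headers: true)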

.from_excel(path, opts = {}) ⇒ Object

Loads an Excel spreadsheet into a Daru::DataFrame.



# File 'lib/daru/io/io.rb', line 41

def from_excel path, opts={}
  opts = {
    worksheet_id: 0,
    row_id: 0
  }.merge opts

  worksheet, headers = read_from_excel(path, opts)
  df = Daru::DataFrame.new({})
  headers.each_with_index do |h,i|
    col = worksheet.column(i).to_a
    col.delete_at 0
    df[h] = col
  end

  df
end
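
Example (a sketch with an illustrative 'data.xls' path; :worksheet_id selects the sheet and :row_id the header row, both defaulting to 0 as shown above):

  df = Daru::IO.from_excel('data.xls')
  df = Daru::IO.from_excel('data.xls', worksheet_id: 1, row_id: 2)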

.from_html(path, opts) ⇒ Object



# File 'lib/daru/io/io.rb', line 184

def from_html path, opts
  optional_gem 'mechanize', '~>2.7.5'
  page = Mechanize.new.get(path)
  page.search('table').map { |table| html_parse_table table }
      .keep_if { |table| html_search table, opts[:match] }
      .compact
      .map { |table| html_decide_values table, opts }
      .map { |table| html_table_to_dataframe table }
end
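
Example (a sketch; the URL and the :match string are illustrative; note that an Array of DataFrames is returned, one per HTML table matching :match):

  tables = Daru::IO.from_html('https://example.com/stats.html', match: 'Population')
  df = tables.first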

.from_plaintext(filename, fields) ⇒ Object

Loads data from a whitespace-delimited plain text file



# File 'lib/daru/io/io.rb', line 154

def from_plaintext filename, fields
  ds = Daru::DataFrame.new({}, order: fields)
  fp = File.open(filename,'r')
  fp.each_line do |line|
    row = Daru::IOHelpers.process_row(line.strip.split(/\s+/),[''])
    next if row == ["\x1A"]
    ds.add_row(row)
  end
  ds.update
  fields.each { |f| ds[f].rename f }
  ds
end
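
Example (a sketch; 'people.txt' is an illustrative whitespace-delimited file whose columns correspond to the given field names):

  df = Daru::IO.from_plaintext('people.txt', [:name, :age, :city])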

.from_sql(db, query) ⇒ Object

Executes a query and creates a DataFrame from the result

Parameters:

  • db (DBI::DatabaseHandle, String)

    A DBI connection handle, or a path to an SQLite3 database.

  • query (String)

    The query to be executed

Returns:

  • A dataframe containing the data resulting from the query



# File 'lib/daru/io/io.rb', line 126

def from_sql(db, query)
  require 'daru/io/sql_data_source'
  SqlDataSource.make_dataframe(db, query)
end
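
Example (a sketch showing both accepted forms of the first argument; the database path, table, and columns are illustrative):

  df = Daru::IO.from_sql('example.db', 'SELECT * FROM orders')

  require 'dbi'
  dbh = DBI.connect('DBI:SQLite3:example.db')
  df  = Daru::IO.from_sql(dbh, 'SELECT id, total FROM orders')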

.load(filename) ⇒ Object



# File 'lib/daru/io/io.rb', line 174

def load filename
  if File.exist? filename
    o = false
    File.open(filename, 'r') { |fp| o = Marshal.load(fp) }
    o
  else
    false
  end
end
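
Example (reads back an object written with .save; 'df.dat' is an illustrative path, and false is returned when the file does not exist):

  df = Daru::IO.load('df.dat')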

.read_from_excel(path, opts) ⇒ Object



# File 'lib/daru/io/io.rb', line 58

def read_from_excel path, opts
  optional_gem 'spreadsheet', '~>1.1.1'

  worksheet_id = opts[:worksheet_id]
  row_id       = opts[:row_id]
  book         = Spreadsheet.open path
  worksheet    = book.worksheet worksheet_id
  headers      = ArrayHelper.recode_repeated(worksheet.row(row_id)).map(&:to_sym)

  [worksheet, headers]
end
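
Example (an internal helper used by .from_excel; a sketch of calling it directly with an illustrative 'data.xls' path):

  worksheet, headers = Daru::IO.read_from_excel('data.xls', worksheet_id: 0, row_id: 0)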

.save(klass, filename) ⇒ Object

Saves a Marshal-serialized DataFrame or Vector to a file (read back with .load)



# File 'lib/daru/io/io.rb', line 168

def save klass, filename
  fp = File.open(filename, 'w')
  Marshal.dump(klass, fp)
  fp.close
end
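
Example (a round-trip sketch; 'df.dat' is an illustrative path):

  df = Daru::DataFrame.new(a: [1, 2, 3])
  Daru::IO.save(df, 'df.dat')
  restored = Daru::IO.load('df.dat')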