Module: Daru::IO
- Defined in:
- lib/daru/io/io.rb,
lib/daru/io/csv/converters.rb,
lib/daru/io/sql_data_source.rb
Defined Under Namespace
Modules: CSV Classes: SqlDataSource
Class Method Summary collapse
- .dataframe_write_csv(dataframe, path, opts = {}) ⇒ Object
- .dataframe_write_excel(dataframe, path, _opts = {}) ⇒ Object
- .dataframe_write_sql(ds, dbh, table) ⇒ Object
-
.from_activerecord(relation, *fields) ⇒ Object
Load dataframe from AR::Relation.
-
.from_csv(path, opts = {}) ⇒ Object
Functions for loading/writing CSV files.
-
.from_excel(path, opts = {}) ⇒ Object
Functions for loading/writing Excel files.
- .from_html(path, opts) ⇒ Object
-
.from_plaintext(filename, fields) ⇒ Object
Loading data from plain text files.
-
.from_sql(db, query) ⇒ Object
Execute a query and create a data frame from the result.
- .load(filename) ⇒ Object
- .read_from_excel(path, opts) ⇒ Object
-
.save(klass, filename) ⇒ Object
Loading and writing Marshalled DataFrame/Vector.
Class Method Details
.dataframe_write_csv(dataframe, path, opts = {}) ⇒ Object
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/daru/io/io.rb', line 101 def dataframe_write_csv dataframe, path, opts={} = { converters: :numeric }.merge(opts) writer = ::CSV.open(path, 'w', ) writer << dataframe.vectors.to_a unless [:headers] == false dataframe.each_row do |row| writer << if [:convert_comma] row.map { |v| v.to_s.tr('.', ',') } else row.to_a end end writer.close end |
.dataframe_write_excel(dataframe, path, _opts = {}) ⇒ Object
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/daru/io/io.rb', line 70 def dataframe_write_excel dataframe, path, _opts={} book = Spreadsheet::Workbook.new sheet = book.create_worksheet format = Spreadsheet::Format.new color: :blue, weight: :bold sheet.row(0).concat(dataframe.vectors.to_a.map(&:to_s)) # Unfreeze strings sheet.row(0).default_format = format i = 1 dataframe.each_row do |row| sheet.row(i).concat(row.to_a) i += 1 end book.write(path) end |
.dataframe_write_sql(ds, dbh, table) ⇒ Object
131 132 133 134 135 136 137 |
# File 'lib/daru/io/io.rb', line 131 def dataframe_write_sql ds, dbh, table require 'dbi' query = "INSERT INTO #{table} ("+ds.vectors.to_a.join(',')+') VALUES ('+(['?']*ds.vectors.size).join(',')+')' sth = dbh.prepare(query) ds.each_row { |c| sth.execute(*c.to_a) } true end |
.from_activerecord(relation, *fields) ⇒ Object
Load dataframe from AR::Relation
144 145 146 147 148 149 150 |
# File 'lib/daru/io/io.rb', line 144 def from_activerecord(relation, *fields) fields = relation.klass.column_names if fields.empty? fields = fields.map(&:to_sym) result = relation.pluck(*fields).transpose Daru::DataFrame.new(result, order: fields).tap(&:update) end |
.from_csv(path, opts = {}) ⇒ Object
Functions for loading/writing CSV files
87 88 89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/daru/io/io.rb', line 87 def from_csv path, opts={} , opts = from_csv_prepare_opts opts # Preprocess headers for detecting and correcting repetition in # case the :headers option is not specified. hsh = if opts[:headers] from_csv_hash_with_headers(path, opts) else from_csv_hash(path, opts) .tap { |hash| [:order] = hash.keys } end Daru::DataFrame.new(hsh,) end |
.from_excel(path, opts = {}) ⇒ Object
Functions for loading/writing Excel files.
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/daru/io/io.rb', line 41 def from_excel path, opts={} opts = { worksheet_id: 0, row_id: 0 }.merge opts worksheet, headers = read_from_excel(path, opts) df = Daru::DataFrame.new({}) headers.each_with_index do |h,i| col = worksheet.column(i).to_a col.delete_at 0 df[h] = col end df end |
.from_html(path, opts) ⇒ Object
184 185 186 187 188 189 190 191 192 |
# File 'lib/daru/io/io.rb', line 184 def from_html path, opts optional_gem 'mechanize', '~>2.7.5' page = Mechanize.new.get(path) page.search('table').map { |table| html_parse_table table } .keep_if { |table| html_search table, opts[:match] } .compact .map { |table| html_decide_values table, opts } .map { |table| html_table_to_dataframe table } end |
.from_plaintext(filename, fields) ⇒ Object
Loading data from plain text files
154 155 156 157 158 159 160 161 162 163 164 165 |
# File 'lib/daru/io/io.rb', line 154 def from_plaintext filename, fields ds = Daru::DataFrame.new({}, order: fields) fp = File.open(filename,'r') fp.each_line do |line| row = Daru::IOHelpers.process_row(line.strip.split(/\s+/),['']) next if row == ["\x1A"] ds.add_row(row) end ds.update fields.each { |f| ds[f].rename f } ds end |
.from_sql(db, query) ⇒ Object
Execute a query and create a data frame from the result
126 127 128 129 |
# File 'lib/daru/io/io.rb', line 126 def from_sql(db, query) require 'daru/io/sql_data_source' SqlDataSource.make_dataframe(db, query) end |
.load(filename) ⇒ Object
174 175 176 177 178 179 180 181 182 |
# File 'lib/daru/io/io.rb', line 174 def load filename if File.exist? filename o = false File.open(filename, 'r') { |fp| o = Marshal.load(fp) } o else false end end |
.read_from_excel(path, opts) ⇒ Object
58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/daru/io/io.rb', line 58 def read_from_excel path, opts optional_gem 'spreadsheet', '~>1.1.1' worksheet_id = opts[:worksheet_id] row_id = opts[:row_id] book = Spreadsheet.open path worksheet = book.worksheet worksheet_id headers = ArrayHelper.recode_repeated(worksheet.row(row_id)).map(&:to_sym) [worksheet, headers] end |
.save(klass, filename) ⇒ Object
Loading and writing Marshalled DataFrame/Vector
168 169 170 171 172 |
# File 'lib/daru/io/io.rb', line 168 def save klass, filename fp = File.open(filename, 'w') Marshal.dump(klass, fp) fp.close end |