Class: CsvMadness::Sheet

Inherits:
Object
  • Object
show all
Defined in:
lib/csv_madness/sheet.rb

Constant Summary collapse

COLUMN_TYPES =
{
  number: Proc.new do |cell, record|
    rval = cell
    
    unless cell.nil? || (cell.is_a?(String) && cell.length == 0)
    
      begin
        rval = Integer(cell)
      rescue
        # do nothing
      end
    
      unless rval.is_a?(Integer)
        begin
          rval = Float(cell)
        rescue
          # do nothing
        end
      end
    end
    
    rval
  end,
  
  integer: Proc.new do |cell, record|
    begin
      Integer(cell)
    rescue
      cell
    end
  end,
  
  float:   Proc.new do |cell, record|
    begin
      Float(cell)
    rescue
      cell
    end
  end,
  
  date:    Proc.new do |cell, record|
    begin
      parse = Time.parse( cell || "" )
    rescue ArgumentError
      if cell =~ /^Invalid Time Format: /
        parse = cell
      else
        parse = "Invalid Time Format: <#{cell}>"
      end
    end
    
    parse
  end
}
FORBIDDEN_COLUMN_NAMES =

breaks things hard when you use them. Probably not comprehensive, sadly.

[:to_s]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Sheet

opts:

index: ( [:id, :id2 ] )
    columns you want mapped for quick 
    lookup of individual records

columns: ( [:fname, :lname, :age] )  
    an array of symbols, corresponding
    to the csv rows they represent (first, second, third)
    and designating the method for calling the cell in 
    a given record.  If not provided, it will guess based
    on the header row.

header:   false       
    anything else, we assume the csv file has a header row


156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/csv_madness/sheet.rb', line 156

def initialize( *args )
  if args.last.is_a?(Hash)
    @opts = args.pop
  else
    @opts = {}
  end
  
  firstarg = args.shift
  
  case firstarg
  when NilClass
    @spreadsheet_file = nil
    @opts[:columns] ||= []
  when String, FunWith::Files::FilePath, Pathname
    @spreadsheet_file = self.class.find_spreadsheet_in_filesystem( firstarg )
  when Array
    @spreadsheet_file = nil
    @opts[:columns] ||= firstarg
  end
  
  @opts[:header] = (@opts[:header] == false ? false : true)  # true unless already explicitly set to false
  
  reload_spreadsheet
end

Instance Attribute Details

#columnsObject

Returns the value of attribute columns.



141
142
143
# File 'lib/csv_madness/sheet.rb', line 141

def columns
  @columns
end

#index_columnsObject

Returns the value of attribute index_columns.



141
142
143
# File 'lib/csv_madness/sheet.rb', line 141

def index_columns
  @index_columns
end

#optsObject

Returns the value of attribute opts.



141
142
143
# File 'lib/csv_madness/sheet.rb', line 141

def opts
  @opts
end

#record_classObject

Returns the value of attribute record_class.



141
142
143
# File 'lib/csv_madness/sheet.rb', line 141

def record_class
  @record_class
end

#recordsObject

Returns the value of attribute records.



141
142
143
# File 'lib/csv_madness/sheet.rb', line 141

def records
  @records
end

#spreadsheet_fileObject

Returns the value of attribute spreadsheet_file.



141
142
143
# File 'lib/csv_madness/sheet.rb', line 141

def spreadsheet_file
  @spreadsheet_file
end

Class Method Details

.add_search_path(path) ⇒ Object

Paths to be searched when CsvMadness.load( “filename.csv” ) is called.



74
75
76
77
78
79
80
81
82
# File 'lib/csv_madness/sheet.rb', line 74

def self.add_search_path( path )
  @search_paths ||= []
  path = Pathname.new( path ).expand_path
  unless path.directory?
    raise "The given path does not exist"
  end
  
  @search_paths << path unless @search_paths.include?( path )
end

.find_spreadsheet_in_filesystem(name) ⇒ Object

Search absolute/relative-to-current-dir before checking search paths.



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/csv_madness/sheet.rb', line 98

def self.find_spreadsheet_in_filesystem( name )
  @search_paths ||= []

  expanded_path = Pathname.new( name ).expand_path
  if expanded_path.exist?
    return expanded_path
  else   # look for it in the search paths
    @search_paths.each do |p|
      file = p.join( name )
      if file.exist? && file.file?
        return p.join( name )
      end
    end
  end
  
  nil
end

.from(csv_file, opts = {}) ⇒ Object



88
89
90
91
92
93
94
# File 'lib/csv_madness/sheet.rb', line 88

def self.from( csv_file, opts = {} )
  if f = find_spreadsheet_in_filesystem( csv_file )
    Sheet.new( f, opts )
  else
    raise "File not found."
  end
end

.getter_name(name) ⇒ Object

Used to make getter/setter names out of the original header strings. “ hello;: world! ” => :hello_world



62
63
64
65
66
67
68
69
70
# File 'lib/csv_madness/sheet.rb', line 62

def self.getter_name( name )
  name = name.strip.gsub( /\s+/, "_" ).gsub( /(\W|_)+/, "_" ).downcase
  name = name.gsub( /_+$/, "" )
  if name.match( /^\d/ )
    name = "_#{name}"
  end
  
  name.to_sym
end

.search_pathsObject



84
85
86
# File 'lib/csv_madness/sheet.rb', line 84

def self.search_paths
  @search_paths
end

.to_csv(spreadsheet, opts = {}) ⇒ Object

opts are passed to underlying CSV (:row_sep, :encoding, :force_quotes)



117
118
119
120
121
122
# File 'lib/csv_madness/sheet.rb', line 117

def self.to_csv( spreadsheet, opts = {} )
  out = spreadsheet.columns.to_csv( opts )
  spreadsheet.records.inject( out ) do |output, record|
    output << record.to_csv( opts )
  end
end

.write_to_file(spreadsheet, file, opts = {}) ⇒ Object



124
125
126
127
128
129
# File 'lib/csv_madness/sheet.rb', line 124

def self.write_to_file( spreadsheet, file, opts = {} )
  file = file.fwf_filepath.expand_path
  File.open( file, "w" ) do |f|
    f << spreadsheet.to_csv( opts )
  end
end

Instance Method Details

#<<(record) ⇒ Object



181
182
183
# File 'lib/csv_madness/sheet.rb', line 181

def <<( record )
  self.add_record( record )
end

#[](offset) ⇒ Object



257
258
259
# File 'lib/csv_madness/sheet.rb', line 257

def [] offset
  @records[offset]
end

#add_column(column, &block) ⇒ Object

If no block given, adds an empty column



369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
# File 'lib/csv_madness/sheet.rb', line 369

def add_column( column, &block )
  raise "Column already exists: #{column}" if @columns.include?( column )
  raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
  @columns << column
  
  # add empty column to each row
  @records.map{ |r|
    r.csv_data << {column => ""} 
  }
  
  update_data_accessor_module
  
  if block_given?
    alter_column( column ) do |val, record|
      yield val, record
    end
  end
end

#add_record(record) ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/csv_madness/sheet.rb', line 185

def add_record( record )
  case record
  when Array
    # CSV::Row.new( column names, column_entries ) (in same order as columns, natch) 
    record = CSV::Row.new( self.columns, record )
  when Hash
    header = []
    fields = []
    
    for col in self.columns
      header << col
      fields << record[col]
    end
    
    record = CSV::Row.new( header, fields )
  when CSV::Row
    # do nothing
  else
    raise "sheet.add_record() doesn't take objects of type #{record.inspect}" unless record.respond_to?(:csv_data)
    record = record.csv_data
  end
  
  record = @record_class.new( record )
  @records << record
  add_to_indexes( record )
end

#add_record_methods(mod = nil, &block) ⇒ Object

Note: If a block is given, the mod arg will be ignored.



442
443
444
445
446
447
448
# File 'lib/csv_madness/sheet.rb', line 442

def add_record_methods( mod = nil, &block )
  if block_given?
    mod = Module.new( &block )
  end
  @record_class.send( :include, mod )
  self
end

#alter_cells(blank = :undefined, &block) ⇒ Object

if blank is defined, only the records which are non-blank in that column will actually be yielded. The rest will be set to the provided default



348
349
350
351
352
# File 'lib/csv_madness/sheet.rb', line 348

def alter_cells( blank = :undefined, &block )
  @columns.each_with_index do |column, cindex|
    alter_column( column, blank, &block )
  end
end

#alter_column(column, blank = :undefined, &block) ⇒ Object



354
355
356
357
358
359
360
361
362
363
364
365
366
# File 'lib/csv_madness/sheet.rb', line 354

def alter_column( column, blank = :undefined, &block )
  raise "Column does not exist: #{column}" unless @columns.include?( column )
  
  if cindex = @columns.index( column )
    for record in @records
      if record.blank?(column) && blank != :undefined
        record[cindex] = blank
      else
        record[cindex] = yield( record[cindex], record )
      end
    end
  end
end

#blankedObject

give a copy of the current spreadsheet, but with no records



294
295
296
297
298
299
300
301
302
303
304
305
306
# File 'lib/csv_madness/sheet.rb', line 294

def blanked()
  sheet = self.class.new
  sheet.columns = @columns.clone
  sheet.index_columns = @index_columns.clone
  sheet.records = []
  sheet.spreadsheet_file = nil
  sheet.create_data_accessor_module
  sheet.create_record_class
  sheet.opts = @opts.clone
  sheet.reindex
  
  sheet
end

#column(col) ⇒ Object



332
333
334
# File 'lib/csv_madness/sheet.rb', line 332

def column col
  @records.map(&col)
end

#concat_columns(col1, col2, opts = {}) ⇒ Object Also known as: concatenate

By default, the



428
429
430
431
432
433
434
435
436
437
# File 'lib/csv_madness/sheet.rb', line 428

def concat_columns( col1, col2, opts = {} )
  opts =  {:separator => '', :out => col1}.merge( opts )
  
  column_must_exist( col1, col2 )
  self.add_column( opts[:out] ) unless self.columns.include?( opts[:out] )
  
  for record in self.records
    record[ opts[:out] ] = "#{record[col1]}#{opts[:separator]}#{record[col2]}"
  end
end

#drop_column(column) ⇒ Object



388
389
390
391
392
393
394
395
396
397
398
399
400
# File 'lib/csv_madness/sheet.rb', line 388

def drop_column( column )
  raise "Column does not exist" unless @columns.include?( column )
  
  @columns.delete( column )
  
  key = column.to_s
  
  @records.map{ |r|
    r.csv_data.delete( key )
  }
  
  update_data_accessor_module
end

#fetch(index_col, key) ⇒ Object

Fetches an indexed record based on the column indexed and the keying object. If key is an array of keying objects, returns an array of records in the same order that the keying objects appear. Index column should yield a different, unique value for each record.



265
266
267
268
269
270
271
# File 'lib/csv_madness/sheet.rb', line 265

def fetch( index_col, key )
  if key.is_a?(Array)
    key.map{ |key| @indexes[index_col][key] }
  else
    @indexes[index_col][key]
  end
end

#filter(&block) ⇒ Object

function should take an object, and return either true or false returns an array of objects that respond true when put through the meat grinder



276
277
278
279
280
281
282
283
# File 'lib/csv_madness/sheet.rb', line 276

def filter( &block )
  rval = []
  @records.each do |record|
    rval << record if ( yield record )
  end

  rval
end

#filter!(&block) ⇒ Object

removes rows which fail the given test from the spreadsheet.



286
287
288
289
290
# File 'lib/csv_madness/sheet.rb', line 286

def filter!( &block )
  @records = self.filter( &block )
  reindex
  @records
end

#lengthObject



457
458
459
# File 'lib/csv_madness/sheet.rb', line 457

def length
  self.records.length
end

#merge_columns(source, dest, opts = {}) ⇒ Object

If :reverse_merge is true, then the dest column is only overwritten for records where :dest is blank



414
415
416
417
418
419
420
421
422
423
424
425
# File 'lib/csv_madness/sheet.rb', line 414

def merge_columns( source, dest, opts = {} )
  opts = { :drop_source => true, :reverse_merge => false, :default => "" }.merge( opts )
  column_must_exist( source, dest )
  
  self.records.each do |record|
    if opts[:reverse_merge] == false || record.blank?( dest )
      record[dest] = record.blank?(source) ? opts[:default] : record[source]
    end
  end
  
  self.drop_column( source ) if opts[:drop_source]
end

#multiple_columns(*args) ⇒ Object

retrieve multiple columns. Returns an array of the form

[record1:col1, record1:col2…], [record2:col1, record2:col2…

]



338
339
340
341
342
343
# File 'lib/csv_madness/sheet.rb', line 338

def multiple_columns(*args)
  @records.inject([]){ |memo, record|
    memo << args.map{ |arg| record.send(arg) }
    memo
  }
end

#nils_are_blank_stringsObject

Note: If implementation of Record[] changes, so must this.



451
452
453
454
455
# File 'lib/csv_madness/sheet.rb', line 451

def nils_are_blank_strings
  alter_cells do |value, record|
    value.nil? ? "" : value
  end
end

#reload_spreadsheet(opts = @opts) ⇒ Object



236
237
238
239
240
241
242
243
244
# File 'lib/csv_madness/sheet.rb', line 236

def reload_spreadsheet( opts = @opts )
  load_csv if @spreadsheet_file
  set_initial_columns( opts[:columns] )
  create_record_class
  package
  
  set_index_columns( opts[:index] )
  reindex
end

#remove_record(record) ⇒ Object

record can be the row number (integer from 0…@records.length) record can be the record itself (anonymous class)



214
215
216
217
218
219
220
# File 'lib/csv_madness/sheet.rb', line 214

def remove_record( record )
  record = @records[record] if record.is_a?(Integer)
  return if record.nil?
  
  self.remove_from_index( record )
  @records.delete( record )
end

#remove_records(records = nil, &block) ⇒ Object

Here’s the deal: you hand us a block, and we’ll remove the records for which it yields true.



224
225
226
227
228
229
230
231
232
233
234
# File 'lib/csv_madness/sheet.rb', line 224

def remove_records( records = nil, &block )
  if block_given?
    for record in @records
      remove_record( record ) if yield( record ) == true
    end
  else # records should be an array
    for record in records
      self.remove_record( record )
    end
  end
end

#rename_column(column, new_name) ⇒ Object



402
403
404
405
406
# File 'lib/csv_madness/sheet.rb', line 402

def rename_column( column, new_name )
  @columns[@columns.index(column)] = new_name
  rename_index_column( column, new_name ) if @index_columns.include?( column )
  update_data_accessor_module
end

#set_column_type(column, type, blank = :undefined) ⇒ Object



408
409
410
# File 'lib/csv_madness/sheet.rb', line 408

def set_column_type( column, type, blank = :undefined )
  alter_column( column, blank, &COLUMN_TYPES[type] )
end

#set_index_columns(index_columns) ⇒ Object



246
247
248
249
250
251
252
253
254
255
# File 'lib/csv_madness/sheet.rb', line 246

def set_index_columns( index_columns )
  @index_columns = case index_columns
  when NilClass
    []
  when Symbol
    [ index_columns ]
  when Array
    index_columns
  end
end

#split(&block) ⇒ Object

give a block, and get back a hash.

The hash keys are the results of the block. The hash values are copies of the spreadsheets, with only the records which caused the block to return the key.



312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
# File 'lib/csv_madness/sheet.rb', line 312

def split( &block )
  sheets = Hash.new
  
  for record in @records
    result_key = yield record
    ( sheets[result_key] ||= self.blanked() ) << record
  end

  sheets
  # sheet_args = self.blanked
  # for key, record_set in records
  #   sheet = self.clone
  #   sheet.records = 
  #   
  #   records[key] = sheet
  # end
  # 
  # records
end

#to_csv(opts = {}) ⇒ Object



135
136
137
138
139
# File 'lib/csv_madness/sheet.rb', line 135

def to_csv( opts = {} )
  self.records.inject( self.columns.to_csv( opts ) ) do |output, record|
    output << record.to_csv( opts )
  end
end

#write_to_file(file, opts = {}) ⇒ Object



131
132
133
# File 'lib/csv_madness/sheet.rb', line 131

def write_to_file( file, opts = {} )
  self.class.write_to_file( self, file, opts )
end