Method: OpenTox::Parser::Spreadsheets#load_spreadsheet

Defined in:
lib/parser.rb

#load_spreadsheet(book, drop_missing = false) ⇒ OpenTox::Dataset

Load a spreadsheet workbook (created with the roo gem, roo.rubyforge.org; Excel format specification: toxcreate.org/help).

Parameters:

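  • book (Excel)

    Excel workbook object (created with roo gem)

  • drop_missing (Boolean) (defaults to: false)

    If true, rows that contain missing (empty) values are not added to the dataset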

Returns:

  • (OpenTox::Dataset)

313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/parser.rb', line 313

# Load a spreadsheet workbook (roo gem) into the dataset.
#
# The sheet is read in two passes over rows 2..last_row (row 1 holds the
# headers):
#
# 1. Collect the per-column value maps via +detect_new_values+ and flag every
#    column whose map holds more than +@max_class_values+ entries as a
#    regression feature.
# 2. Validate each row and hand it to +add_values+, recording problems in
#    +@format_errors+.
#
# A row with empty-string cells is reported in +@format_errors+. It is skipped
# when +drop_missing+ is true, or when every cell but one is empty (presumably
# only the identifier column is filled -- TODO confirm). The skip decision is
# made per row, so one nearly empty row does not cause later rows to be
# dropped.
#
# @param book [Excel] workbook object created with the roo gem
# @param drop_missing [Boolean] when true, rows containing empty cells are
#   not added
# @return [OpenTox::Dataset] the value of +@dataset+ after loading
# @raise [RuntimeError] if a row's size differs from the header row's size
def load_spreadsheet(book, drop_missing=false)
  book.default_sheet = 0
  headers = book.row(1)
  add_features headers
  value_maps = []
  regression_features = []

  # Pass 1: gather the distinct values seen per column and classify columns.
  2.upto(book.last_row) do |i|
    value_maps = detect_new_values(book.row(i), value_maps)
    value_maps.each_with_index do |vm, j|
      # Original note: 5 is the maximum nr of classes supported by Fminer.
      regression_features[j] = vm.size > @max_class_values
    end
  end

  # Pass 2: validate rows and add their values.
  2.upto(book.last_row) do |i|
    row = book.row(i)
    raise "Entry has size #{row.size}, different from headers (#{headers.size})" if row.size != headers.size

    missing = row.count("")
    if missing.zero?
      add_values(row, regression_features)
      next
    end

    @format_errors << "Row #{i} has #{missing} missing values"
    # Decide for this row only; the drop_missing argument is never mutated.
    if drop_missing || missing == row.size - 1
      @format_errors << "Row #{i} not added"
    else
      add_values(row, regression_features)
    end
  end
  warnings
  @dataset
end