Method: OpenTox::Parser::Spreadsheets#load_spreadsheet
- Defined in:
- lib/parser.rb
#load_spreadsheet(book, drop_missing = false) ⇒ OpenTox::Dataset
Loads a spreadsheet book (created with the roo gem, roo.rubyforge.org; Excel format specification: toxcreate.org/help).
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 |
# File 'lib/parser.rb', line 313
#
# Load a spreadsheet book into the parser's dataset.
#
# Row 1 is read as the header row and handed to +add_features+. The data
# rows (2..book.last_row) are then read twice:
#
# 1. each row is fed to +detect_new_values+ to build up the per-column
#    value maps, from which the per-column +regression_features+ flags
#    are derived;
# 2. each row is validated against the header width and handed to
#    +add_values+, unless it is skipped because of missing values.
#
# A cell counts as missing when it equals the empty string. Every row with
# missing cells is reported in +@format_errors+; a skipped row gets an
# additional "not added" entry.
#
# @param book [Object] spreadsheet responding to +default_sheet=+,
#   +row(i)+ and +last_row+ (documented as created with the roo gem)
# @param drop_missing [Boolean] when true, skip every row that has at
#   least one missing cell. Rows in which all cells but one are missing
#   are skipped regardless of this flag.
# @return [OpenTox::Dataset] the value of +@dataset+
# @raise [RuntimeError] if a data row's size differs from the header's
def load_spreadsheet(book, drop_missing = false)
  book.default_sheet = 0
  headers = book.row(1)
  add_features headers

  value_maps = []
  regression_features = []

  # First pass: accumulate the value maps and refresh the per-column flags
  # after each row (detect_new_values is defined elsewhere, so the refresh
  # is kept per row rather than hoisted out of the loop).
  2.upto(book.last_row) do |i|
    value_maps = detect_new_values(book.row(i), value_maps)
    value_maps.each_with_index do |value_map, j|
      # 5 is the maximum nr of classes supported by Fminer.
      regression_features[j] = value_map.size > @max_class_values
    end
  end

  # Second pass: validate each row and add it to the dataset.
  2.upto(book.last_row) do |i|
    row = book.row(i)
    if row.size != headers.size
      raise "Entry has size #{row.size}, different from headers (#{headers.size})"
    end

    missing = row.count("")
    @format_errors << "Row #{i} has #{missing} missing values" if missing > 0

    # Decide per row. The drop_missing argument must not be overwritten
    # here, otherwise a single row with all but one cell missing would
    # switch on dropping for every later row with a missing cell.
    skip_row = missing > 0 && (drop_missing || missing == row.size - 1)

    if skip_row
      @format_errors << "Row #{i} not added"
    else
      add_values(row, regression_features)
    end
  end

  warnings
  @dataset
end