Class: OpenTox::Parser::Spreadsheets

Inherits:
Object
  • Object
show all
Defined in:
lib/parser.rb

Overview

Parser for getting spreadsheet data into a dataset

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Spreadsheets

Returns a new instance of Spreadsheets.



288
289
290
291
292
293
294
295
296
297
# File 'lib/parser.rb', line 288

# Start a parser with empty state: every collection is empty and the
# format-error text is an empty string. @dataset is not assigned here.
def initialize
  # Array-valued state
  @data = []
  @features = []
  @smiles_errors = []
  @activity_errors = []

  # Format errors are kept as one string rather than a list
  @format_errors = ""

  # Hash-valued state
  @feature_types = {}
  @duplicates = {}
end

Instance Attribute Details

#dataset ⇒ Object

Returns the value of attribute dataset.



286
287
288
# File 'lib/parser.rb', line 286

# Reader for the @dataset instance variable, the same object that
# load_csv and load_spreadsheet return.
#
# @return [Object] current value of @dataset (nil if it was never assigned)
def dataset
  @dataset
end

Instance Method Details

#detect_new_values(row, value_maps) ⇒ Object



299
300
301
302
303
304
305
306
307
# File 'lib/parser.rb', line 299

# Record the values of one data row in the per-column value maps.
#
# The first element of +row+ is skipped; every following element is counted
# in the map for its column (column 0 = second element of the row). Missing
# maps are created on demand.
#
# Counts are zero-based: a value's first occurrence is stored as 0 and each
# repeat adds 1. Callers in this file only use the number of keys per map.
#
# @param row [Array] row values; not modified
# @param value_maps [Array<Hash>] one value => count hash per column, updated in place
# @return [Array<Hash>] the same +value_maps+ object
def detect_new_values(row, value_maps)
  # drop(1) skips the first column without the destructive row.shift,
  # so the caller's array stays intact.
  row.drop(1).each_with_index do |value, i|
    counts = (value_maps[i] ||= {})
    counts[value] = counts[value].nil? ? 0 : counts[value] + 1
  end
  value_maps
end

#load_csv(csv) ⇒ OpenTox::Dataset

Load CSV string (format specification: toxcreate.org/help)

Parameters:

  • csv (String)

    CSV representation of the dataset

Returns:

  • (OpenTox::Dataset)



339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# File 'lib/parser.rb', line 339

# Load a CSV string (format specification: toxcreate.org/help).
#
# The string is split on "\n". The first line is passed through split_row to
# add_features; every remaining line is processed twice:
#   1. detect_new_values collects the values seen in each column;
#   2. add_values receives the split row plus the per-column regression flags.
# Finally +warnings+ is called and @dataset is returned.
#
# @param csv [String] CSV representation of the dataset
# @return [OpenTox::Dataset] the value of @dataset
def load_csv(csv)
  lines = csv.split("\n")
  add_features split_row(lines.shift)

  # First pass: gather the values of every column.
  value_maps = []
  lines.each do |line|
    value_maps = detect_new_values(split_row(line), value_maps)
  end

  # A column counts as a regression feature when it has more than 5 values;
  # 5 is the maximum nr of classes supported by Fminer.
  # Computed once after the first pass (the per-row recomputation was redundant).
  regression_features = value_maps.map { |vm| vm.size > 5 }

  # Second pass: rows are split again, so each call gets a fresh array.
  lines.each do |line|
    add_values split_row(line), regression_features
  end

  warnings
  @dataset
end

#load_spreadsheet(book) ⇒ OpenTox::Dataset

Load a spreadsheet workbook (created with the roo gem, roo.rubyforge.org; Excel format specification: toxcreate.org/help)

Parameters:

  • book (Excel)

    Excel workbook object (created with roo gem)

Returns:

  • (OpenTox::Dataset)



312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
# File 'lib/parser.rb', line 312

# Load a spreadsheet workbook (created with the roo gem; Excel format
# specification: toxcreate.org/help).
#
# Sheet 0 is selected as the default sheet. Row 1 is passed to add_features;
# rows 2..book.last_row are processed twice:
#   1. detect_new_values collects the values seen in each column;
#   2. add_values receives the row plus the per-column regression flags.
# Finally +warnings+ is called and @dataset is returned.
#
# @param book [Excel] workbook object; must respond to default_sheet=, row and last_row
# @return [OpenTox::Dataset] the value of @dataset
def load_spreadsheet(book)
  book.default_sheet = 0
  add_features book.row(1)

  # First pass: gather the values of every column.
  value_maps = []
  2.upto(book.last_row) do |i|
    value_maps = detect_new_values(book.row(i), value_maps)
  end

  # A column counts as a regression feature when it has more than 5 values;
  # 5 is the maximum nr of classes supported by Fminer.
  # Computed once after the first pass (the per-row recomputation was redundant).
  regression_features = value_maps.map { |vm| vm.size > 5 }

  # Second pass: rows are fetched from the book again for add_values.
  2.upto(book.last_row) do |i|
    add_values book.row(i), regression_features
  end

  warnings
  @dataset
end