Class: OpenTox::Parser::Sdf

Inherits:
Object
  • Object
show all
Defined in:
lib/parser.rb

Overview

Quick hack to enable SDF import via the CSV import path; should be refactored.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Sdf

Returns a new instance of Sdf.



633
634
635
636
637
638
639
# File 'lib/parser.rb', line 633

# Builds a parser with empty bookkeeping state.
#
# @compound_errors and @activity_errors collect the messages that #load_sdf
# later reports as warnings; @duplicates maps an InChI string to the record
# numbers that produced it. @data is initialised here but is not read by any
# method visible in this listing.
def initialize
  @compound_errors, @activity_errors = [], []
  @data, @duplicates = {}, {}
end

Instance Attribute Details

#dataset ⇒ Object

Returns the value of attribute dataset.



631
632
633
# File 'lib/parser.rb', line 631

# Reader for the dataset this parser works on.
#
# #load_sdf passes this object to Table#add_to_dataset and returns it.
# The constructor does not assign @dataset, so this returns nil until it has
# been set (presumably through a matching writer declared in lib/parser.rb —
# confirm there).
#
# @return [Object, nil] the current value of @dataset
def dataset
  @dataset
end

Instance Method Details

#load_sdf(sdf) ⇒ Object



641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
# File 'lib/parser.rb', line 641

# Imports the records of an SDF document into @dataset.
#
# Each record (records are separated by "$$$$" lines) is converted to an
# InChI string with OpenBabel, turned into a Compound, and its data fields
# are stored in a Table row keyed by the compound URI. The table is then
# cleaned and added to @dataset, and an HTML warning report (unreadable
# structures, irregular activities, duplicated structures) is stored on the
# dataset.
#
# @dataset must have been assigned before this method is called; the
# constructor does not set it.
#
# @param sdf [String] complete SDF file content
# @return [Object] @dataset, after the table and the warnings were added
def load_sdf(sdf)
  obconversion = OpenBabel::OBConversion.new
  obmol = OpenBabel::OBMol.new
  obconversion.set_in_and_out_formats "sdf", "inchi"

  table = Table.new

  # Every line containing "<" is treated as a data-field header; removing the
  # angle brackets and surrounding whitespace leaves the property name. The
  # list is only used for membership tests below, so ordering is irrelevant.
  properties = []
  sdf.each_line { |l| properties << l.gsub(/<|>/, '').strip if l.match(/</) }
  properties.uniq!

  rec = 0
  sdf.split(/\$\$\$\$\r*\n/).each do |s|
    rec += 1
    obconversion.read_string obmol, s
    begin
      inchi = obconversion.write_string(obmol).gsub(/\s/, '').chomp
      # Remember which records map to the same structure. This happens before
      # the compound is created, so a record is counted even if that fails.
      (@duplicates[inchi] ||= []) << rec
      compound = Compound.from_inchi inchi
    rescue
      @compound_errors << "Could not convert structure to InChI, all entries for this compound (record #{rec}) have been ignored! \n#{s}"
      next
    end
    row = {}
    obmol.get_data.each { |d| row[d.get_attribute] = d.get_value if properties.include?(d.get_attribute) }
    # A later record with the same compound URI replaces the earlier row.
    table.data[compound.uri] = row
  end

  # find and remove ignored_features
  @activity_errors = table.clean_features
  table.add_to_dataset @dataset

  warnings = ''
  warnings += "<p>Incorrect structures (ignored):</p>" + @compound_errors.join("<br/>") unless @compound_errors.empty?
  warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty?
  # One paragraph per structure that occurred in more than one record.
  duplicate_warnings = @duplicates.values.select { |lines| lines.size > 1 }.map { |lines| "<p>#{lines.join('<br/>')}</p>" }.join
  warnings += "<p>Duplicated structures (all structures/activities used for model building, please  make sure, that the results were obtained from <em>independent</em> experiments):</p>" + duplicate_warnings unless duplicate_warnings.empty?

  # NOTE(review): the listing read `@dataset.[OT.Warnings] = warnings`, which
  # is not valid Ruby; the receiver name between "." and "[" was lost.
  # `metadata` is assumed to be the hash-like accessor that was meant --
  # confirm against the Dataset class.
  @dataset.metadata[OT.Warnings] = warnings
  @dataset
end