Class: Bio::SQL::Sequence

Inherits:
Object show all
Defined in:
lib/bio/db/biosql/sequence.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Sequence

Returns a new instance of Sequence.



106
107
108
109
110
111
112
# File 'lib/bio/db/biosql/sequence.rb', line 106

# Sets up the wrapper in one of two ways:
#
# * options[:entry] is not nil: it is stored in @entry and the other keys
#   are ignored.
# * otherwise, when both options[:biosequence] and options[:biodatabase]
#   are not nil, they are handed to to_biosql.
#
# With neither, nothing is initialised.
def initialize(options={})
  #options.assert_valid_keys(:entry, :biodatabase,:biosequence)
  given_entry = options[:entry]
  if !given_entry.nil?
    @entry = given_entry
  elsif !(options[:biosequence].nil? || options[:biodatabase].nil?)
    to_biosql(options[:biosequence], options[:biodatabase])
  end
end

Instance Attribute Details

#entryObject (readonly)

Returns the value of attribute entry.



83
84
85
# File 'lib/bio/db/biosql/sequence.rb', line 83

# Reader for @entry, the object the other methods of this class operate on.
def entry
  @entry
end

Instance Method Details

#cdsfeaturesObject

Returns the seqfeatures mapped from BioSQL that have a type_term like ‘CDS’.



323
324
325
# File 'lib/bio/db/biosql/sequence.rb', line 323

# Delegates to @entry.cdsfeatures and returns its result unchanged.
def cdsfeatures
  @entry.cdsfeatures
end

#comment=(value) ⇒ Object



420
421
422
423
424
# File 'lib/bio/db/biosql/sequence.rb', line 420

# Adds +value+ as a new comment on the wrapped entry and saves it.
# The new comment's rank is the current number of comments plus one.
# Returns whatever the built comment's +save+ returns.
def comment=(value)
  collection = @entry.comments
  attributes = {:comment_text=>value, :rank=>@entry.comments.count.succ}
  collection.build(attributes).save
end

#commentsObject



393
394
395
396
397
# File 'lib/bio/db/biosql/sequence.rb', line 393

# Returns the comment_text of every comment of the wrapped entry, in the
# order the comments collection yields them.
def comments
  @entry.comments.map(&:comment_text)
end

#databaseObject



234
235
236
# File 'lib/bio/db/biosql/sequence.rb', line 234

# Returns @entry.biodatabase.name, i.e. the name of the entry's biodatabase.
def database
  @entry.biodatabase.name
end

#database_descObject



238
239
240
# File 'lib/bio/db/biosql/sequence.rb', line 238

# Returns @entry.biodatabase.description, i.e. the description of the
# entry's biodatabase.
def database_desc
  @entry.biodatabase.description
end

#deleteObject



85
86
87
88
89
90
91
# File 'lib/bio/db/biosql/sequence.rb', line 85

# Destroys the wrapped entry. Before that, every reference of the entry
# that is linked to exactly one bioentry is deleted as well.
# Returns the result of @entry.destroy.
#
# TODO: check whether the references connected to this bioentry are leaves.
# A more sophisticated version would check whether other bioentries are
# connected to each reference and delete only the unshared ones.
def delete
  @entry.references.each do |ref|
    next unless ref.bioentries.size == 1
    ref.delete
  end
  @entry.destroy
end

#descriptionObject Also known as: definition



260
261
262
# File 'lib/bio/db/biosql/sequence.rb', line 260

# Returns the description stored on the wrapped entry.
def description
  @entry.description
end

#description=(value) ⇒ Object Also known as: definition=



265
266
267
# File 'lib/bio/db/biosql/sequence.rb', line 265

# Assigns +value+ to the wrapped entry's description. Nothing is saved here.
def description=(value)
  @entry.description=value
end

#divisionObject



252
253
254
# File 'lib/bio/db/biosql/sequence.rb', line 252

# Returns the division stored on the wrapped entry.
def division
  @entry.division
end

#division=(value) ⇒ Object



256
257
258
# File 'lib/bio/db/biosql/sequence.rb', line 256

# Assigns +value+ to the wrapped entry's division. Nothing is saved here.
def division=(value)
  @entry.division=value
end

#feature=(feat) ⇒ Object



293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/bio/db/biosql/sequence.rb', line 293

# Stores +feat+ as a new seqfeature of the wrapped entry.
#
# Steps, in order:
# 1. find the Term named feat.feature, or create it under the
#    'SeqFeature Keys' ontology;
# 2. look up the source Term named 'EMBLGenBankSwit';
# 3. build and save the seqfeature (rank = current seqfeature count + 1);
# 4. build and save one location per element of feat.locations;
# 5. build and save one qualifier value per element yielded by feat.each,
#    finding or creating its Term under the 'Annotation Tags' ontology.
#
# feat - must respond to feature, locations and each; the yielded qualifiers
#        must respond to qualifier and value.
def feature=(feat)
  #ToDo: avoid Ontology find here, probably more efficient create class variables
  #DELETE        type_term_ontology = Ontology.find_or_create({:name=>'SeqFeature Keys'})
  puts "feature:type_term = #{feat.feature}" if $DEBUG
  # Terms are matched by name only; the ontology is used only when creating one.
  type_term = Term.first(:conditions=>["name = ?", feat.feature]) || Term.create({:name=>feat.feature, :ontology=>Ontology.first(:conditions=>["name = ?",'SeqFeature Keys'])})
  #DELETE        source_term_ontology = Ontology.find_or_create({:name=>'SeqFeature Sources'})
  puts "feature:source_term" if $DEBUG
  # NOTE(review): there is no fallback if this lookup finds nothing — confirm
  # the 'EMBLGenBankSwit' term always exists.
  source_term = Term.first(:conditions=>["name = ?",'EMBLGenBankSwit'])
  puts "feature:seqfeature" if $DEBUG
  seqfeature = @entry.seqfeatures.build({:source_term=>source_term, :type_term=>type_term, :rank=>@entry.seqfeatures.count.succ, :display_name=>''})
  seqfeature.save
  puts "feature:location" if $DEBUG
  feat.locations.each do |loc|
    # The rank is taken from the count at the time each location is built.
    location = seqfeature.locations.build({:seqfeature=>seqfeature, :start_pos=>loc.from, :end_pos=>loc.to, :strand=>loc.strand, :rank=>seqfeature.locations.count.succ})
    location.save
  end

  #DELETE        qual_term_ontology = Ontology.find_or_create({:name=>'Annotation Tags'})

  puts "feature:qualifier" if $DEBUG
  feat.each do |qualifier|
    #DELETE          qual_term = Term.find_or_create({:name=>qualifier.qualifier}, {:ontology=>qual_term_ontology})
    qual_term = Term.first(:conditions=>["name = ?", qualifier.qualifier]) || Term.create({:name=>qualifier.qualifier, :ontology=>Ontology.first(:conditions=>["name = ?", 'Annotation Tags'])})
    # Qualifier values are always stored as strings (value.to_s).
    qual = seqfeature.seqfeature_qualifier_values.build({:seqfeature=>seqfeature, :term=>qual_term, :value=>qualifier.value.to_s, :rank=>seqfeature.seqfeature_qualifier_values.count.succ})
    qual.save

  end
end

#featuresObject



287
288
289
290
291
# File 'lib/bio/db/biosql/sequence.rb', line 287

# Converts every seqfeature of the wrapped entry with get_seqfeature and
# returns the results as a list.
def features
  @entry.seqfeatures.map { |seqfeature| get_seqfeature(seqfeature) }
end

#get_seqfeature(sf) ⇒ Object



93
94
95
96
97
98
99
100
# File 'lib/bio/db/biosql/sequence.rb', line 93

# Builds a Bio::Feature from the seqfeature record +sf+.
#
# * feature key: sf.type_term.name
# * position:    the to_s of every location joined with ','; wrapped in
#                "join(...)" when there is more than one location
# * qualifiers:  one Bio::Feature::Qualifier (term name, value) per
#                seqfeature qualifier value
def get_seqfeature(sf)
  position = sf.locations.map(&:to_s).join(',')
  position = "join(#{position})" if sf.locations.count > 1
  feature_key = sf.type_term.name
  qualifiers = sf.seqfeature_qualifier_values.collect do |sfqv|
    Bio::Feature::Qualifier.new(sfqv.term.name, sfqv.value)
  end
  Bio::Feature.new(feature_key, position, qualifiers)
end

#identifierObject Also known as: other_seqids



270
271
272
# File 'lib/bio/db/biosql/sequence.rb', line 270

# Returns the identifier stored on the wrapped entry.
def identifier
  @entry.identifier
end

#identifier=(value) ⇒ Object



275
276
277
# File 'lib/bio/db/biosql/sequence.rb', line 275

# Assigns +value+ to the wrapped entry's identifier. Nothing is saved here.
def identifier=(value)
  @entry.identifier=value
end

#lengthObject



365
366
367
# File 'lib/bio/db/biosql/sequence.rb', line 365

# Returns the length recorded on the entry's biosequence.
#
# An entry without a biosequence reports 0 instead of raising NoMethodError.
# This matches +seq+, which yields an empty sequence in the same situation.
def length
  biosequence = @entry.biosequence
  biosequence ? biosequence.length : 0
end

#length=(len) ⇒ Object



102
103
104
# File 'lib/bio/db/biosql/sequence.rb', line 102

# Assigns +len+ to the length of the entry's biosequence. Nothing is saved
# here, and the biosequence must already exist (there is no nil check).
def length=(len)
  @entry.biosequence.length=len
end

#nameObject Also known as: entry_id



195
196
197
# File 'lib/bio/db/biosql/sequence.rb', line 195

# Returns the name stored on the wrapped entry.
def name
  @entry.name
end

#name=(value) ⇒ Object Also known as: entry_id=



200
201
202
# File 'lib/bio/db/biosql/sequence.rb', line 200

# Assigns +value+ to the wrapped entry's name. Nothing is saved here.
def name=(value)
  @entry.name=value
end

#organismObject Also known as: species

TODO: implement `secondary_accession`. Sketched body:

  def secondary_accession
    @entry.bioentry_qualifier_values
  end


217
218
219
# File 'lib/bio/db/biosql/sequence.rb', line 217

# Returns the organism of the wrapped entry as text: the taxon's scientific
# name, immediately followed by "(common name)" when the taxon has a
# GenBank common name. Returns "" when the entry has no taxon.
def organism
  taxon = @entry.taxon
  return "" if taxon.nil?

  scientific = "#{taxon.taxon_scientific_name.name}"
  common = taxon.taxon_genbank_common_name
  suffix = common ? "(#{common.name})" : ''
  scientific + suffix
end

#organism=(value) ⇒ Object Also known as: species=



222
223
224
225
226
227
228
229
230
231
# File 'lib/bio/db/biosql/sequence.rb', line 222

# Links the wrapped entry to the taxon whose scientific name matches +value+.
#
# A trailing " (common name)" part is stripped from +value+ before the
# lookup in TaxonName (name_class 'scientific name'). On a match the entry's
# taxon_id is set and the entry is saved; otherwise an error message is
# printed and nothing is changed.
def organism=(value)
  #FIX there is a shortcut
  scientific_name = value.gsub(/\s+\(.+\)/,'')
  match = TaxonName.first(:conditions=>["name = ? and name_class = ?",scientific_name,'scientific name'])
  unless match.nil?
    @entry.taxon_id = match.taxon_id
    return @entry.save
  end
  puts "Error value doesn't exists in taxon_name table with scientific name constraint."
end

#primary_accessionObject



205
206
207
# File 'lib/bio/db/biosql/sequence.rb', line 205

# Returns the accession stored on the wrapped entry.
def primary_accession
  @entry.accession
end

#primary_accession=(value) ⇒ Object



209
210
211
# File 'lib/bio/db/biosql/sequence.rb', line 209

# Assigns +value+ to the wrapped entry's accession. Nothing is saved here.
def primary_accession=(value)
  @entry.accession=value
end

#reference=(value) ⇒ Object



399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
# File 'lib/bio/db/biosql/sequence.rb', line 399

# Attaches the reference +value+ to the wrapped entry.
#
# The bibliographic fields of +value+ are packed into one location string of
# "key=value" pairs joined by '|' (comments are joined by '~'). A Reference
# with the same title is reused; otherwise one is created with the title,
# the authors joined by ' ', and that location string. A bioentry_reference
# linking it to the entry is then built and saved, with rank taken from
# value.embl_gb_record_number and start/end taken from
# value.sequence_position ("start-end", whitespace ignored).
def reference=(value)
  # [field, predicate that means "leave this field out"], in output order.
  field_rules = [
    [:journal, :empty?], [:volume, :empty?], [:issue, :empty?],
    [:pages, :empty?], [:year, :empty?], [:pubmed, :empty?],
    [:medline, :empty?], [:doi, :nil?], [:abstract, :empty?],
    [:url, :nil?], [:mesh, :empty?], [:affiliations, :empty?]
  ]
  locations = []
  field_rules.each do |field, skip_when|
    content = value.public_send(field)
    locations << "#{field}=#{content}" unless content.public_send(skip_when)
  end
  locations << "comments=#{value.comments.join('~')}" unless value.comments.nil?

  position = value.sequence_position
  start_pos, end_pos = position ? position.gsub(/\s*/,'').split('-') : [nil,nil]

  reference = Reference.first(:conditions=>["title = ?",value.title])
  reference ||= Reference.create({:title=>value.title,:authors=>value.authors.join(' '), :location=>locations.join('|')})

  link = @entry.bioentry_references.build({:reference=>reference,:rank=>value.embl_gb_record_number, :start_pos=>start_pos, :end_pos=>end_pos})
  link.save
end

#referencesObject



369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
# File 'lib/bio/db/biosql/sequence.rb', line 369

# Returns the references of the wrapped entry as an Array of Bio::Reference
# objects, one per bioentry_reference.
#
# Each Bio::Reference is built from a Hash with these string keys:
# 'authors'               - the stored author string split after every ". "
# 'sequence_position'     - "start-end", only when both positions are set
# 'title'                 - the reference title
# 'embl_gb_record_number' - the rank of the bioentry_reference
# 'xrefs'                 - "dbname; accession." of the dbxref, or ''
# plus one key per "key=value" pair found in the '|'-separated location
# string (the format written by reference=).
def references
  #TODO: solve the problem with specific comment per reference.
  #TODO: get dbxref
  @entry.bioentry_references.collect do |bio_ref|
    reference = bio_ref.reference
    hash = {}
    # The separator space stays attached to the preceding author.
    hash['authors'] = reference.authors.gsub(/\.\s/, ". |").split(/\|/)

    hash['sequence_position'] = "#{bio_ref.start_pos}-#{bio_ref.end_pos}" if bio_ref.start_pos && bio_ref.end_pos
    hash['title'] = reference.title
    hash['embl_gb_record_number'] = bio_ref.rank

    unless reference.location.nil?
      reference.location.split('|').each do |element|
        # Split on the first '=' only, so values that contain '=' themselves
        # (e.g. a URL with a query string) are not truncated.
        key, value = element.split('=', 2)
        hash[key] = value
      end
    end

    dbxref = reference.dbxref
    hash['xrefs'] = dbxref ? "#{dbxref.dbname}; #{dbxref.accession}." : ''
    Bio::Reference.new(hash)
  end
end

#saveObject



426
427
428
429
430
# File 'lib/bio/db/biosql/sequence.rb', line 426

# Saves the entry's biosequence (when it has one) and then the entry itself.
# Returns the result of @entry.save.
#
# An entry without a biosequence is saved on its own instead of raising
# NoMethodError on nil.
def save
  #I should add chks for SQL errors
  biosequence = @entry.biosequence
  biosequence.save unless biosequence.nil?
  @entry.save
end

#seqObject

Returns the sequence. Returns a Bio::Sequence::Generic object.



330
331
332
333
# File 'lib/bio/db/biosql/sequence.rb', line 330

# Returns the stored sequence wrapped in a Bio::Sequence::Generic.
# An entry without a biosequence yields a Generic built from ''.
def seq
  biosequence = @entry.biosequence
  residues = biosequence ? biosequence.seq : ''
  Bio::Sequence::Generic.new(residues)
end

#seq=(value) ⇒ Object



335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# File 'lib/bio/db/biosql/sequence.rb', line 335

# Stores +value+ as the sequence of the wrapped entry.
#
# When the entry has no biosequence yet, a Biosequence is created from
# +value+, attached to the entry and saved; otherwise the existing
# biosequence just gets the new seq assigned. In both cases the length is
# then updated through length= and the entry is saved.
#
# TODO: revise this code; check which alphabet the sequence uses (NU/NA/nil).
def seq=(value)
  if @entry.biosequence.nil?
    fresh = Biosequence.new(:seq=>value)
    @entry.biosequence = fresh
    @entry.biosequence.save
  else
    @entry.biosequence.seq = value
  end
  self.length = value.length
  @entry.save
end

#taxonomyObject

Reports the parents and excludes info with “no rank”. Now I report rank == class, but… Question: does taxonomy with rank == “class” have to be reported?



354
355
356
357
358
359
360
361
362
363
# File 'lib/bio/db/biosql/sequence.rb', line 354

# Returns the scientific names of the ancestors of the entry's taxon,
# ordered from the most distant ancestor to the direct parent.
#
# The walk starts at the parent of the entry's taxon and follows
# parent_taxon_id upwards. It stops when no taxon is found, when a taxon is
# its own parent, or at the first taxon whose node_rank is 'no rank'.
# Taxa whose node_rank is 'class' are walked through but not reported.
#
# Returns [] when the entry has no taxon, instead of raising NoMethodError
# (+organism+ returns "" in the same situation).
def taxonomy
  lineage = []
  start = @entry.taxon
  return lineage if start.nil?

  taxon = Taxon.first(:conditions=>["taxon_id = ?",start.parent_taxon_id])
  while taxon && taxon.taxon_id != taxon.parent_taxon_id && taxon.node_rank != 'no rank'
    lineage << taxon.taxon_scientific_name.name if taxon.node_rank != 'class'
    #Note: I don't like this call very much, correct with a relationship in the ref class.
    taxon = Taxon.first(:conditions=>["taxon_id = ?",taxon.parent_taxon_id])
  end
  lineage.reverse
end

#to_biosequenceObject



439
440
441
# File 'lib/bio/db/biosql/sequence.rb', line 439

# Passes this object and the Bio::Sequence::Adapter::BioSQL adapter to
# Bio::Sequence.adapter and returns the result.
# NOTE(review): presumably this yields a Bio::Sequence backed by this
# object — confirm against Bio::Sequence.adapter.
def to_biosequence
  Bio::Sequence.adapter(self,Bio::Sequence::Adapter::BioSQL)
end

#to_biosql(bs, biodatabase) ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/bio/db/biosql/sequence.rb', line 114

# Creates a new entry in +biodatabase+ from the sequence object +bs+ and
# copies its data over through this class's setters.
#
# bs          - source object; entry_id and primary_accession are always
#               read. definition, division, other_seqids,
#               secondary_accessions, seq, molecule_type, data_class,
#               topology, date_created, date_modified, keywords, species,
#               features, references and comments are copied only when not
#               nil. sequence_version defaults to 0.
# biodatabase - object whose bioentries.build creates the new entry.
#
# The entry is saved once after the basic fields are set, because the
# sequence and the other dependent records need an existing entry.
#
# Returns self on success. On any StandardError the message is printed and
# nil is returned (best effort, as before). Interrupts and exit requests are
# no longer swallowed.
def to_biosql(bs,biodatabase)
  @entry = biodatabase.bioentries.build({:name=>bs.entry_id})

  puts "primary" if $DEBUG
  self.primary_accession = bs.primary_accession

  puts "def" if $DEBUG
  self.definition = bs.definition unless bs.definition.nil?

  puts "seqver" if $DEBUG
  self.sequence_version = bs.sequence_version || 0

  puts "divi" if $DEBUG
  self.division = bs.division unless bs.division.nil?

  puts "identifier" if $DEBUG
  self.identifier = bs.other_seqids.collect{|dblink| "#{dblink.database}:#{dblink.id}"}.join(';') unless bs.other_seqids.nil?
  @entry.save
  puts "secacc" if $DEBUG

  bs.secondary_accessions.each do |sa|
    puts "#{sa}" if $DEBUG
    #write every secondary accession as a qualifier
    self.secondary_accessions = sa
  end unless bs.secondary_accessions.nil?

  #the entry needs to exist before the sequence can be created
  puts "seq" if $DEBUG
  puts bs.seq if $DEBUG
  self.seq = bs.seq unless bs.seq.nil?
  puts "mol" if $DEBUG

  self.molecule_type = bs.molecule_type unless bs.molecule_type.nil?
  puts "dc" if $DEBUG

  self.data_class = bs.data_class unless bs.data_class.nil?
  puts "top" if $DEBUG
  self.topology = bs.topology unless bs.topology.nil?
  puts "datec" if $DEBUG
  self.date_created = bs.date_created unless bs.date_created.nil?
  puts "datemod" if $DEBUG
  self.date_modified = bs.date_modified unless bs.date_modified.nil?
  puts "key" if $DEBUG

  bs.keywords.each do |kw|
    #write every keyword as a qualifier
    self.keywords = kw
  end unless bs.keywords.nil?

  puts "spec" if $DEBUG
  # A nil species used to raise here, which aborted the import before
  # features, references and comments were stored.
  species = bs.species
  self.species = species unless species.nil? || species.empty?
  puts "Debug: #{bs.species}" if $DEBUG
  puts "Debug: feat..start" if $DEBUG

  bs.features.each do |feat|
    self.feature=feat
  end unless bs.features.nil?

  puts "Debug: feat...end" if $DEBUG
  bs.references.each do |reference|
    self.reference=reference
  end unless bs.references.nil?

  bs.comments.each do |comment|
    self.comment=comment
  end unless bs.comments.nil?

  return self
rescue StandardError => e
  puts "to_biosql exception: #{e}"
  puts e
end

#to_fastaObject



431
432
433
# File 'lib/bio/db/biosql/sequence.rb', line 431

# Renders the sequence as FASTA text: a ">" + accession header line followed
# by the sequence wrapped at 60 characters, every line ending in "\n".
#
# NOTE(review): +accession+ is not defined in the visible part of this class
# (only +primary_accession+ is) — confirm it exists elsewhere.
def to_fasta
  header = ">" + accession
  wrapped = seq.gsub(/.{1,60}/, "\\0\n")
  header + "\n" + wrapped
end

#to_fasta_reverse_complememtObject



435
436
437
# File 'lib/bio/db/biosql/sequence.rb', line 435

# Renders the reverse complement of the sequence as FASTA text: a
# ">" + accession header line followed by seq.reverse_complement wrapped at
# 60 characters, every line ending in "\n".
#
# The method name is misspelled ("complememt"). It is kept for backward
# compatibility; the alias below provides the correct spelling.
#
# NOTE(review): +accession+ is not defined in the visible part of this class
# (only +primary_accession+ is) — confirm it exists elsewhere.
def to_fasta_reverse_complememt
  ">" + accession + "\n" + seq.reverse_complement.gsub(/.{1,60}/, "\\0\n")
end
alias to_fasta_reverse_complement to_fasta_reverse_complememt

#versionObject Also known as: sequence_version



242
243
244
# File 'lib/bio/db/biosql/sequence.rb', line 242

# Returns the version stored on the wrapped entry.
def version
  @entry.version
end

#version=(value) ⇒ Object Also known as: sequence_version=



247
248
249
# File 'lib/bio/db/biosql/sequence.rb', line 247

# Assigns +value+ to the wrapped entry's version. Nothing is saved here.
def version=(value)
  @entry.version=value
end