Class: Bio::PhyloXML::Sequence

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/db/phyloxml/phyloxml_elements.rb

Overview

Description

Element Sequence is used to represent a molecular sequence (Protein, DNA, RNA) associated with a node.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeSequence

Returns a new instance of Sequence


553
554
555
556
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 553

# Builds an empty Sequence: the annotation list and the list of
# foreign-namespace ("other") elements each start as a fresh empty Array.
def initialize
  @annotations, @other = [], []
end

Instance Attribute Details

#accessionObject

Accession object. Holds source and identifier for the sequence.


530
531
532
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 530

# Reader for the Accession object (source and identifier of the sequence).
def accession; @accession; end

#annotationsObject

Array of Annotation objects. Annotations of molecular sequence.


545
546
547
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 545

# Reader for the Array of Annotation objects attached to this sequence.
def annotations; @annotations; end

#domain_architectureObject

DomainArchitecture object. Describes domain architecture of a protein.


547
548
549
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 547

# Reader for the DomainArchitecture object of a protein sequence.
def domain_architecture; @domain_architecture; end

#id_refObject

String. One intended use for 'id_ref' is to link a sequence to a taxonomy (via the taxonomy's 'id_source') in the case of multiple sequences and taxonomies per node.


525
526
527
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 525

# Reader for the 'id_ref' String (e.g. used to link a sequence to a
# taxonomy via the taxonomy's 'id_source').
def id_ref; @id_ref; end

#id_sourceObject

String. Used to link with other elements.


521
522
523
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 521

# Reader for the 'id_source' String used to link with other elements.
def id_source; @id_source; end

#is_alignedObject

Boolean. Used to indicate that this molecular sequence is aligned with all other sequences in the same phylogeny for which 'is aligned' is true as well (which, in most cases, means that gaps were introduced, and that all sequences for which 'is aligned' is true must have the same length).


540
541
542
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 540

# Reader for the alignment flag of the molecular sequence; returns the
# stored value as-is (nil when it was never set).
def is_aligned; @is_aligned; end

#locationObject

String. Location of a sequence on a genome/chromosome


532
533
534
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 532

# Reader for the location String of the sequence on a genome/chromosome.
def location; @location; end

#mol_seqObject

String. The actual sequence is stored here.


534
535
536
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 534

# Reader for the String holding the actual molecular sequence.
def mol_seq; @mol_seq; end

#nameObject

Full name (e.g. muscle Actin )


518
519
520
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 518

# Reader for the full name of the sequence (e.g. muscle Actin).
def name; @name; end

#otherObject

Array of Other objects. Used to save additional information from namespaces other than PhyloXML.


551
552
553
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 551

# Reader for the Array of Other objects, which keep information coming
# from namespaces other than PhyloXML.
def other; @other; end

#symbolObject

short (maximal ten characters) symbol of the sequence (e.g. 'ACTM')


528
529
530
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 528

# Reader for the short symbol of the sequence (e.g. 'ACTM').
def symbol; @symbol; end

#typeObject

Type of sequence (rna, dna, protein)


515
516
517
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 515

# Reader for the type of the sequence (rna, dna, protein).
def type; @type; end

#uriObject

Uri object


543
544
545
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 543

# Reader for the Uri object associated with this sequence.
def uri; @uri; end

Instance Method Details

#is_aligned?Boolean

Returns:

  • (Boolean)

568
569
570
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 568

# Predicate form of the alignment flag. Hands back the stored value
# unchanged, so it is nil (falsy) when the flag was never set.
def is_aligned?; @is_aligned; end

#to_biosequenceObject

Converts a Bio::PhyloXML::Sequence to a Bio::Sequence object.


Returns

Bio::Sequence


619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 619

# Converts this Bio::PhyloXML::Sequence into a Bio::Sequence object.
#
# The sequence string (@mol_seq) is handed to Bio::Sequence.auto. Accession,
# name, uri and type are copied over when present:
# * accession -> id_namespace (source) and entry_id (value)
# * name      -> definition
# * uri       -> a Bio::Reference ('url' and 'title') appended to references
# * type      -> molecule_type, for 'rna' and 'dna' only
#
# ---
# *Returns*:: Bio::Sequence
def to_biosequence
  # type is not a required attribute in phyloxml (nor any other Sequence
  # element); it might not hold any value, so we do not check what type it is.
  seq = Bio::Sequence.auto(@mol_seq)

  # The accession may be unset (no Sequence child element is required), so
  # only copy it when present instead of calling methods on nil.
  unless @accession.nil?
    seq.id_namespace = @accession.source
    seq.entry_id = @accession.value
    # seq.primary_accession = @accession.value could be this
  end

  seq.definition = @name
  # seq.comments = @name //this one?

  unless @uri.nil?
    ref_fields = { 'url' => @uri.uri,
                   'title' => @uri.desc }
    # NOTE(review): a freshly built Bio::Sequence may have no references list
    # yet; create one on demand so the append cannot hit nil -- confirm
    # against Bio::Sequence.
    (seq.references ||= []) << Bio::Reference.new(ref_fields)
  end

  seq.molecule_type = 'RNA' if @type == 'rna'
  seq.molecule_type = 'DNA' if @type == 'dna'

  # @todo deal with the properties. There might be properties which look
  # like bio sequence attributes or features
  seq
end

#to_xmlObject

Converts elements to xml representation. Called by PhyloXML::Writer class.


581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 581

# Converts elements to xml representation. Called by PhyloXML::Writer class.
#
# Builds a 'sequence' node, sets its 'type' attribute when @type is given,
# and appends the child elements in this order: symbol, accession, name,
# location, mol_seq (with optional 'is_aligned' attribute), uri,
# annotation(s), domain_architecture.
#
# ---
# *Returns*:: LibXML::XML::Node for the 'sequence' element
# *Raises*:: RuntimeError if @type is set to anything other than
#            'dna', 'rna' or 'protein'
def to_xml
  seq = LibXML::XML::Node.new('sequence')

  unless @type.nil?
    unless ["dna", "rna", "protein"].include?(@type)
      raise "Type attribute of Sequence has to be one of dna, rna or protein."
    end
    seq["type"] = @type
  end

  # NOTE(review): the symbol pattern uses ^/$ (line anchors) rather than
  # \A/\z; confirm whether multi-line values should be rejected here.
  PhyloXML::Writer.generate_xml(seq, self, [
      [:attr, 'id_source'],
      [:attr, 'id_ref'],
      [:pattern, 'symbol', @symbol, Regexp.new("^\\S{1,10}$")],
      [:complex, 'accession', @accession],
      [:simple, 'name', @name],
      [:simple, 'location', @location]])

  # mol_seq is written by hand because it carries the is_aligned attribute.
  # A pattern check was considered for it but is disabled:
  #   [:pattern, 'mol_seq', @mol_seq, Regexp.new("^[a-zA-Z\.\-\?\*_]+$")]
  unless @mol_seq.nil?
    molseq = LibXML::XML::Node.new('mol_seq', @mol_seq)
    molseq["is_aligned"] = @is_aligned.to_s unless @is_aligned.nil?
    seq << molseq
  end

  PhyloXML::Writer.generate_xml(seq, self, [
      [:complex, 'uri', @uri],
      [:objarr, 'annotation', 'annotations'],
      [:complex, 'domain_architecture', @domain_architecture]])
  # @todo test domain_architecture
  # any
  seq
end