Class: Bio::PhyloXML::Sequence

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/db/phyloxml/phyloxml_elements.rb

Overview

Description

Element Sequence is used to represent a molecular sequence (Protein, DNA, RNA) associated with a node.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeSequence


562
563
564
565
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 562

# Sets up a new Sequence with empty collections for its annotations
# and for elements coming from other namespaces.
def initialize
  @annotations = Array.new
  @other = Array.new
end

Instance Attribute Details

#accessionObject

Accession object. Holds source and identifier for the sequence.


539
540
541
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 539

# Accession object. Holds source and identifier for the sequence.
def accession
  return @accession
end

#annotationsObject

Array of Annotation objects. Annotations of molecular sequence.


554
555
556
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 554

# Array of Annotation objects. Annotations of molecular sequence.
def annotations
  return @annotations
end

#domain_architectureObject

DomainArchitecture object. Describes domain architecture of a protein.


556
557
558
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 556

# DomainArchitecture object. Describes domain architecture of a protein.
def domain_architecture
  return @domain_architecture
end

#id_refObject

String. One intended use for 'id_ref' is to link a sequence to a taxonomy (via the taxonomy's 'id_source') in the case of multiple sequences and taxonomies per node.


534
535
536
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 534

# String. One intended use for 'id_ref' is to link a sequence to a
# taxonomy (via the taxonomy's 'id_source') in the case of multiple
# sequences and taxonomies per node.
def id_ref
  return @id_ref
end

#id_sourceObject

String. Used to link with other elements.


530
531
532
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 530

# String. Used to link with other elements.
def id_source
  return @id_source
end

#is_alignedObject

Boolean. Used to indicate that this molecular sequence is aligned with all other sequences in the same phylogeny for which 'is aligned' is true as well (which, in most cases, means that gaps were introduced, and that all sequences for which 'is aligned' is true must have the same length).


549
550
551
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 549

# Boolean. True when this molecular sequence is aligned with all other
# sequences in the same phylogeny for which 'is aligned' is true as well.
def is_aligned
  return @is_aligned
end

#locationObject

String. Location of a sequence on a genome/chromosome


541
542
543
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 541

# String. Location of a sequence on a genome/chromosome.
def location
  return @location
end

#mol_seqObject

String. The actual sequence is stored here.


543
544
545
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 543

# String. The actual sequence is stored here.
def mol_seq
  return @mol_seq
end

#nameObject

Full name (e.g. muscle Actin)


527
528
529
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 527

# Full name (e.g. muscle Actin).
def name
  return @name
end

#otherObject

Array of Other objects. Used to save additional information from namespaces other than the PhyloXML namespace.


560
561
562
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 560

# Array of Other objects. Holds additional information that comes from
# namespaces other than PhyloXML.
def other
  return @other
end

#symbolObject

Short (at most ten characters) symbol of the sequence (e.g. 'ACTM').


537
538
539
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 537

# Short (at most ten characters) symbol of the sequence (e.g. 'ACTM').
def symbol
  return @symbol
end

#typeObject

Type of sequence (rna, dna, protein)


524
525
526
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 524

# Type of sequence (rna, dna, protein).
def type
  return @type
end

#uriObject

Uri object


552
553
554
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 552

# Uri object.
def uri
  return @uri
end

Instance Method Details

#is_aligned?Boolean


577
578
579
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 577

# Predicate form of the is_aligned attribute; returns the stored value
# unchanged (nil when it was never set).
def is_aligned?
  return @is_aligned
end

#to_biosequenceObject

Converts a Bio::PhyloXML::Sequence to a Bio::Sequence object.


Returns

Bio::Sequence


628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 628

# Converts this Bio::PhyloXML::Sequence to a Bio::Sequence object.
#
# Copies the accession (source and value), the name, the uri (as a
# Bio::Reference) and the molecule type ('RNA' / 'DNA') when present.
# ---
# *Returns*:: Bio::Sequence
def to_biosequence
  #type is not a required attribute in phyloxml (nor any other Sequence
  #element) it might not hold any value, so we will not check what type it is.
  seq = Bio::Sequence.auto(@mol_seq)

  # accession is optional as well, so only dereference it when it is set;
  # otherwise the conversion would fail with NoMethodError on nil.
  if (defined? @accession) && @accession
    seq.id_namespace = @accession.source
    seq.entry_id = @accession.value
    # seq.primary_accession = @accession.value could be this
  end
  seq.definition = @name
  #seq.comments = @name //this one?
  if (defined? @uri) && @uri
    h = {'url' => @uri.uri,
      'title' => @uri.desc }
    ref = Bio::Reference.new(h)
    # NOTE(review): references appears to be unset on a freshly built
    # Bio::Sequence - initialise it defensively before appending.
    seq.references ||= []
    seq.references << ref
  end
  seq.molecule_type = 'RNA' if @type == 'rna'
  seq.molecule_type = 'DNA' if @type == 'dna'

  #@todo deal with the properties. There might be properties which look
  #like bio sequence attributes or features
  return seq
end

#to_xmlObject

Converts elements to xml representation. Called by PhyloXML::Writer class.


590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
# File 'lib/bio/db/phyloxml/phyloxml_elements.rb', line 590

# Converts elements to xml representation. Called by PhyloXML::Writer class.
# ---
# *Returns*:: LibXML::XML::Node named 'sequence'
# *Raises*:: RuntimeError if the type attribute is set to anything other
#            than 'dna', 'rna' or 'protein'
def to_xml
  has_type = (defined? @type) && @type
  # Validate up front so an invalid type fails before any node is built,
  # and report the values that are actually accepted.
  if has_type && !["dna", "rna", "protein"].include?(@type)
    raise "Type attribute of Sequence has to be one of dna, rna or protein."
  end

  seq = LibXML::XML::Node.new('sequence')
  seq["type"] = @type if has_type

  PhyloXML::Writer.generate_xml(seq, self, [
      [:attr, 'id_source'],
      [:attr, 'id_ref'],
      [:pattern, 'symbol', (defined? @symbol) ? @symbol : nil, Regexp.new("^\\S{1,10}$")],
      [:complex, 'accession', (defined? @accession) ? @accession : nil],
      [:simple, 'name', (defined? @name) ? @name : nil],
      [:simple, 'location', (defined? @location) ? @location : nil]])

  # mol_seq is written by hand because it carries the is_aligned attribute.
  if (defined? @mol_seq) && @mol_seq
    molseq = LibXML::XML::Node.new('mol_seq', @mol_seq)
    # Emit the attribute for both true and false; skip it only when unset.
    molseq["is_aligned"] = @is_aligned.to_s if (defined? @is_aligned) && !@is_aligned.nil?
    seq << molseq
  end

  PhyloXML::Writer.generate_xml(seq, self, [
      #[:pattern, 'mol_seq', @mol_seq, Regexp.new("^[a-zA-Z\.\-\?\*_]+$")],
      [:complex, 'uri', (defined? @uri) ? @uri : nil],
      [:objarr, 'annotation', 'annotations'],
      [:complex, 'domain_architecture', (defined? @domain_architecture) ? @domain_architecture : nil]])
      #@todo test domain_architecture
  #any
  return seq
end