Class: Bio::PhyloXML::Sequence

Inherits:
Object
  • Object
show all
Defined in:
lib/bio-phyloxml/phyloxml_elements.rb

Overview

Description

Element Sequence is used to represent a molecular sequence (Protein, DNA, RNA) associated with a node.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeSequence

Returns a new instance of Sequence.



551
552
553
554
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 551

# Creates an empty Sequence element.
#
# Only the two collection attributes are initialised (each to its own empty
# Array); every other attribute stays unset until it is assigned.
def initialize
  @annotations, @other = [], []
end

Instance Attribute Details

#accessionObject

Accession object. Holds source and identifier for the sequence.



528
529
530
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 528

# Reader for the Accession object (source and identifier of the sequence).
#
# Returns the value stored in @accession, or nil when it was never assigned.
def accession; @accession; end

#annotationsObject

Array of Annotation objects. Annotations of molecular sequence.



543
544
545
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 543

# Reader for the Array of Annotation objects attached to this sequence.
#
# Returns the value stored in @annotations.
def annotations; @annotations; end

#domain_architectureObject

DomainArchitecture object. Describes domain architecture of a protein.



545
546
547
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 545

# Reader for the DomainArchitecture object describing a protein's domains.
#
# Returns the value stored in @domain_architecture, or nil when unset.
def domain_architecture; @domain_architecture; end

#id_refObject

String. One intended use for ‘id_ref’ is to link a sequence to a taxonomy (via the taxonomy’s ‘id_source’) in the case of multiple sequences and taxonomies per node.



523
524
525
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 523

# Reader for the 'id_ref' String, e.g. used to link a sequence to a
# taxonomy through that taxonomy's 'id_source'.
#
# Returns the value stored in @id_ref, or nil when unset.
def id_ref; @id_ref; end

#id_sourceObject

String. Used to link with other elements.



519
520
521
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 519

# Reader for the 'id_source' String used to link with other elements.
#
# Returns the value stored in @id_source, or nil when unset.
def id_source; @id_source; end

#is_alignedObject

Boolean. Used to indicate that this molecular sequence is aligned with all other sequences in the same phylogeny for which ‘is aligned’ is true as well (which, in most cases, means that gaps were introduced, and that all sequences for which ‘is aligned’ is true must have the same length).



538
539
540
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 538

# Reader for the alignment flag of the molecular sequence.
#
# Returns the value stored in @is_aligned (nil when unset).
def is_aligned; @is_aligned; end

#locationObject

String. Location of a sequence on a genome/chromosome



530
531
532
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 530

# Reader for the location String of the sequence on a genome/chromosome.
#
# Returns the value stored in @location, or nil when unset.
def location; @location; end

#mol_seqObject

String. The actual sequence is stored here.



532
533
534
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 532

# Reader for the String holding the actual molecular sequence.
#
# Returns the value stored in @mol_seq, or nil when unset.
def mol_seq; @mol_seq; end

#nameObject

Full name (e.g. muscle Actin)



516
517
518
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 516

# Reader for the full name of the sequence (e.g. muscle Actin).
#
# Returns the value stored in @name, or nil when unset.
def name; @name; end

#otherObject

Array of Other objects. Used to save additional information from namespaces other than PhyloXML.



549
550
551
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 549

# Reader for the Array of Other objects (data from non-PhyloXML namespaces).
#
# Returns the value stored in @other.
def other; @other; end

#symbolObject

Short (at most ten characters) symbol of the sequence (e.g. ‘ACTM’)



526
527
528
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 526

# Reader for the short symbol of the sequence (e.g. 'ACTM').
#
# Returns the value stored in @symbol, or nil when unset.
def symbol; @symbol; end

#typeObject

Type of sequence (rna, dna, protein)



513
514
515
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 513

# Reader for the type of the sequence (rna, dna, protein).
#
# Returns the value stored in @type, or nil when unset.
def type; @type; end

#uriObject

Uri object



541
542
543
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 541

# Reader for the Uri object associated with this sequence.
#
# Returns the value stored in @uri, or nil when unset.
def uri; @uri; end

Instance Method Details

#is_aligned?Boolean

Returns:

  • (Boolean)


566
567
568
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 566

# Predicate form of the alignment flag.
#
# Returns the raw value stored in @is_aligned (nil when unset); no coercion
# to true/false is performed.
def is_aligned?; @is_aligned; end

#to_biosequenceObject

Converts Bio::PhyloXML::Sequence to a Bio::Sequence object.


Returns

Bio::Sequence



617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 617

# Converts this Bio::PhyloXML::Sequence into a Bio::Sequence object.
#
# The sequence is built with Bio::Sequence.auto from @mol_seq. 'type' is not
# a required attribute in phyloxml (nor is any other Sequence element), so it
# may hold no value; molecule_type is only set when @type is exactly 'rna'
# or 'dna'.
#
# Accession and uri are optional: when absent, the corresponding
# Bio::Sequence fields are simply left untouched instead of raising.
#
# Returns the populated Bio::Sequence.
def to_biosequence
  seq = Bio::Sequence.auto(@mol_seq)

  # Accession is optional (to_xml guards it the same way), so only copy its
  # fields when one is present; calling .source on nil would raise.
  if (defined? @accession) && @accession
    seq.id_namespace = @accession.source
    seq.entry_id = @accession.value
    # seq.primary_accession = @accession.value could be this
  end

  seq.definition = @name
  # seq.comments = @name might be an alternative target for the name

  if (defined? @uri) && @uri
    ref = Bio::Reference.new('url' => @uri.uri, 'title' => @uri.desc)
    # NOTE(review): a fresh Bio::Sequence presumably has no references array
    # yet; make sure one exists before appending - confirm against BioRuby.
    seq.references ||= []
    seq.references << ref
  end

  case @type
  when 'rna' then seq.molecule_type = 'RNA'
  when 'dna' then seq.molecule_type = 'DNA'
  end

  #@todo deal with the properties. There might be properties which look
  #like bio sequence attributes or features
  seq
end

#to_xmlObject

Converts elements to xml representation. Called by PhyloXML::Writer class.



579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
# File 'lib/bio-phyloxml/phyloxml_elements.rb', line 579

# Converts this element to its XML representation. Called by the
# PhyloXML::Writer class.
#
# A 'sequence' node is created, the optional 'type' attribute is validated
# and set, and the remaining attributes/children are delegated to
# PhyloXML::Writer.generate_xml in two batches. The 'mol_seq' child is
# appended directly between those two batches.
#
# Returns the LibXML::XML::Node named 'sequence'.
# Raises RuntimeError when @type is set to anything other than 'dna', 'rna'
# or 'protein'.
def to_xml
  seq = LibXML::XML::Node.new('sequence')

  if (defined? @type) && @type
    unless %w[dna rna protein].include?(@type)
      raise "Type attribute of Sequence has to be one of dna, rna or protein."
    end
    seq["type"] = @type
  end

  PhyloXML::Writer.generate_xml(seq, self, [
      [:attr, 'id_source'],
      [:attr, 'id_ref'],
      # \A and \z anchor the whole string; ^ and $ would accept a multi-line
      # value as soon as one of its lines looked like a valid symbol.
      [:pattern, 'symbol', (defined? @symbol) ? @symbol : nil, /\A\S{1,10}\z/],
      [:complex, 'accession', (defined? @accession) ? @accession : nil],
      [:simple, 'name', (defined? @name) ? @name : nil],
      [:simple, 'location', (defined? @location) ? @location : nil]])

  # mol_seq is written by hand (no pattern validation of its content) so the
  # is_aligned attribute can be attached; false is a meaningful value here,
  # hence the explicit nil check rather than a truthiness test.
  if (defined? @mol_seq) && @mol_seq
    molseq = LibXML::XML::Node.new('mol_seq', @mol_seq)
    molseq["is_aligned"] = @is_aligned.to_s if (defined? @is_aligned) && !@is_aligned.nil?
    seq << molseq
  end

  PhyloXML::Writer.generate_xml(seq, self, [
      [:complex, 'uri', (defined? @uri) ? @uri : nil],
      [:objarr, 'annotation', 'annotations'],
      [:complex, 'domain_architecture', (defined? @domain_architecture) ? @domain_architecture : nil]])
      #@todo test domain_architecture
  #any
  seq
end