Module: Bio::Alignment::EnumerableExtension

Includes:
Output, PropertyMethods
Included in:
ArrayExtension, HashExtension
Defined in:
lib/bio/alignment.rb,
lib/bio/alignment.rb

Overview

module Output

Constant Summary

Constants included from PropertyMethods

PropertyMethods::GAP_CHAR, PropertyMethods::GAP_REGEXP, PropertyMethods::MISSING_CHAR

Instance Attribute Summary

Attributes included from PropertyMethods

#gap_char, #gap_regexp, #missing_char

Instance Method Summary collapse

Methods included from Output

#__output_phylip_common, #output, #output_clustal, #output_fasta, #output_molphy, #output_msf, #output_phylip, #output_phylipnon, #to_clustal

Methods included from PropertyMethods

#get_all_property, #is_gap?, #set_all_property

Instance Method Details

#alignment_collectObject

Iterates over each sequence, collects the results of running the block, and returns them as a new alignment (a Bio::Alignment::SequenceArray object).

Note that you should redefine this method if you want to change the class of the return value.



445
446
447
448
449
450
451
452
# File 'lib/bio/alignment.rb', line 445

# Builds a new SequenceArray from the results of the given block.
#
# The new array first receives this alignment's properties (as returned by
# get_all_property); then every sequence yielded by each_seq is passed to
# the block and the block's result is appended.
#
# Returns the new SequenceArray.
def alignment_collect
  collected = SequenceArray.new
  collected.set_all_property(get_all_property)
  each_seq { |seq| collected << yield(seq) }
  collected
end

#alignment_concat(align) ⇒ Object

Concatenates the given alignment. align must have each_seq or each method.

Returns self.

Note that it is a destructive method.

For Hash, please use it carefully because the order of the sequences is inconstant and key information is completely ignored.



848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
# File 'lib/bio/alignment.rb', line 848

# Appends, position by position, the sequences of +align+ to the sequences
# of this alignment (destructive; uses +concat+ on each of our sequences).
#
# +align+ is iterated with each_seq when possible. If calling each_seq
# raises NoMethodError or ArgumentError before any sequence has been
# yielded, iteration falls back to +each+. An error raised after the first
# sequence has been yielded is re-raised unchanged.
#
# Pairs where either side is missing (nil/false) are skipped.
#
# Returns self.
def alignment_concat(align)
  targets = []
  each_seq { |own| targets << own }

  index = 0
  started = false
  # Appends one incoming sequence to the target at the current position.
  append = lambda do |incoming|
    target = targets[index]
    target.concat(incoming) if target && incoming
    index += 1
  end

  begin
    align.each_seq do |incoming|
      started = true
      append.call(incoming)
    end
    return self
  rescue NoMethodError, ArgumentError => error
    # Only swallow the error when each_seq itself was unusable.
    raise error if started
  end

  align.each { |incoming| append.call(incoming) }
  self
end

#alignment_lengthObject Also known as: seq_length

Returns the alignment length. Returns the longest length of the sequence in the alignment.



366
367
368
369
370
371
372
373
# File 'lib/bio/alignment.rb', line 366

# Returns the length of the longest sequence yielded by each_seq,
# or 0 when no sequence is yielded.
def alignment_length
  longest = 0
  each_seq { |seq| longest = [longest, seq.length].max }
  longest
end

#alignment_lstrip!Object Also known as: lstrip!

Removes excess gaps in the head of the sequences. If removes nothing, returns nil. Otherwise, returns self.

Note that it is a destructive method.



751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
# File 'lib/bio/alignment.rb', line 751

# Removes the leading columns that contain nothing after gap removal
# (String-like API, destructive).
#
# Sites are visited from the head via each_site; counting stops at the
# first site that is not empty after remove_gaps!. That many characters are
# then cut from the head of every sequence.
#
# Returns nil if nothing was removed, otherwise self.
def alignment_lstrip!
  leading = 0
  each_site do |site|
    site.remove_gaps!
    break unless site.empty?
    leading += 1
  end
  return nil if leading <= 0
  each_seq { |seq| seq[0, leading] = '' }
  self
end

#alignment_normalize!Object Also known as: normalize!

Fills gaps to the tail of each sequence if the length of the sequence is shorter than the alignment length.

Note that it is a destructive method.



711
712
713
714
715
716
717
718
# File 'lib/bio/alignment.rb', line 711

# Pads every sequence shorter than alignment_length with gap_char at its
# tail so that it reaches alignment_length (destructive).
#
# Returns self.
def alignment_normalize!
  target = alignment_length
  each_seq do |seq|
    shortfall = target - seq.length
    seq << (gap_char * shortfall) if shortfall > 0
  end
  self
end

#alignment_rstrip!Object Also known as: rstrip!

Removes excess gaps in the tail of the sequences. If removes nothing, returns nil. Otherwise, returns self.

Note that it is a destructive method.



726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
# File 'lib/bio/alignment.rb', line 726

# Removes the trailing columns that contain nothing after gap removal
# (String-like API, destructive).
#
# Sites are visited from the last position down to 0 via each_site_step;
# the kept length shrinks by one for every site that is empty after
# remove_gaps!, stopping at the first non-empty site. Every sequence longer
# than the kept length is then truncated to it.
#
# Returns nil if nothing was removed, otherwise self.
def alignment_rstrip!
  full = alignment_length
  keep = full
  each_site_step(full - 1, 0, -1) do |site|
    site.remove_gaps!
    break unless site.empty?
    keep -= 1
  end
  return nil if keep >= full
  each_seq { |seq| seq[keep..-1] = '' if seq.length > keep }
  self
end

#alignment_site(position) ⇒ Object

Gets a site of the position. Returns a Bio::Alignment::Site object.

If the position is out of range, it returns the site of which all are gaps.



403
404
405
406
407
# File 'lib/bio/alignment.rb', line 403

# Returns the site object produced by _alignment_site for +position+, after
# copying this alignment's properties (get_all_property) onto it.
def alignment_site(position)
  column = _alignment_site(position)
  properties = get_all_property
  column.set_all_property(properties)
  column
end

#alignment_slice(*arg) ⇒ Object Also known as: slice

Returns the specified range of the alignment. For each sequence, the ‘slice’ method (it may be String#slice, which is the same as String#[]) is executed, and returns a new alignment as a Bio::Alignment::SequenceArray object.

Unlike alignment_window method, the result alignment might contain nil.

If you want to change return value’s class, you should redefine alignment_collect method.



806
807
808
809
810
811
812
# File 'lib/bio/alignment.rb', line 806

# Calls +slice+ with the given arguments on every sequence and gathers the
# results through alignment_collect (String-like API; comparable to
# BioPerl's AlignI::slice).
#
# Returns whatever alignment_collect returns.
def alignment_slice(*arg)
  alignment_collect { |seq| seq.slice(*arg) }
end

#alignment_strip!Object Also known as: strip!

Removes excess gaps in the sequences. If removes nothing, returns nil. Otherwise, returns self.

Note that it is a destructive method.



773
774
775
776
777
778
# File 'lib/bio/alignment.rb', line 773

# Strips excess gaps from both ends (String-like API, destructive).
#
# Always runs alignment_rstrip! first and alignment_lstrip! second, then
# returns the first truthy result of the two (nil when neither removed
# anything).
def alignment_strip!
  tail_result = alignment_rstrip!
  head_result = alignment_lstrip!
  tail_result || head_result
end

#alignment_subseq(*arg) ⇒ Object Also known as: subseq

For each sequence, the ‘subseq’ method (Bio::Sequence::Common#subseq is expected) is executed, and returns a new alignment as a Bio::Alignment::SequenceArray object.

All sequences in the alignment are expected to be kind of Bio::Sequence::NA or Bio::Sequence::AA objects.

Unlike alignment_window method, the result alignment might contain nil.

If you want to change return value’s class, you should redefine alignment_collect method.



828
829
830
831
832
833
# File 'lib/bio/alignment.rb', line 828

# Calls +subseq+ with the given arguments on every sequence and gathers the
# results through alignment_collect.
#
# Returns whatever alignment_collect returns.
def alignment_subseq(*arg)
  alignment_collect { |seq| seq.subseq(*arg) }
end

#alignment_window(*arg) ⇒ Object Also known as: window

Returns specified range of the alignment. For each sequence, the ‘[]’ method (it may be String#[]) is executed, and returns a new alignment as a Bio::Alignment::SequenceArray object.

Unlike the alignment_slice method, the resulting alignment is guaranteed to contain String objects even if the specified range is out of range.

If you want to change return value’s class, you should redefine alignment_collect method.



466
467
468
469
470
# File 'lib/bio/alignment.rb', line 466

# Applies +[]+ with the given arguments to every sequence and gathers the
# results through alignment_collect. When +[]+ yields nil/false for a
# sequence, a new empty instance of seqclass is used in its place.
#
# Returns whatever alignment_collect returns.
def alignment_window(*arg)
  alignment_collect { |seq| seq[*arg] || seqclass.new('') }
end

#collect_each_siteObject

Iterates over each site of the alignment, collects the results of running the block, and returns them as an array. It yields a Bio::Alignment::Site object.



502
503
504
505
506
507
508
# File 'lib/bio/alignment.rb', line 502

# Runs the block for every site yielded by each_site and returns the
# block results as an Array, in iteration order.
def collect_each_site
  results = []
  each_site { |site| results.push(yield(site)) }
  results
end

#consensus_each_site(opt = {}) ⇒ Object

Helper method for calculating consensus sequence. It iterates over each site of the alignment. In each site, gaps will be removed if specified with opt. It yields a Bio::Alignment::Site object. Results running the block (String objects are expected) are joined to a string and it returns the string.

opt[:gap_mode] ==> 0 -- gaps are regarded as normal characters
                   1 -- a site within gaps is regarded as a gap
                  -1 -- gaps are eliminated from consensus calculation
    default: 0


522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
# File 'lib/bio/alignment.rb', line 522

# Helper for consensus calculation: yields every site (via
# collect_each_site) and joins the per-site results into one String.
#
# opt[:missing_char] -- used when the block returns nil/false
#                       (defaults to missing_char)
# opt[:gap_mode]     -- 0 or nil: the block decides for every site
#                       1: a site for which has_gap? is true becomes
#                          gap_char without calling the block
#                      -1: gaps are removed from the site first; a site
#                          that is then empty becomes gap_char
#
# Raises RuntimeError for any other :gap_mode value.
def consensus_each_site(opt = {})
  fallback = opt[:missing_char] || missing_char
  columns =
    case opt[:gap_mode]
    when nil, 0
      collect_each_site { |site| yield(site) || fallback }
    when 1
      collect_each_site do |site|
        site.has_gap? ? gap_char : (yield(site) || fallback)
      end
    when -1
      collect_each_site do |site|
        site.remove_gaps!
        site.empty? ? gap_char : (yield(site) || fallback)
      end
    else
      raise ':gap_mode must be 0, 1 or -1'
    end
  columns.join('')
end

#consensus_iupac(opt = {}) ⇒ Object

Returns the IUPAC consensus string of the alignment of nucleic-acid sequences.

It resembles the BioPerl’s AlignI::consensus_iupac method.

Please refer to the consensus_each_site method for opt.



564
565
566
567
568
# File 'lib/bio/alignment.rb', line 564

# Builds a consensus string by asking every site for its consensus_iupac
# value; iteration, gap handling and joining are delegated to
# consensus_each_site, which receives +opt+ unchanged.
def consensus_iupac(opt = {})
  consensus_each_site(opt) { |site| site.consensus_iupac }
end

#consensus_string(threshold = 1.0, opt = {}) ⇒ Object

Returns the consensus string of the alignment. 0.0 <= threshold <= 1.0 is expected.

It resembles the BioPerl’s AlignI::consensus_string method.

Please refer to the consensus_each_site method for opt.



551
552
553
554
555
# File 'lib/bio/alignment.rb', line 551

# Builds a consensus string by asking every site for
# consensus_string(threshold); iteration, gap handling and joining are
# delegated to consensus_each_site, which receives +opt+ unchanged.
def consensus_string(threshold = 1.0, opt = {})
  consensus_each_site(opt) { |site| site.consensus_string(threshold) }
end

#convert_match(match_char = '.') ⇒ Object

This is the BioPerl’s AlignI::match like method.

Changes the sites of the second through last sequences to match_char (default: ‘.’) when a site is equal to the first sequence’s corresponding site.

Note that it is a destructive method.

For Hash, please use it carefully because the order of the sequences is inconstant.



661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
# File 'lib/bio/alignment.rb', line 661

# Replaces, in every sequence after the first, each character that equals
# the first sequence's character at the same position with +match_char+
# (BioPerl AlignI::match-like; destructive).
#
# Positions where the sequence has no character, or where the first
# sequence's character is a gap according to is_gap?, are left untouched.
#
# Returns self.
def convert_match(match_char = '.')
  width = alignment_length
  reference = nil
  each_seq do |seq|
    if reference
      width.times do |pos|
        next unless seq[pos]
        next unless reference[pos] == seq[pos]
        next if is_gap?(reference[pos..pos])
        seq[pos..pos] = match_char
      end
    else
      # The first truthy sequence becomes the reference.
      reference = seq
    end
  end
  self
end

#convert_unmatch(match_char = '.') ⇒ Object

This is the BioPerl’s AlignI::unmatch like method.

Changes second to last sequences’ sites match_char(default: ‘.’) to original sites’ characters.

Note that it is a destructive method.

For Hash, please use it carefully because the order of the sequences is inconstant.



689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
# File 'lib/bio/alignment.rb', line 689

# Restores, in every sequence after the first, each +match_char+ to the
# first sequence's character at the same position (BioPerl
# AlignI::unmatch-like; destructive).
#
# A +match_char+ is left unchanged when the first sequence has no
# character at that position. This includes the position right after the
# first sequence's end, where String#[] returns '' rather than nil;
# assigning that empty string would delete the character and shift the
# rest of the sequence.
#
# Returns self.
def convert_unmatch(match_char = '.')
  #(BioPerl) AlignI::unmatch like method
  len = alignment_length
  firstseq = nil
  each_seq do |s|
    if firstseq then
      (0...len).each do |i|
        next unless s[i..i] == match_char
        original = firstseq[i..i]
        # nil: beyond the end; '': exactly at the end of firstseq.
        next if original.nil? or original.empty?
        s[i..i] = original
      end
    else
      firstseq = s
    end
  end
  self
end

#each_seq(&block) ⇒ Object

Iterates over each sequences. Yields a sequence. It acts the same as Enumerable#each.

You would redefine the method suitable for the class/object.



340
341
342
# File 'lib/bio/alignment.rb', line 340

# Iterates over the sequences of the alignment by delegating to +each+,
# forwarding the caller's block unchanged; the return value is whatever
# +each+ returns.
#
# Other methods in this module reach the sequences through each_seq, so an
# including class can redefine it to change how sequences are enumerated.
def each_seq(&block) #:yields: seq
  each(&block)
end

#each_siteObject

Iterates over each site of the alignment. It yields a Bio::Alignment::Site object (which inherits Array). It returns self.



412
413
414
415
416
417
418
419
420
# File 'lib/bio/alignment.rb', line 412

# Yields one site object per position, from 0 up to alignment_length - 1.
#
# Each site comes from _alignment_site and receives this alignment's
# properties, which are read once with get_all_property before iterating.
#
# Returns self.
def each_site
  properties = get_all_property
  alignment_length.times do |pos|
    column = _alignment_site(pos)
    column.set_all_property(properties)
    yield(column)
  end
  self
end

#each_site_step(start, stop, step = 1) ⇒ Object

Iterates over each site of the alignment, with specifying start, stop positions and step. It yields Bio::Alignment::Site object (which inherits Array). It returns self. It is same as start.step(stop, step) { |i| yield alignment_site(i) }.



428
429
430
431
432
433
434
435
436
# File 'lib/bio/alignment.rb', line 428

# Yields one site object for every position produced by
# start.step(stop, step).
#
# Each site comes from _alignment_site and receives this alignment's
# properties, which are read once with get_all_property before iterating.
#
# Returns self.
def each_site_step(start, stop, step = 1)
  properties = get_all_property
  start.step(stop, step) do |pos|
    column = _alignment_site(pos)
    column.set_all_property(properties)
    yield(column)
  end
  self
end

#each_window(window_size, step_size = 1) ⇒ Object

Iterates over each sliding window of the alignment. window_size is the size of the sliding window. step_size is the distance the window moves on each step. It yields a Bio::Alignment::SequenceArray object which contains each sliding window. It returns a Bio::Alignment::SequenceArray object which contains the remainder of the alignment at the terminal end. If window_size is smaller than 0, it returns nil.



481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
# File 'lib/bio/alignment.rb', line 481

# Iterates over sliding windows of the alignment.
#
# window_size -- width of each window; returns nil immediately if negative
# step_size   -- distance between window starts (default 1). A
#                non-negative value slides from the head; a negative value
#                slides from the tail towards the head.
#
# Yields the result of alignment_window(start, window_size) for every
# window start.
#
# Returns the remaining part of the alignment that lies past the last
# yielded window, also obtained through alignment_window. When sliding
# from the head and no window fits at all, the whole alignment
# (alignment_window(0..-1)) is returned.
def each_window(window_size, step_size = 1)
  return nil if window_size < 0
  if step_size >= 0 then
    # Block parameters are block-local on Ruby >= 1.9, so the start of the
    # last yielded window must be kept in a separate outer variable.
    last_start = nil
    0.step(alignment_length - window_size, step_size) do |i|
      yield alignment_window(i, window_size)
      last_start = i
    end
    remainder_start = last_start ? (last_start + window_size) : 0
    alignment_window(remainder_start..-1)
  else
    i = alignment_length - window_size
    while i >= 0
      yield alignment_window(i, window_size)
      i += step_size
    end
    # i has overshot by one step; undo it to find the last window's start.
    alignment_window(0...(i-step_size))
  end
end

#match_line(opt = {}) ⇒ Object

Returns the match line string of the alignment of nucleic- or amino-acid sequences. The type of the sequence is automatically determined, or you can specify it with opt.

It resembles the BioPerl’s AlignI::match_line method.

opt[:type] ==> :na or :aa (or determined by sequence class)
opt[:match_line_char]   ==> 100% equal    default: '*'
opt[:strong_match_char] ==> strong match  default: ':'
opt[:weak_match_char]   ==> weak match    default: '.'
opt[:mismatch_char]     ==> mismatch      default: ' '
  :strong_ and :weak_match_char are used only in amino mode (:aa)

More opt can be accepted. Please refer to the consensus_each_site method for opt.



623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
# File 'lib/bio/alignment.rb', line 623

# Returns the match line of the alignment, choosing between
# match_line_amino and match_line_nuc.
#
# opt[:type] == :aa selects amino mode; :na, :dna or :rna selects
# nucleotide mode. Otherwise seqclass decides (Bio::Sequence::AA => amino,
# Bio::Sequence::NA => nucleotide). If that is inconclusive too, amino mode
# is chosen as soon as any sequence matches /[EFILPQ]/i, and nucleotide
# mode is used when none does.
#
# +opt+ is passed on unchanged to the selected method.
def match_line(opt = {})
  amino =
    case opt[:type]
    when :aa
      true
    when :na, :dna, :rna
      false
    else
      klass = seqclass
      if klass == Bio::Sequence::AA
        true
      elsif klass == Bio::Sequence::NA
        false
      else
        detected = nil
        each_seq do |seq|
          next unless /[EFILPQ]/i =~ seq
          detected = true
          break
        end
        detected
      end
    end
  amino ? match_line_amino(opt) : match_line_nuc(opt)
end

#match_line_amino(opt = {}) ⇒ Object

Returns the match line string of the alignment of amino-acid sequences.

It resembles the BioPerl’s AlignI::match_line method.

opt[:match_line_char]   ==> 100% equal    default: '*'
opt[:strong_match_char] ==> strong match  default: ':'
opt[:weak_match_char]   ==> weak match    default: '.'
opt[:mismatch_char]     ==> mismatch      default: ' '

More opt can be accepted. Please refer to the consensus_each_site method for opt.



583
584
585
586
587
# File 'lib/bio/alignment.rb', line 583

# Asks every site (via collect_each_site) for match_line_amino(opt) and
# joins the per-site results into a single String.
def match_line_amino(opt = {})
  marks = collect_each_site { |site| site.match_line_amino(opt) }
  marks.join('')
end

#match_line_nuc(opt = {}) ⇒ Object

Returns the match line string of the alignment of nucleic-acid sequences.

It resembles the BioPerl’s AlignI::match_line method.

opt[:match_line_char]   ==> 100% equal    default: '*'
opt[:mismatch_char]     ==> mismatch      default: ' '

More opt can be accepted. Please refer to the consensus_each_site method for opt.



600
601
602
603
604
# File 'lib/bio/alignment.rb', line 600

# Asks every site (via collect_each_site) for match_line_nuc(opt) and
# joins the per-site results into a single String.
def match_line_nuc(opt = {})
  marks = collect_each_site { |site| site.match_line_nuc(opt) }
  marks.join('')
end

#number_of_sequencesObject

Returns number of sequences in this alignment.



1314
1315
1316
1317
1318
# File 'lib/bio/alignment.rb', line 1314

# Counts how many sequences each_seq yields and returns that number.
def number_of_sequences
  count = 0
  each_seq { |_seq| count += 1 }
  count
end

#remove_all_gaps!Object

Completely removes ALL gaps in the sequences. If removes nothing, returns nil. Otherwise, returns self.

Note that it is a destructive method.



786
787
788
789
790
791
792
793
# File 'lib/bio/alignment.rb', line 786

# Deletes everything matching gap_regexp from every sequence
# (destructive; every sequence is processed).
#
# Returns self if at least one sequence was changed, otherwise nil.
def remove_all_gaps!
  changed = false
  each_seq do |seq|
    changed = true if seq.gsub!(gap_regexp, '')
  end
  changed ? self : nil
end

#seqclassObject

Returns the class of the sequences. If the instance variable @seqclass (which can be set by the ‘seqclass=’ method) is set, simply returns its value. Otherwise, returns the class of the first sequence that is not nil. If no sequences are found, returns String.



349
350
351
352
353
354
355
356
357
358
359
360
361
362
# File 'lib/bio/alignment.rb', line 349

# Returns the class used for sequences in this alignment.
#
# @seqclass wins when it is set. Otherwise the class of the first truthy
# sequence yielded by each_seq is returned, and String is the fallback
# when there is no such sequence.
def seqclass
  return @seqclass if @seqclass
  found = nil
  each_seq do |seq|
    next unless seq
    found = seq.class
    break if found
  end
  found || String
end

#sequence_namesObject

Returns an array of sequence names. The order of the names must be the same as the order of each_seq.



1323
1324
1325
# File 'lib/bio/alignment.rb', line 1323

# Returns the default sequence names: the integers 0 up to
# number_of_sequences - 1, as an Array.
def sequence_names
  Array.new(number_of_sequences) { |index| index }
end