Module: Bio::Alignment::EnumerableExtension

Includes:
Output, PropertyMethods
Included in:
ArrayExtension, HashExtension
Defined in:
lib/bio/alignment.rb,
lib/bio/alignment.rb

Overview

module Output

Constant Summary

Constant Summary

Constants included from PropertyMethods

PropertyMethods::GAP_CHAR, PropertyMethods::GAP_REGEXP, PropertyMethods::MISSING_CHAR

Instance Attribute Summary

Attributes included from PropertyMethods

#gap_char, #gap_regexp, #missing_char

Instance Method Summary collapse

Methods included from Output

#__output_phylip_common, #output, #output_clustal, #output_fasta, #output_molphy, #output_msf, #output_phylip, #output_phylipnon, #to_clustal

Methods included from PropertyMethods

#get_all_property, #is_gap?, #set_all_property

Instance Method Details

#alignment_collectObject

Iterates over each sequence, collects the results of running the block, and returns them as a new alignment (a Bio::Alignment::SequenceArray object).

Note that you should redefine this method if you want to change the class of the return value.



445
446
447
448
449
450
451
452
# File 'lib/bio/alignment.rb', line 445

# Builds a new SequenceArray from the results of the given block.
#
# The new array first receives this alignment's properties
# (get_all_property -> set_all_property); then the block is run once for
# every sequence yielded by each_seq and each block result is appended.
#
# Returns the new SequenceArray.
def alignment_collect
  SequenceArray.new.tap do |collected|
    collected.set_all_property(get_all_property)
    each_seq { |seq| collected << yield(seq) }
  end
end

#alignment_concat(align) ⇒ Object

Concatenates the given alignment. align must have each_seq or each method.

Returns self.

Note that it is a destructive method.

For Hash, please use it carefully because the order of the sequences is inconstant and key information is completely ignored.



849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
# File 'lib/bio/alignment.rb', line 849

# Destructively appends the sequences of +align+ onto this alignment's
# sequences, pairing them by iteration order.
#
# +align+ is iterated with each_seq when possible. If calling each_seq
# raises NoMethodError or ArgumentError before anything was yielded,
# +align+ is iterated with each instead. An error raised after the first
# sequence was yielded is re-raised unchanged.
#
# Positions where either side has no (or a falsy) sequence are skipped.
#
# Returns self.
def alignment_concat(align)
  targets = []
  each_seq { |own| targets << own }
  idx = 0
  # Appends one incoming sequence to its counterpart, then advances.
  append = proc do |piece|
    dest = targets[idx]
    dest.concat(piece) if dest && piece
    idx += 1
  end

  yielded = false
  begin
    align.each_seq do |piece|
      yielded = true
      append.call(piece)
    end
    return self
  rescue NoMethodError, ArgumentError => err
    # Only fall back to #each when each_seq itself was unusable.
    raise err if yielded
  end

  align.each(&append)
  self
end

#alignment_lengthObject Also known as: seq_length

Returns the alignment length. Returns the longest length of the sequence in the alignment.



366
367
368
369
370
371
372
373
# File 'lib/bio/alignment.rb', line 366

# Returns the length of the longest sequence yielded by each_seq,
# or 0 when no sequence is yielded.
def alignment_length
  longest = 0
  each_seq { |seq| longest = [longest, seq.length].max }
  longest
end

#alignment_lstrip!Object Also known as: lstrip!

Removes excess gaps in the head of the sequences. If removes nothing, returns nil. Otherwise, returns self.

Note that it is a destructive method.



752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
# File 'lib/bio/alignment.rb', line 752

# Destructively removes the leading columns that consist only of gaps.
#
# Sites are visited from the head with each_site; a site counts as
# gap-only when it is empty after remove_gaps!. Counting stops at the
# first site that still has content.
#
# Returns nil if nothing was removed, otherwise self.
def alignment_lstrip!
  #(String-like)
  leading = 0
  each_site do |site|
    site.remove_gaps!
    break unless site.empty?
    leading += 1
  end
  return nil unless leading > 0
  each_seq { |seq| seq[0, leading] = '' }
  self
end

#alignment_normalize!Object Also known as: normalize!

Fills gaps to the tail of each sequence if the length of the sequence is shorter than the alignment length.

Note that it is a destructive method.



712
713
714
715
716
717
718
719
# File 'lib/bio/alignment.rb', line 712

# Destructively pads every sequence shorter than alignment_length with
# gap_char at its tail, so that it reaches the alignment length.
#
# Returns self.
def alignment_normalize!
  #(original)
  target = alignment_length
  each_seq do |seq|
    shortfall = target - seq.length
    seq << (gap_char * shortfall) if shortfall > 0
  end
  self
end

#alignment_rstrip!Object Also known as: rstrip!

Removes excess gaps in the tail of the sequences. If removes nothing, returns nil. Otherwise, returns self.

Note that it is a destructive method.



727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
# File 'lib/bio/alignment.rb', line 727

# Destructively removes the trailing columns that consist only of gaps.
#
# Sites are visited from the last column down to column 0 with
# each_site_step; a site counts as gap-only when it is empty after
# remove_gaps!. Counting stops at the first site that still has content.
# Sequences longer than the remaining length are truncated to it.
#
# Returns nil if nothing was removed, otherwise self.
def alignment_rstrip!
  #(String-like)
  full = alignment_length
  keep = full
  each_site_step(full - 1, 0, -1) do |site|
    site.remove_gaps!
    break unless site.empty?
    keep -= 1
  end
  return nil unless keep < full
  each_seq { |seq| seq[keep..-1] = '' if seq.length > keep }
  self
end

#alignment_site(position) ⇒ Object

Gets a site of the position. Returns a Bio::Alignment::Site object.

If the position is out of range, it returns the site of which all are gaps.



403
404
405
406
407
# File 'lib/bio/alignment.rb', line 403

# Returns the site at +position+.
#
# The site object comes from _alignment_site(position) and is given this
# alignment's properties (get_all_property) before being returned.
def alignment_site(position)
  _alignment_site(position).tap do |site|
    site.set_all_property(get_all_property)
  end
end

#alignment_slice(*arg) ⇒ Object Also known as: slice

Returns the specified range of the alignment. For each sequence, the 'slice' method (it may be String#slice, which is the same as String#[]) is executed, and returns a new alignment as a Bio::Alignment::SequenceArray object.

Unlike alignment_window method, the result alignment might contain nil.

If you want to change return value's class, you should redefine alignment_collect method.



807
808
809
810
811
812
813
# File 'lib/bio/alignment.rb', line 807

# Calls slice(*arg) on every sequence and gathers the results through
# alignment_collect, whose return value is returned.
#
# A per-sequence result may be nil when slice returns nil for the
# given arguments.
def alignment_slice(*arg)
  #(String-like)
  #(BioPerl) AlignI::slice like method
  alignment_collect { |seq| seq.slice(*arg) }
end

#alignment_strip!Object Also known as: strip!

Removes excess gaps in the sequences. If removes nothing, returns nil. Otherwise, returns self.

Note that it is a destructive method.



774
775
776
777
778
779
# File 'lib/bio/alignment.rb', line 774

# Destructively strips gap-only columns from both ends by running
# alignment_rstrip! and then alignment_lstrip! (both are always run).
#
# Returns the first truthy result of the two calls, or nil/false when
# neither removed anything.
def alignment_strip!
  #(String-like)
  tail_result = alignment_rstrip!
  head_result = alignment_lstrip!
  tail_result || head_result
end

#alignment_subseq(*arg) ⇒ Object Also known as: subseq

For each sequence, the 'subseq' method (Bio::Sequence::Common#subseq is expected) is executed, and returns a new alignment as a Bio::Alignment::SequenceArray object.

All sequences in the alignment are expected to be kind of Bio::Sequence::NA or Bio::Sequence::AA objects.

Unlike alignment_window method, the result alignment might contain nil.

If you want to change return value's class, you should redefine alignment_collect method.



829
830
831
832
833
834
# File 'lib/bio/alignment.rb', line 829

# Calls subseq(*arg) on every sequence and gathers the results through
# alignment_collect, whose return value is returned.
#
# Each sequence must respond to subseq.
def alignment_subseq(*arg)
  #(original)
  alignment_collect { |seq| seq.subseq(*arg) }
end

#alignment_window(*arg) ⇒ Object Also known as: window

Returns specified range of the alignment. For each sequence, the '[]' method (it may be String#[]) is executed, and returns a new alignment as a Bio::Alignment::SequenceArray object.

Unlike the alignment_slice method, the resulting alignment is guaranteed to contain String objects even if the specified range is out of range.

If you want to change return value's class, you should redefine alignment_collect method.



466
467
468
469
470
# File 'lib/bio/alignment.rb', line 466

# Applies [](*arg) to every sequence and gathers the results through
# alignment_collect, whose return value is returned.
#
# Where [] returns nil or false, an empty sequence built with
# seqclass.new('') is used instead.
def alignment_window(*arg)
  alignment_collect { |seq| seq[*arg] || seqclass.new('') }
end

#collect_each_siteObject

Iterates over each site of the alignment, collects the results of running the block, and returns them as an array. It yields a Bio::Alignment::Site object.



503
504
505
506
507
508
509
# File 'lib/bio/alignment.rb', line 503

# Runs the given block for every site yielded by each_site and returns
# the block results as a new Array, in iteration order.
def collect_each_site
  [].tap do |results|
    each_site { |site| results << yield(site) }
  end
end

#consensus_each_site(opt = {}) ⇒ Object

Helper method for calculating consensus sequence. It iterates over each site of the alignment. In each site, gaps will be removed if specified with opt. It yields a Bio::Alignment::Site object. Results running the block (String objects are expected) are joined to a string and it returns the string.

opt[:gap_mode] ==> 0 -- gaps are regarded as normal characters
                   1 -- a site within gaps is regarded as a gap
                  -1 -- gaps are eliminated from consensus calculation
    default: 0


523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
# File 'lib/bio/alignment.rb', line 523

# Helper for consensus calculation: runs the block on every site (via
# collect_each_site) and joins the per-site results into one String.
#
# opt[:missing_char]:: used when the block returns nil/false
#                      (defaults to self.missing_char)
# opt[:gap_mode]::
#   0 or nil -- sites are passed to the block unchanged
#   1        -- a site for which has_gap? is true yields gap_char
#   -1       -- gaps are removed from the site first (remove_gaps!);
#               a site that becomes empty yields gap_char
#
# Raises RuntimeError for any other :gap_mode value.
def consensus_each_site(opt = {})
  mchar = opt[:missing_char] || self.missing_char
  per_site =
    case opt[:gap_mode]
    when 0, nil
      collect_each_site { |site| yield(site) || mchar }
    when 1
      collect_each_site do |site|
        if site.has_gap?
          gap_char
        else
          yield(site) || mchar
        end
      end
    when -1
      collect_each_site do |site|
        site.remove_gaps!
        if site.empty?
          gap_char
        else
          yield(site) || mchar
        end
      end
    else
      raise ':gap_mode must be 0, 1 or -1'
    end
  per_site.join('')
end

#consensus_iupac(opt = {}) ⇒ Object

Returns the IUPAC consensus string of the alignment of nucleic-acid sequences.

It resembles the BioPerl's AlignI::consensus_iupac method.

Please refer to the consensus_each_site method for opt.



565
566
567
568
569
# File 'lib/bio/alignment.rb', line 565

# Returns the string built by consensus_each_site(opt), using each
# site's own consensus_iupac as the per-site result.
#
# +opt+ is passed through to consensus_each_site unchanged.
def consensus_iupac(opt = {})
  consensus_each_site(opt, &:consensus_iupac)
end

#consensus_string(threshold = 1.0, opt = {}) ⇒ Object

Returns the consensus string of the alignment. 0.0 <= threshold <= 1.0 is expected.

It resembles the BioPerl's AlignI::consensus_string method.

Please refer to the consensus_each_site method for opt.



552
553
554
555
556
# File 'lib/bio/alignment.rb', line 552

# Returns the string built by consensus_each_site(opt), using each
# site's consensus_string(threshold) as the per-site result.
#
# +threshold+ is handed to every site; +opt+ is passed through to
# consensus_each_site unchanged.
def consensus_string(threshold = 1.0, opt = {})
  consensus_each_site(opt) { |site| site.consensus_string(threshold) }
end

#convert_match(match_char = '.') ⇒ Object

This is the BioPerl's AlignI::match like method.

Changes second to last sequences' sites to match_char (default: '.') when a site is equal to the first sequence's corresponding site.

Note that it is a destructive method.

For Hash, please use it carefully because the order of the sequences is inconstant.



662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
# File 'lib/bio/alignment.rb', line 662

# Destructively replaces, in every sequence after the first, each
# character that equals the first sequence's character at the same
# position with +match_char+ (default '.').
#
# Positions where the first sequence has a gap (is_gap?) and positions
# past the end of the sequence being edited are left untouched.
#
# Returns self.
def convert_match(match_char = '.')
  #(BioPerl) AlignI::match like method
  width = alignment_length
  reference = nil
  each_seq do |seq|
    # The first truthy sequence becomes the reference and is not edited.
    unless reference
      reference = seq
      next
    end
    width.times do |col|
      next unless seq[col] && reference[col] == seq[col]
      next if is_gap?(reference[col..col])
      seq[col..col] = match_char
    end
  end
  self
end

#convert_unmatch(match_char = '.') ⇒ Object

This is the BioPerl's AlignI::unmatch like method.

Changes sites that hold match_char (default: '.') in the second to last sequences back to the first sequence's characters at the corresponding sites.

Note that it is a destructive method.

For Hash, please use it carefully because the order of the sequences is inconstant.



690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
# File 'lib/bio/alignment.rb', line 690

# Destructively restores, in every sequence after the first, each
# position that holds +match_char+ (default '.') to the first sequence's
# character at the same position.
#
# A +match_char+ located at or beyond the end of the first sequence is
# left as it is. (Previously a match_char sitting exactly at the first
# sequence's length was replaced by an empty string, because
# firstseq[i..i] is "" there, which silently deleted the character and
# shifted the rest of the sequence.)
#
# Returns self.
def convert_unmatch(match_char = '.')
  #(BioPerl) AlignI::unmatch like method
  len = alignment_length
  firstseq = nil
  each_seq do |s|
    # The first truthy sequence is the reference and is not edited.
    unless firstseq
      firstseq = s
      next
    end
    (0...len).each do |i|
      next unless s[i..i] == match_char
      # firstseq[i] is nil once i runs past the reference sequence;
      # in that case there is nothing to restore.
      s[i..i] = firstseq[i..i] if firstseq[i]
    end
  end
  self
end

#each_seq(&block) ⇒ Object

Iterates over each sequences. Yields a sequence. It acts the same as Enumerable#each.

You would redefine the method suitable for the class/object.



340
341
342
# File 'lib/bio/alignment.rb', line 340

# Iterates over the sequences by delegating to #each, forwarding the
# given block as-is; the return value is whatever #each returns.
#
# Including classes whose #each does not yield plain sequences are
# expected to redefine this method.
def each_seq(&block) #:yields: seq
  each(&block)
end

#each_siteObject

Iterates over each site of the alignment. It yields a Bio::Alignment::Site object (which inherits Array). It returns self.



412
413
414
415
416
417
418
419
420
# File 'lib/bio/alignment.rb', line 412

# Yields the site of every column, from 0 up to alignment_length - 1.
#
# Each site is obtained from _alignment_site and is given this
# alignment's properties (read once, up front, with get_all_property)
# before it is yielded.
#
# Returns self.
def each_site
  shared_props = get_all_property
  alignment_length.times do |col|
    yield(_alignment_site(col).tap { |site| site.set_all_property(shared_props) })
  end
  self
end

#each_site_step(start, stop, step = 1) ⇒ Object

Iterates over each site of the alignment, with specifying start, stop positions and step. It yields Bio::Alignment::Site object (which inherits Array). It returns self. It is same as start.step(stop, step) { |i| yield alignment_site(i) }.



428
429
430
431
432
433
434
435
436
# File 'lib/bio/alignment.rb', line 428

# Yields the site of every position produced by
# start.step(stop, step).
#
# Each site is obtained from _alignment_site and is given this
# alignment's properties (read once, up front, with get_all_property)
# before it is yielded.
#
# Returns self.
def each_site_step(start, stop, step = 1)
  shared_props = get_all_property
  start.step(stop, step) do |pos|
    yield(_alignment_site(pos).tap { |site| site.set_all_property(shared_props) })
  end
  self
end

#each_window(window_size, step_size = 1) ⇒ Object

Iterates over each sliding window of the alignment. window_size is the size of the sliding window. step_size is the distance moved at each slide. It yields a Bio::Alignment::SequenceArray object which contains each sliding window. It returns a Bio::Alignment::SequenceArray object which contains the remainder of the alignment at the terminal end. If window_size is smaller than 0, it returns nil.



481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
# File 'lib/bio/alignment.rb', line 481

# Yields each sliding window of the alignment (as returned by
# alignment_window(position, window_size)).
#
# window_size:: width of every window; a negative value makes the
#               method return nil without yielding
# step_size::   distance between window starts. With a value >= 0 the
#               windows run from position 0 towards the tail; with a
#               negative value they run from the tail towards position 0.
#
# Returns the part of the alignment that was not covered after the last
# window: the tail following it when stepping forwards, the head
# preceding it when stepping backwards.
#
# When the window is longer than the alignment no window is yielded and
# the whole alignment is returned as the remainder. (Previously the
# forward direction raised NoMethodError on nil in this situation, and
# the backward direction returned a slice derived from a negative
# index.)
def each_window(window_size, step_size = 1)
  return nil if window_size < 0
  last_pos = nil # start position of the most recently yielded window
  if step_size >= 0
    0.step(alignment_length - window_size, step_size) do |i|
      yield alignment_window(i, window_size)
      last_pos = i
    end
    # Nothing yielded: everything is remainder.
    return alignment_window(0..-1) unless last_pos
    alignment_window((last_pos + window_size)..-1)
  else
    i = alignment_length - window_size
    while i >= 0
      yield alignment_window(i, window_size)
      last_pos = i
      i += step_size
    end
    # Nothing yielded: everything is remainder.
    return alignment_window(0..-1) unless last_pos
    alignment_window(0...last_pos)
  end
end

#match_line(opt = {}) ⇒ Object

Returns the match line string of the alignment of nucleic- or amino-acid sequences. The type of the sequence is automatically determined or you can specify with opt.

It resembles the BioPerl's AlignI::match_line method.

opt[:type] ==> :na or :aa (or determined by sequence class)
opt[:match_line_char]   ==> 100% equal    default: '*'
opt[:strong_match_char] ==> strong match  default: ':'
opt[:weak_match_char]   ==> weak match    default: '.'
opt[:mismatch_char]     ==> mismatch      default: ' '
  :strong_ and :weak_match_char are used only in amino mode (:aa)

More opt can be accepted. Please refer to the consensus_each_site method for opt.



624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
# File 'lib/bio/alignment.rb', line 624

# Returns the match line of the alignment, delegating to
# match_line_amino(opt) or match_line_nuc(opt).
#
# The amino/nucleic decision is made in this order:
# 1. opt[:type] -- :aa selects amino; :na, :dna or :rna select nucleic.
# 2. seqclass   -- Bio::Sequence::AA selects amino, Bio::Sequence::NA
#                  selects nucleic (exact class comparison).
# 3. Otherwise amino is chosen if any sequence contains one of the
#    letters E, F, I, L, P or Q (case-insensitive); else nucleic.
def match_line(opt = {})
  amino =
    case opt[:type]
    when :aa
      true
    when :na, :dna, :rna
      false
    else
      if seqclass == Bio::Sequence::AA
        true
      elsif seqclass == Bio::Sequence::NA
        false
      else
        detected = nil
        self.each_seq do |seq|
          next unless /[EFILPQ]/i =~ seq
          detected = true
          break
        end
        detected
      end
    end
  amino ? match_line_amino(opt) : match_line_nuc(opt)
end

#match_line_amino(opt = {}) ⇒ Object

Returns the match line string of the alignment of amino-acid sequences.

It resembles the BioPerl's AlignI::match_line method.

opt[:match_line_char]   ==> 100% equal    default: '*'
opt[:strong_match_char] ==> strong match  default: ':'
opt[:weak_match_char]   ==> weak match    default: '.'
opt[:mismatch_char]     ==> mismatch      default: ' '

More opt can be accepted. Please refer to the consensus_each_site method for opt.



584
585
586
587
588
# File 'lib/bio/alignment.rb', line 584

# Returns the amino-acid match line: the results of calling
# match_line_amino(opt) on every site (via collect_each_site), joined
# into one String.
def match_line_amino(opt = {})
  per_site = collect_each_site { |site| site.match_line_amino(opt) }
  per_site.join('')
end

#match_line_nuc(opt = {}) ⇒ Object

Returns the match line string of the alignment of nucleic-acid sequences.

It resembles the BioPerl's AlignI::match_line method.

opt[:match_line_char]   ==> 100% equal    default: '*'
opt[:mismatch_char]     ==> mismatch      default: ' '

More opt can be accepted. Please refer to the consensus_each_site method for opt.



601
602
603
604
605
# File 'lib/bio/alignment.rb', line 601

# Returns the nucleic-acid match line: the results of calling
# match_line_nuc(opt) on every site (via collect_each_site), joined
# into one String.
def match_line_nuc(opt = {})
  per_site = collect_each_site { |site| site.match_line_nuc(opt) }
  per_site.join('')
end

#number_of_sequencesObject

Returns number of sequences in this alignment.



1315
1316
1317
1318
1319
# File 'lib/bio/alignment.rb', line 1315

# Returns the number of sequences, counted by iterating each_seq.
def number_of_sequences
  total = 0
  self.each_seq { |_seq| total += 1 }
  total
end

#remove_all_gaps!Object

Completely removes ALL gaps in the sequences. If removes nothing, returns nil. Otherwise, returns self.

Note that it is a destructive method.



787
788
789
790
791
792
793
794
# File 'lib/bio/alignment.rb', line 787

# Destructively deletes every match of gap_regexp from every sequence
# (via gsub!).
#
# Returns self if at least one sequence was changed, otherwise nil.
def remove_all_gaps!
  changed = false
  each_seq do |seq|
    changed = true if seq.gsub!(gap_regexp, '')
  end
  changed ? self : nil
end

#seqclassObject

Returns class of the sequence. If instance variable @seqclass (which can be set by 'seqclass=' method) is set, simply returns the value. Otherwise, returns the first sequence's class. If no sequences are found, returns String.



349
350
351
352
353
354
355
356
357
358
359
360
361
362
# File 'lib/bio/alignment.rb', line 349

# Returns the class used for sequences in this alignment.
#
# * @seqclass, when that instance variable is defined and truthy;
# * otherwise the class of the first truthy sequence from each_seq;
# * otherwise String.
def seqclass
  return @seqclass if defined?(@seqclass) && @seqclass
  each_seq { |seq| return seq.class if seq }
  String
end

#sequence_namesObject

Returns an array of sequence names. The order of the names must be the same as the order of each_seq.



1324
1325
1326
# File 'lib/bio/alignment.rb', line 1324

# Returns the default sequence names: the Array of integer indexes
# 0, 1, ..., number_of_sequences - 1.
def sequence_names
  Array.new(self.number_of_sequences) { |index| index }
end