Class: Bio::Cigar
- Inherits:
-
Object
- Object
- Bio::Cigar
- Defined in:
- lib/bio-cigar/cigar.rb
Instance Attribute Summary collapse
-
#cigar_string ⇒ Object
Returns the value of attribute cigar_string.
Instance Method Summary collapse
-
#each_alignment_chunk ⇒ Object
Yield the type and count for each part of the CIGAR string, in order, e.g. "3M1I" yields ("M", 3) and then ("I", 1).
-
#initialize(cigar_string) ⇒ Cigar
constructor
A new instance of Cigar.
- #percent_identity(reference_sequence_string, query_sequence_string) ⇒ Object
Constructor Details
#initialize(cigar_string) ⇒ Cigar
Returns a new instance of Cigar.
5 6 7 |
# Constructor: stores the given cigar_string in @cigar_string unchanged; no parsing or validation is done here.
# File 'lib/bio-cigar/cigar.rb', line 5 def initialize(cigar_string) @cigar_string = cigar_string end |
Instance Attribute Details
#cigar_string ⇒ Object
Returns the value of attribute cigar_string.
3 4 5 |
# Attribute reader: returns the current value of @cigar_string.
# File 'lib/bio-cigar/cigar.rb', line 3 def cigar_string @cigar_string end |
Instance Method Details
#each_alignment_chunk ⇒ Object
69 70 71 72 73 74 75 76 77 78 |
# File 'lib/bio-cigar/cigar.rb', line 69
#
# Yield the operation type and its length for each part of the CIGAR string,
# in order. For example "3M1I" yields ("M", 3) and then ("I", 1).
#
# Chunks are yielded as they are parsed, so a malformed string may yield some
# leading chunks before the error is raised.
#
# @yieldparam type [String] one of M, S, I, H, N, D, P, = or X
# @yieldparam count [Integer] the length given for that operation
# @return [Enumerator] when called without a block
# @return [nil] when called with a block
# @raise [RuntimeError] if any part of the string cannot be parsed
def each_alignment_chunk
  return enum_for(:each_alignment_chunk) unless block_given?

  pos = 0
  # \G pins every match to +pos+, so parsing can neither skip over leading
  # garbage nor silently stop at an embedded newline (which a line-anchored
  # ^ combined with .* would allow).
  while (chunk = @cigar_string.match(/\G(\d+)([MSIHNDP=X])/, pos))
    yield chunk[2], chunk[1].to_i
    pos = chunk.end(0)
  end

  # Anything not consumed above is unparseable input.
  leftover = @cigar_string[pos..-1]
  unless leftover.empty?
    raise "Incorrect parsing of cigar string #{@cigar_string}, at the end left with #{leftover}"
  end

  nil
end
#percent_identity(reference_sequence_string, query_sequence_string) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/bio-cigar/cigar.rb', line 9
#
# Walk the CIGAR string alongside the two sequences and count matching and
# mismatching alignment columns.
#
# Counting rules, per operation:
# * M, = and X: each column is compared base-by-base (= and X are not
#   trusted, they are recalculated from the sequences).
# * I and D: every inserted/deleted base counts as a mismatch.
# * S, H, N and P: contribute to neither count.
#
# @param reference_sequence_string [String] reference sequence, starting at
#   the first aligned reference position
# @param query_sequence_string [String] query sequence
# @return [Array] three elements: percent identity (Float), number of
#   matches (Integer), number of mismatches (Integer). The percentage is
#   NaN when no column was counted at all.
# @raise [RuntimeError] if an unrecognised alignment type is encountered
def percent_identity(reference_sequence_string, query_sequence_string)
  num_match = 0
  num_mismatch = 0
  ref_index = 0
  query_index = 0

  each_alignment_chunk do |type, count|
    case type
    when 'M', '=', 'X'
      count.times do |i|
        ref_base = reference_sequence_string[ref_index + i]
        query_base = query_sequence_string[query_index + i]
        # A position past the end of a sequence reads as nil. Requiring a
        # real reference base stops nil == nil (both sequences shorter than
        # the CIGAR claims) from being counted as a match.
        if ref_base && ref_base == query_base
          num_match += 1
        else
          num_mismatch += 1
        end
      end
      ref_index += count
      query_index += count
    when 'I'
      # Extra characters in the query sequence
      num_mismatch += count
      query_index += count
    when 'D'
      # Characters missing from the query sequence
      num_mismatch += count
      ref_index += count
    when 'S'
      # Soft clip: skip over the clipped query bases
      query_index += count
    when 'H'
      # NOTE(review): this advances through the query, i.e. it assumes
      # hard-clipped bases are still present in query_sequence_string. The
      # SAM specification says they are absent from SEQ - confirm which
      # form callers pass in.
      query_index += count
    when 'P'
      # Padding: consumes neither sequence
    when 'N'
      # Long skip on the reference sequence
      ref_index += count
    else
      raise "Cigar string not parsed correctly. Unrecognised alignment type #{type}"
    end
  end

  percent = num_match.to_f / (num_match + num_mismatch) * 100
  [percent, num_match, num_mismatch]
end