Class: Bio::Cigar

Inherits:
Object
  • Object
show all
Defined in:
lib/bio-cigar/cigar.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(cigar_string) ⇒ Cigar

Returns a new instance of Cigar.



5
6
7
# File 'lib/bio-cigar/cigar.rb', line 5

# Create a Cigar wrapping the given CIGAR string.
#
# The argument is stored as-is in @cigar_string; no parsing or validation
# happens at construction time.
#
# @param cigar_string [String] CIGAR text such as '1S3M' (presumably a
#   String; nothing here enforces the type)
def initialize(cigar_string)
  @cigar_string = cigar_string
end

Instance Attribute Details

#cigar_string ⇒ Object

Returns the value of attribute cigar_string.



3
4
5
# File 'lib/bio-cigar/cigar.rb', line 3

# Reader for the CIGAR string held by this object.
#
# @return [Object] the current value of @cigar_string, exactly as stored
def cigar_string
  @cigar_string
end

Instance Method Details

#each_alignment_chunk ⇒ Object

Yield the type and count for each different part of the cigar string e.g.

cigar = Bio::Cigar.new('1S3M')
cigar.each_alignment_chunk do |type, count|
    type #=> first 'S', second 'M' (as strings)
    count #=> first 1, second 3 (as integers)
end


69
70
71
72
73
74
75
76
77
78
# File 'lib/bio-cigar/cigar.rb', line 69

# Yield the operation type and length of each chunk of the CIGAR string, in
# order from left to right.
#
#   cigar = Bio::Cigar.new('1S3M')
#   cigar.each_alignment_chunk do |type, count|
#     type  #=> first 'S', second 'M' (as strings)
#     count #=> first 1, second 3 (as integers)
#   end
#
# Chunks are yielded as they are recognised, so the block may already have
# been called for leading valid chunks before a malformed remainder raises.
#
# @yieldparam type [String] one of M, S, I, H, N, D, P, = or X
# @yieldparam count [Integer] length of the chunk
# @return [Enumerator] when called without a block
# @return [nil] when called with a block and the whole string was consumed
# @raise [RuntimeError] if any part of the string is not a valid chunk
def each_alignment_chunk
  return enum_for(:each_alignment_chunk) unless block_given?

  position = 0
  # \G pins every match to the current offset. Unlike a '^' anchor it cannot
  # skip ahead to the start of a later line, so embedded newlines or other
  # garbage are reported instead of being silently ignored. Matching by
  # offset also avoids re-copying the remainder of the string per chunk.
  while (chunk = @cigar_string.match(/\G(\d+)([MSIHNDP=X])/, position))
    yield chunk[2], chunk[1].to_i
    position = chunk.end(0)
  end

  leftover = @cigar_string[position..-1]
  return if leftover.empty?

  raise "Incorrect parsing of cigar string #{@cigar_string}, at the end left with #{leftover}"
end

#percent_identity(reference_sequence_string, query_sequence_string) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/bio-cigar/cigar.rb', line 9

# Compute the percent identity of the alignment described by this CIGAR
# string, given the reference and query sequences it aligns.
#
# Both sequences are indexed from 0, and each chunk yielded by
# each_alignment_chunk advances the reference and/or query position:
#
# * M, =, X : compared base by base ('=' and 'X' are not trusted, the
#             comparison is redone); both positions advance
# * I       : counted as mismatches; query advances
# * D       : counted as mismatches; reference advances
# * S, H    : query advances, nothing is counted
# * N       : reference advances, nothing is counted
# * P       : ignored
#
# A compared position that lies beyond the end of either sequence counts as
# a mismatch. When nothing is counted at all (e.g. an empty CIGAR string) the
# returned percentage is Float::NAN, since it is 0.0 divided by zero.
#
# NOTE(review): hard-clipped (H) bases are normally absent from a SAM SEQ
# field, yet the query position is advanced for them here - confirm that
# callers pass a query that still includes hard-clipped bases.
#
# @param reference_sequence_string [String] reference bases, starting at the
#   first aligned reference position
# @param query_sequence_string [String] query bases
# @return [Array(Float, Integer, Integer)] percent identity, number of
#   matches, number of mismatches
# @raise [RuntimeError] if a chunk has an unrecognised type, or if
#   each_alignment_chunk rejects the CIGAR string
def percent_identity(reference_sequence_string, query_sequence_string)
  num_match = 0
  num_mismatch = 0
  ref_index = 0
  query_index = 0

  each_alignment_chunk do |type, count|
    case type
    when 'M', '=', 'X'
      count.times do |offset|
        ref_base = reference_sequence_string[ref_index + offset]
        query_base = query_sequence_string[query_index + offset]
        # Requiring ref_base to be present stops two out-of-range lookups
        # (nil == nil) from being scored as a match.
        if ref_base && ref_base == query_base
          num_match += 1
        else
          num_mismatch += 1
        end
      end
      ref_index += count
      query_index += count
    when 'I'
      # Extra characters in the query sequence
      num_mismatch += count
      query_index += count
    when 'D'
      # Characters present in the reference but missing from the query
      num_mismatch += count
      ref_index += count
    when 'S', 'H'
      # Clipped query bases: skipped without being scored
      query_index += count
    when 'N'
      # Long skip on the reference sequence
      ref_index += count
    when 'P'
      # Padding consumes neither sequence
    else
      raise "Cigar string not parsed correctly. Unrecognised alignment type #{type}"
    end
  end

  percent = num_match.to_f / (num_match + num_mismatch) * 100
  [percent, num_match, num_mismatch]
end