Module: SSW

Defined in:
lib/libssw.rb,
lib/ssw/dna.rb,
lib/ssw/aaseq.rb,
lib/ssw/align.rb,
lib/ssw/libssw.rb,
lib/ssw/profile.rb,
lib/ssw/version.rb,
lib/ssw/BLOSUM50.rb,
lib/ssw/BLOSUM62.rb

Defined Under Namespace

Modules: AASeq, DNA, LibSSW Classes: Align, Error, Profile

Constant Summary collapse

# Version string of the library.
VERSION = '0.0.5'
# BLOSUM50 substitution scores stored as one flat array in row-major order:
# 24 rows of 24 values. Rows and columns use the symbol order shown in the
# header comment (A R N D C Q E G H I L K M F P S T W Y V B Z X *); the
# trailing comment on each row names the row's symbol.
#
# The array is frozen because it is a shared constant: an in-place
# modification by one caller would otherwise change the scores seen by
# every later user of the table.
BLOSUM50 =
[
  #  A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X   *
 5, -2, -1, -2, -1, -1, -1,  0, -2, -1, -2, -1, -1, -3, -1,  1,  0, -3, -2,  0, -2, -1, -1, -5, # A
-2,  7, -1, -2, -4,  1,  0, -3,  0, -4, -3,  3, -2, -3, -3, -1, -1, -3, -1, -3, -1,  0, -1, -5, # R
-1, -1,  7,  2, -2,  0,  0,  0,  1, -3, -4,  0, -2, -4, -2,  1,  0, -4, -2, -3,  5,  0, -1, -5, # N
-2, -2,  2,  8, -4,  0,  2, -1, -1, -4, -4, -1, -4, -5, -1,  0, -1, -5, -3, -4,  6,  1, -1, -5, # D
-1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5, # C
-1,  1,  0,  0, -3,  7,  2, -2,  1, -3, -2,  2,  0, -4, -1,  0, -1, -1, -1, -3,  0,  4, -1, -5, # Q
-1,  0,  0,  2, -3,  2,  6, -3,  0, -4, -3,  1, -2, -3, -1, -1, -1, -3, -2, -3,  1,  5, -1, -5, # E
 0, -3,  0, -1, -3, -2, -3,  8, -2, -4, -4, -2, -3, -4, -2,  0, -2, -3, -3, -4, -1, -2, -1, -5, # G
-2,  0,  1, -1, -3,  1,  0, -2, 10, -4, -3,  0, -1, -1, -2, -1, -2, -3,  2, -4,  0,  0, -1, -5, # H
-1, -4, -3, -4, -2, -3, -4, -4, -4,  5,  2, -3,  2,  0, -3, -3, -1, -3, -1,  4, -4, -3, -1, -5, # I
-2, -3, -4, -4, -2, -2, -3, -4, -3,  2,  5, -3,  3,  1, -4, -3, -1, -2, -1,  1, -4, -3, -1, -5, # L
-1,  3,  0, -1, -3,  2,  1, -2,  0, -3, -3,  6, -2, -4, -1,  0, -1, -3, -2, -3,  0,  1, -1, -5, # K
-1, -2, -2, -4, -2,  0, -2, -3, -1,  2,  3, -2,  7,  0, -3, -2, -1, -1,  0,  1, -3, -1, -1, -5, # M
-3, -3, -4, -5, -2, -4, -3, -4, -1,  0,  1, -4,  0,  8, -4, -3, -2,  1,  4, -1, -4, -4, -1, -5, # F
-1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5, # P
 1, -1,  1,  0, -1,  0, -1,  0, -1, -3, -3,  0, -2, -3, -1,  5,  2, -4, -2, -2,  0,  0, -1, -5, # S
 0, -1,  0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1,  2,  5, -3, -2,  0,  0, -1, -1, -5, # T
-3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1,  1, -4, -4, -3, 15,  2, -3, -5, -2, -1, -5, # W
-2, -1, -2, -3, -3, -1, -2, -3,  2, -1, -1, -2,  0,  4, -3, -2, -2,  2,  8, -1, -3, -2, -1, -5, # Y
 0, -3, -3, -4, -1, -3, -3, -4, -4,  4,  1, -3,  1, -1, -3, -2,  0, -3, -1,  5, -3, -3, -1, -5, # V
-2, -1,  5,  6, -3,  0,  1, -1,  0, -4, -4,  0, -3, -4, -2,  0,  0, -5, -3, -3,  6,  1, -1, -5, # B
-1,  0,  0,  1, -3,  4,  5, -2,  0, -3, -3,  1, -1, -4, -1,  0, -1, -2, -2, -3,  1,  5, -1, -5, # Z
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5, # X
-5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,  1  # *
].freeze
# BLOSUM62 substitution scores stored as one flat array in row-major order:
# 24 rows of 24 values. Rows and columns use the symbol order shown in the
# header comment (A R N D C Q E G H I L K M F P S T W Y V B Z X *); the
# trailing comment on each row names the row's symbol.
#
# The array is frozen because it is a shared constant: an in-place
# modification by one caller would otherwise change the scores seen by
# every later user of the table.
BLOSUM62 =
[
  #  A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X   *
 4, -1, -2, -2,  0, -1, -1,  0, -2, -1, -1, -1, -1, -2, -1,  1,  0, -3, -2,  0, -2, -1,  0, -4, # A
-1,  5,  0, -2, -3,  1,  0, -2,  0, -3, -2,  2, -1, -3, -2, -1, -1, -3, -2, -3, -1,  0, -1, -4, # R
-2,  0,  6,  1, -3,  0,  0,  0,  1, -3, -3,  0, -2, -3, -2,  1,  0, -4, -2, -3,  3,  0, -1, -4, # N
-2, -2,  1,  6, -3,  0,  2, -1, -1, -3, -4, -1, -3, -3, -1,  0, -1, -4, -3, -3,  4,  1, -1, -4, # D
 0, -3, -3, -3,  9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4, # C
-1,  1,  0,  0, -3,  5,  2, -2,  0, -3, -2,  1,  0, -3, -1,  0, -1, -2, -1, -2,  0,  3, -1, -4, # Q
-1,  0,  0,  2, -4,  2,  5, -2,  0, -3, -3,  1, -2, -3, -1,  0, -1, -3, -2, -2,  1,  4, -1, -4, # E
 0, -2,  0, -1, -3, -2, -2,  6, -2, -4, -4, -2, -3, -3, -2,  0, -2, -2, -3, -3, -1, -2, -1, -4, # G
-2,  0,  1, -1, -3,  0,  0, -2,  8, -3, -3, -1, -2, -1, -2, -1, -2, -2,  2, -3,  0,  0, -1, -4, # H
-1, -3, -3, -3, -1, -3, -3, -4, -3,  4,  2, -3,  1,  0, -3, -2, -1, -3, -1,  3, -3, -3, -1, -4, # I
-1, -2, -3, -4, -1, -2, -3, -4, -3,  2,  4, -2,  2,  0, -3, -2, -1, -2, -1,  1, -4, -3, -1, -4, # L
-1,  2,  0, -1, -3,  1,  1, -2, -1, -3, -2,  5, -1, -3, -1,  0, -1, -3, -2, -2,  0,  1, -1, -4, # K
-1, -1, -2, -3, -1,  0, -2, -3, -2,  1,  2, -1,  5,  0, -2, -1, -1, -1, -1,  1, -3, -1, -1, -4, # M
-2, -3, -3, -3, -2, -3, -3, -3, -1,  0,  0, -3,  0,  6, -4, -2, -2,  1,  3, -1, -3, -3, -1, -4, # F
-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4,  7, -1, -1, -4, -3, -2, -2, -1, -2, -4, # P
 1, -1,  1,  0, -1,  0,  0,  0, -1, -2, -2,  0, -1, -2, -1,  4,  1, -3, -2, -2,  0,  0,  0, -4, # S
 0, -1,  0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1,  1,  5, -2, -2,  0, -1, -1,  0, -4, # T
-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1,  1, -4, -3, -2, 11,  2, -3, -4, -3, -2, -4, # W
-2, -2, -2, -3, -2, -1, -2, -3,  2, -1, -1, -2, -1,  3, -3, -2, -2,  2,  7, -1, -3, -2, -1, -4, # Y
 0, -3, -3, -3, -1, -2, -2, -3, -3,  3,  1, -2,  1, -1, -2, -2,  0, -3, -1,  4, -3, -2, -1, -4, # V
-2, -1,  3,  4, -3,  0,  1, -1,  0, -3, -4,  0, -3, -3, -2,  0, -1, -4, -3, -3,  4,  1, -1, -4, # B
-1,  0,  0,  1, -3,  3,  4, -2,  0, -3, -3,  1, -1, -3, -1,  0, -1, -3, -2, -2,  1,  4, -1, -4, # Z
 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2,  0,  0, -2, -1, -1, -1, -1, -1, -4, # X
-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,  1  # *
].freeze

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.ffi_libObject

Returns the value of attribute ffi_lib.



13
14
15
# File 'lib/libssw.rb', line 13

# Reader for the ffi_lib attribute.
#
# @return [Object, nil] the value currently held in @ffi_lib (nil when unset)
def ffi_lib
  instance_variable_get(:@ffi_lib)
end

Class Method Details

.align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len = nil) ⇒ Align

Do Striped Smith-Waterman alignment.

Raises:

  • (ArgumentError)


139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/libssw.rb', line 139

# Do Striped Smith-Waterman alignment of a query profile against a reference.
#
# @param prof [Profile, Fiddle::Pointer, #to_ptr] query profile
# @param ref [Array<Integer>] reference sequence as numbers; packed with
#   'c*' (signed 8-bit) before being handed to the C function
# @param weight_gap0 [Integer] passed unchanged to LibSSW.ssw_align
# @param weight_gapE [Integer] passed unchanged to LibSSW.ssw_align
# @param flag [Integer] passed unchanged to LibSSW.ssw_align
# @param filters [Integer] passed unchanged to LibSSW.ssw_align
# @param filterd [Integer] passed unchanged to LibSSW.ssw_align
# @param mask_len [Integer, nil] when nil, defaults to the larger of
#   half the reference length and 15
# @return [Align] wrapper built from the pointer returned by ssw_align
# @raise [ArgumentError] if prof is not a Profile / pointer-like object,
#   or if ref is not an Array
def align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len = nil)
  unless prof.is_a?(Fiddle::Pointer) || prof.is_a?(Profile) || prof.respond_to?(:to_ptr)
    # The message names the actual argument (it previously said "filename").
    raise ArgumentError, 'Expect class of prof to be Profile or Pointer'
  end
  raise ArgumentError, 'Expect class of ref to be Array' unless ref.is_a?(Array)

  ref_str = ref.pack('c*')
  ref_len = ref.size
  mask_len ||= [ref_len / 2, 15].max
  ptr = LibSSW.ssw_align(
    prof, ref_str, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
  )
  # Garbage collection workaround:
  # Keep ref_str alive while the C code might still need it.
  # However, since Align.new immediately reads all values and calls align_destroy,
  # the C memory is freed immediately, so ref_str only needs to live until then.
  # We store it on ptr just to be safe during the Align.new call.
  ptr.instance_variable_set(:@ref_str, ref_str)
  SSW::Align.new(ptr)
end

.align_destroy(align) ⇒ Object

Release the memory allocated by function ssw_align.



163
164
165
166
167
168
169
170
171
172
# File 'lib/libssw.rb', line 163

# Release the memory allocated by function ssw_align.
#
# An Align instance is not released here: a warning is printed and nil is
# returned. A Fiddle::Pointer, or any object responding to #to_ptr, is
# forwarded to LibSSW.align_destroy.
#
# @param align [Align, Fiddle::Pointer, #to_ptr]
# @return [nil, Object] nil for an Align, otherwise the result of
#   LibSSW.align_destroy
# @raise [ArgumentError] for any other kind of argument
def align_destroy(align)
  if align.is_a?(Align)
    warn "You don't need to call this method for Ruby's Align class."
    return
  end

  pointer_like = align.is_a?(Fiddle::Pointer) || align.respond_to?(:to_ptr)
  raise ArgumentError, 'Expect class of align to be Pointer' unless pointer_like

  LibSSW.align_destroy(align)
end

.build_path(q_seq, r_seq, align) ⇒ Array

TODO: fix variable names



224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
# File 'lib/libssw.rb', line 224

# Build a three-line textual view of an alignment.
#
# Each CIGAR element is an integer whose low 4 bits select an operation from
# 'MIDNSHP=X' (codes above 8 are treated as 'M') and whose remaining high
# bits give the run length.
#
# * 'M', '=' and 'X' consume both sequences; the marker line gets '|' where
#   the two characters are equal and '*' where they differ.
# * 'I' consumes the query only; 'D' consumes the reference only. The other
#   lines are padded with spaces.
# * Any other operation is skipped without moving either offset.
#
# @param q_seq [String] query sequence text
# @param r_seq [String] reference sequence text
# @param align [#read_begin1, #ref_begin1, #cigar, #cigar_string] alignment;
#   the begin values are used directly as starting indexes into the strings
# @return [Array<String>] [cigar_string, query line, marker line, reference line]
def build_path(q_seq, r_seq, align)
  # Unary plus yields unfrozen buffers even under frozen_string_literal.
  query_line = +''
  marker_line = +''
  ref_line = +''
  q_off = align.read_begin1
  r_off = align.ref_begin1
  align.cigar.each do |packed|
    len = packed >> 4
    code = packed & 15
    op = code > 8 ? 'M' : 'MIDNSHP=X'[code]
    case op
    when 'M', '=', 'X'
      # '=' and 'X' are aligned columns just like 'M'; ignoring them would
      # drop the columns and leave both offsets behind.
      query_line << q_seq[q_off...(q_off + len)]
      marker_line << Array.new(len) { |j| q_seq[q_off + j] == r_seq[r_off + j] ? '|' : '*' }.join
      ref_line << r_seq[r_off...(r_off + len)]
      q_off += len
      r_off += len
    when 'I'
      query_line << q_seq[q_off...(q_off + len)]
      marker_line << ' ' * len
      ref_line << ' ' * len
      q_off += len
    when 'D'
      query_line << ' ' * len
      marker_line << ' ' * len
      ref_line << r_seq[r_off...(r_off + len)]
      r_off += len
    end
  end
  [align.cigar_string, query_line, marker_line, ref_line]
end

.create_scoring_matrix(elements, match_score, mismatch_score) ⇒ Object

Create a scoring matrix for the Smith-Waterman algorithm.



207
208
209
210
211
212
213
214
215
216
217
# File 'lib/libssw.rb', line 207

# Build a flat, row-major square scoring matrix for the given symbols.
#
# A cell holds match_score when its row symbol equals its column symbol and
# mismatch_score otherwise. The last row and the last column are always 0
# (presumably reserved for an ambiguous symbol such as N; confirm with callers).
#
# @param elements [Array] symbols; the matrix has elements.size rows and columns
# @param match_score [Integer] score for equal symbols
# @param mismatch_score [Integer] score for different symbols
# @return [Array<Integer>] elements.size ** 2 scores
def create_scoring_matrix(elements, match_score, mismatch_score)
  dim = elements.size
  edge = dim - 1
  Array.new(dim * dim) do |cell|
    row, col = cell.divmod(dim)
    # The final row and column stay at zero.
    next 0 if row == edge || col == edge

    elements[row] == elements[col] ? match_score : mismatch_score
  end
end

.init(read, mat, n = nil, score_size: 2) ⇒ Object

Create the query profile using the query sequence.

Raises:

  • (ArgumentError)


44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/libssw.rb', line 44

# Create the query profile using the query sequence.
#
# @param read [#to_a] query sequence as numbers; packed with 'c*'
#   (signed 8-bit) before being handed to the C function
# @param mat [#to_a] scoring matrix, flat or nested (it is flattened);
#   also packed with 'c*'
# @param n [Integer, nil] side length of the square matrix; when nil it is
#   derived from the number of matrix elements
# @param score_size [Integer] passed unchanged to LibSSW.ssw_init
# @return [Profile] wrapper around the pointer returned by ssw_init
# @raise [ArgumentError] if read or mat cannot be converted with #to_a
# @raise [RuntimeError] if the matrix element count is not n * n
def init(read, mat, n = nil, score_size: 2)
  # Validate before converting: once #to_a has run the value is always an
  # Array, so a check placed after the conversion can never fail.
  raise ArgumentError, 'Expect class of read to be Array' unless read.respond_to?(:to_a)
  raise ArgumentError, 'Expect class of mat to be Array' unless mat.respond_to?(:to_a)

  read = read.to_a
  mat = mat.to_a.flatten

  read_str = read.pack('c*')
  read_len = read.size
  # Integer.sqrt keeps n an Integer; Math.sqrt would give a Float that is
  # then passed to the C function and stored on the pointer.
  n = Integer.sqrt(mat.size) if n.nil?
  raise "Not a square matrix. size: #{mat.size}, n: #{n}" if mat.size != n * n

  mat_str = mat.pack('c*')
  ptr = LibSSW.ssw_init(
    read_str,
    read_len,
    mat_str,
    n,
    score_size
  )
  # Garbage collection workaround:
  # The C library stores pointers to read and mat without copying the data.
  # We must keep the Ruby strings (read_str, mat_str) alive for the lifetime
  # of the profile structure to prevent segmentation faults.
  #
  # We cannot use Fiddle's automatic memory management (ptr.free) here because:
  # - Calling init_destroy from Ruby's GC causes segmentation violations
  # - The user should explicitly call SSW.init_destroy when done, or let
  #   Ruby's GC clean up the profile structure itself (though the contained
  #   profile_byte/profile_word will leak unless init_destroy is called)
  ptr.instance_variable_set(:@read_str,   read_str)
  ptr.instance_variable_set(:@mat_str,    mat_str)
  ptr.instance_variable_set(:@read_len,   read_len)
  ptr.instance_variable_set(:@n,          n)
  ptr.instance_variable_set(:@score_size, score_size)

  SSW::Profile.new(ptr)
end

.init_destroy(profile) ⇒ Object

Note:

Ruby has garbage collection, so there is not much reason to call this method.

Release the memory allocated by function ssw_init.



87
88
89
90
91
92
93
# File 'lib/libssw.rb', line 87

# Release the memory allocated by function ssw_init.
#
# @param profile [Profile, Fiddle::Pointer, #to_ptr] profile to release
# @return [Object] the result of LibSSW.init_destroy
# @raise [ArgumentError] if profile is none of the accepted kinds
def init_destroy(profile)
  releasable = profile.is_a?(Fiddle::Pointer) ||
               profile.is_a?(Profile) ||
               profile.respond_to?(:to_ptr)
  return LibSSW.init_destroy(profile) if releasable

  raise ArgumentError, 'Expect class of profile to be Profile or Pointer'
end

.mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len) ⇒ Integer

Note:

This method takes a Fiddle::Pointer as an argument. Please read the source code and understand it well before using this method. (Needs to be improved)

  1. Calculate the number of mismatches.

  2. Modify the cigar string:

differentiate matches (=), mismatches (X), and soft clips (S).



196
197
198
199
200
201
# File 'lib/libssw.rb', line 196

# Thin wrapper around LibSSW.mark_mismatch.
#
# ref and read are packed with 'c*' and cigar_len with 'l*' before the call;
# every other argument is forwarded untouched. A warning is printed on each
# call because the handling of the cigar argument is still marked FIXME.
#
# @param ref [Array<Integer>] reference sequence as numbers
# @param read [Array<Integer>] read sequence as numbers
# @param cigar [Object] forwarded as-is (expected to be pointer-like; confirm
#   against the C signature)
# @param cigar_len [Array<Integer>] packed into a 32-bit integer buffer
# @return [Integer] whatever LibSSW.mark_mismatch returns
def mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len)
  warn 'implementation: fiexme: **cigar' # FIXME
  ref_bytes = ref.pack('c*')
  read_bytes = read.pack('c*')
  cigar_len_buf = cigar_len.pack('l*')
  LibSSW.mark_mismatch(
    ref_begin1, read_begin1, read_end1, ref_bytes, read_bytes, read_len, cigar, cigar_len_buf
  )
end