Module: LibSSW
- Defined in:
- lib/libssw.rb,
lib/libssw/ffi.rb,
lib/libssw/align.rb,
lib/libssw/profile.rb,
lib/libssw/version.rb,
lib/libssw/BLOSUM50.rb,
lib/libssw/BLOSUM62.rb
Defined Under Namespace
Modules: FFI Classes: Align, Error, Profile
Constant Summary collapse
- AAELEMENTS =
['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', 'B', 'Z', 'X', '*']
- AA2INT =
{ 'A' => 0, 'a' => 0, 'R' => 1, 'r' => 1, 'N' => 2, 'n' => 2, 'D' => 3, 'd' => 3, 'C' => 4, 'c' => 4, 'Q' => 5, 'q' => 5, 'E' => 6, 'e' => 6, 'G' => 7, 'g' => 7, 'H' => 8, 'h' => 8, 'I' => 9, 'i' => 9, 'L' => 10, 'l' => 10, 'K' => 11, 'k' => 11, 'M' => 12, 'm' => 12, 'F' => 13, 'f' => 13, 'P' => 14, 'p' => 14, 'S' => 15, 's' => 15, 'T' => 16, 't' => 16, 'W' => 17, 'w' => 17, 'Y' => 18, 'y' => 18, 'V' => 19, 'v' => 19, 'B' => 20, 'b' => 20, 'Z' => 21, 'z' => 21, 'X' => 22, 'x' => 22, '*' => 23 }
- INT2AA =
{ 0 => 'A', 1 => 'R', 2 => 'N', 3 => 'D', 4 => 'C', 5 => 'Q', 6 => 'E', 7 => 'G', 8 => 'H', 9 => 'I', 10 => 'L', 11 => 'K', 12 => 'M', 13 => 'F', 14 => 'P', 15 => 'S', 16 => 'T', 17 => 'W', 18 => 'Y', 19 => 'V', 20 => 'B', 21 => 'Z', 22 => 'X', 23 => '*' }
- DNAElements =
%w[A C G T N]
- DNA2INT =
{ 'A' => 0, 'a' => 0, 'C' => 1, 'c' => 1, 'G' => 2, 'g' => 2, 'T' => 3, 't' => 3, 'N' => 4, 'n' => 4 }
- INT2DNA =
{ 0 => 'A', 1 => 'C', 2 => 'G', 3 => 'T', 4 => 'N' }
- DNARC =
reverse complement
{ 'A' => 'T', 'C' => 'G', 'G' => 'C', 'T' => 'A', 'N' => 'N', 'a' => 'T', 'c' => 'G', 'g' => 'C', 't' => 'A', 'n' => 'N' }
- VERSION =
'0.0.2'
- BLOSUM50 =
[ # A R N D C Q E G H I L K M F P S T W Y V B Z X * 5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -5, # A -2, 7, -1, -2, -4, 1, 0, -3, 0, -4, -3, 3, -2, -3, -3, -1, -1, -3, -1, -3, -1, 0, -1, -5, # R -1, -1, 7, 2, -2, 0, 0, 0, 1, -3, -4, 0, -2, -4, -2, 1, 0, -4, -2, -3, 5, 0, -1, -5, # N -2, -2, 2, 8, -4, 0, 2, -1, -1, -4, -4, -1, -4, -5, -1, 0, -1, -5, -3, -4, 6, 1, -1, -5, # D -1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5, # C -1, 1, 0, 0, -3, 7, 2, -2, 1, -3, -2, 2, 0, -4, -1, 0, -1, -1, -1, -3, 0, 4, -1, -5, # Q -1, 0, 0, 2, -3, 2, 6, -3, 0, -4, -3, 1, -2, -3, -1, -1, -1, -3, -2, -3, 1, 5, -1, -5, # E 0, -3, 0, -1, -3, -2, -3, 8, -2, -4, -4, -2, -3, -4, -2, 0, -2, -3, -3, -4, -1, -2, -1, -5, # G -2, 0, 1, -1, -3, 1, 0, -2, 10, -4, -3, 0, -1, -1, -2, -1, -2, -3, 2, -4, 0, 0, -1, -5, # H -1, -4, -3, -4, -2, -3, -4, -4, -4, 5, 2, -3, 2, 0, -3, -3, -1, -3, -1, 4, -4, -3, -1, -5, # I -2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -3, 3, 1, -4, -3, -1, -2, -1, 1, -4, -3, -1, -5, # L -1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -2, -4, -1, 0, -1, -3, -2, -3, 0, 1, -1, -5, # K -1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, 0, -3, -2, -1, -1, 0, 1, -3, -1, -1, -5, # M -3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -4, -3, -2, 1, 4, -1, -4, -4, -1, -5, # F -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5, # P 1, -1, 1, 0, -1, 0, -1, 0, -1, -3, -3, 0, -2, -3, -1, 5, 2, -4, -2, -2, 0, 0, -1, -5, # S 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -2, 0, 0, -1, -1, -5, # T -3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, 2, -3, -5, -2, -1, -5, # W -2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, -1, -3, -2, -1, -5, # Y 0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, -3, -3, -1, -5, # V -2, -1, 5, 6, -3, 0, 1, -1, 0, -4, -4, 0, -3, -4, -2, 0, 0, -5, -3, -3, 6, 1, -1, -5, # B -1, 0, 0, 1, -3, 4, 5, -2, 0, -3, -3, 1, -1, -4, -1, 0, -1, -2, -2, -3, 1, 5, -1, -5, # Z -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5, # X -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1 # * ]
- BLOSUM62 =
[ # A R N D C Q E G H I L K M F P S T W Y V B Z X * 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4, # A -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4, # R -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4, # N -2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4, # D 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4, # C -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4, # Q -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4, # E 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4, # G -2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4, # H -1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4, # I -1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4, # L -1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4, # K -1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4, # M -2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4, # F -1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4, # P 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4, # S 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4, # T -3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4, # W -2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4, # Y 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4, # V -2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4, # B -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4, # Z 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4, # X -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1, # * ]
Class Attribute Summary collapse
-
.ffi_lib ⇒ Object
Returns the value of attribute ffi_lib.
Class Method Summary collapse
- .aaseq_to_int_array(seq) ⇒ Object
-
.align_destroy(align) ⇒ Object
Release the memory allocated by function ssw_align.
- .array_to_cigar_string(arr) ⇒ Object
-
.create_scoring_matrix(elements, match_score, mismatch_score) ⇒ Object
Create scoring matrix of Smith-Waterman algrithum.
- .dna_complement(seq) ⇒ Object
- .dna_to_int_array(seq) ⇒ Object
-
.init_destroy(profile) ⇒ Object
Release the memory allocated by function ssw_init.
- .int_array_to_aaseq(arr) ⇒ Object
- .int_array_to_dna(arr) ⇒ Object
-
.mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len) ⇒ Integer
1.
-
.ssw_align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len) ⇒ Object
Do Striped Smith-Waterman alignment.
-
.ssw_init(read, mat, n = nil, score_size: 2) ⇒ Object
Create the query profile using the query sequence.
Class Attribute Details
.ffi_lib ⇒ Object
Returns the value of attribute ffi_lib.
11 12 13 |
# File 'lib/libssw.rb', line 11 def ffi_lib @ffi_lib end |
Class Method Details
.aaseq_to_int_array(seq) ⇒ Object
285 286 287 288 289 290 291 |
# File 'lib/libssw.rb', line 285 def aaseq_to_int_array(seq) raise ArgumentError, 'seq must be a string' unless seq.is_a? String seq.each_char.map do |base| AA2INT[base] || AA2INT['*'] end end |
.align_destroy(align) ⇒ Object
Release the memory allocated by function ssw_align.
201 202 203 |
# File 'lib/libssw.rb', line 201 def align_destroy(align) FFI.align_destroy(align) end |
.array_to_cigar_string(arr) ⇒ Object
234 235 236 237 238 239 240 241 242 243 |
# File 'lib/libssw.rb', line 234 def array_to_cigar_string(arr) cigar_string = String.new arr.each do |x| n = x >> 4 m = x & 15 c = m > 8 ? 'M' : 'MIDNSHP=X'[m] cigar_string << n.to_s << c end cigar_string end |
.create_scoring_matrix(elements, match_score, mismatch_score) ⇒ Object
Create scoring matrix of Smith-Waterman algrithum.
249 250 251 252 253 254 255 256 257 258 259 |
# File 'lib/libssw.rb', line 249 def create_scoring_matrix(elements, match_score, mismatch_score) size = elements.size score = Array.new(size * size, 0) (size - 1).times do |i| (size - 1).times do |j| score[i * size + j] = \ (elements[i] == elements[j] ? match_score : mismatch_score) end end score end |
.dna_complement(seq) ⇒ Object
270 271 272 273 274 |
# File 'lib/libssw.rb', line 270 def dna_complement(seq) seq.each_char.map do |base| DNARC[base] end.join.reverse end |
.dna_to_int_array(seq) ⇒ Object
262 263 264 265 266 267 268 |
# File 'lib/libssw.rb', line 262 def dna_to_int_array(seq) raise ArgumentError, 'seq must be a string' unless seq.is_a? String seq.each_char.map do |base| DNA2INT[base] || DNA2INT['N'] end end |
.init_destroy(profile) ⇒ Object
Ruby has garbage collection, so there is not much reason to call this method.
Release the memory allocated by function ssw_init.
137 138 139 |
# File 'lib/libssw.rb', line 137 def init_destroy(profile) FFI.init_destroy(profile) end |
.int_array_to_aaseq(arr) ⇒ Object
293 294 295 296 297 298 299 |
# File 'lib/libssw.rb', line 293 def int_array_to_aaseq(arr) raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array arr.map do |i| INT2AA[i] || '*' end.join end |
.int_array_to_dna(arr) ⇒ Object
277 278 279 280 281 282 283 |
# File 'lib/libssw.rb', line 277 def int_array_to_dna(arr) raise ArgumentError, 'arr must be an Array' unless arr.is_a? Array arr.map do |i| INT2DNA[i] || 'N' end.join end |
.mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len) ⇒ Integer
This method takes a Fiddle::Pointer as an argument. Please read the source code and understand it well before using this method. (Needs to be improved)
-
Calculate the number of mismatches.
-
Modify the cigar string:
differentiate matches (=), mismatches(X), and softclip(S).
227 228 229 230 231 232 |
# File 'lib/libssw.rb', line 227 def mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len) warn 'implementation: fiexme: **cigar' # FIXME FFI.mark_mismatch( ref_begin1, read_begin1, read_end1, ref.pack('c*'), read.pack('c*'), read_len, cigar, cigar_len.pack('l*') ) end |
.ssw_align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len) ⇒ Object
Do Striped Smith-Waterman alignment.
184 185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/libssw.rb', line 184 def ssw_align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len) ref_str = ref.pack('c*') ref_len = ref.size ptr = FFI.ssw_align( prof, ref_str, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len ) # Not sure yet if we should set the instance variable to the pointer as a # garbage collection workaround. # For example: instance_variable_set(:@ref_str, ref_str) # # ptr.free = FFI.instance_variable_get(:@func_map)['align_destroy'] LibSSW::Align.new(ptr) end |
.ssw_init(read, mat, n = nil, score_size: 2) ⇒ Object
Create the query profile using the query sequence.
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/libssw.rb', line 102 def ssw_init(read, mat, n = nil, score_size: 2) read_str = read.pack('c*') read_len = read.size mat = mat.to_a.flatten n = Math.sqrt(mat.size) if n.nil? raise "Not a square matrix. size: #{mat.size}, n: #{n}" if mat.size != n * n mat_str = mat.flatten.pack('c*') ptr = FFI.ssw_init( read_str, read_len, mat_str, n, score_size ) # Garbage collection workaround # # * The following code will cause a segmentation violation when manually # releasing memory. The reason is unknown. # * func_map is only available in newer versions of fiddle. # ptr.free = FFI.instance_variable_get(:@func_map)['init_destroy'] ptr.instance_variable_set(:@read_str, read_str) ptr.instance_variable_set(:@read_len, read_len) ptr.instance_variable_set(:@mat_str, mat_str) ptr.instance_variable_set(:@n, n) ptr.instance_variable_set(:@score_size, score_size) LibSSW::Profile.new(ptr) end |