87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
|
# File 'lib/finishm/primers.rb', line 87
def run(options, argv)
Bio::Log::CLI.configure('bio-primer3')
contigs = []
Bio::FlatFile.foreach(options[:contigs_file]) do |entry|
contigs.push entry
end
log.info "Read in #{contigs.length} contigs from #{options[:contigs_file] }"
min_length = contigs.collect{|contig| contig.seq.length}.min
log.info "Minimum contig length #{min_length}"
unless options[:min_distance] < min_length/ 2
log.error "Minimum primer distance from the ends of the contigs is too small, as the smallest contig is #{min_length} long, and the min distance must be at least twice this distance"
exit 1
end
= {}
unless options[:min_primer_size].nil?
.merge!({
'PRIMER_MIN_SIZE' => options[:min_primer_size],
})
end
unless options[:melting_temperature_optimum].nil?
.merge!({
'PRIMER_OPT_TM' => options[:melting_temperature_optimum],
'PRIMER_MIN_TM' => options[:melting_temperature_optimum]-options[:melting_temperature_tolerance],
'PRIMER_MAX_TM' => options[:melting_temperature_optimum]+options[:melting_temperature_tolerance],
})
end
unless options[:extra_global_primer3_options].nil?
.merge! options[:extra_global_primer3_options]
end
if log.debug?
.merge! 'PRIMER_EXPLAIN_FLAG' => '1'
end
primer3_results = []
contigs.each do |contig|
start_chunk = contig.seq[options[:min_distance]..options[:max_distance]]
end_chunk = contig.seq[(contig.length-options[:max_distance]) .. (contig.length-options[:min_distance])].downcase
log.debug "Start chunk length #{start_chunk.length}, end chunk length #{end_chunk.length}"
num_ns = 100
joined = end_chunk+'N'*num_ns+start_chunk
result = Bio::Primer3.run({
'SEQUENCE_TEMPLATE' => joined,
'PRIMER_TASK' => 'pick_sequencing_primers',
'SEQUENCE_TARGET' => "#{end_chunk.length},#{num_ns}",
'PRIMER_NUM_RETURN'=>'5',
'PRIMER_PRODUCT_SIZE_RANGE'=>"#{num_ns}-#{joined.length}",
}.merge()
)
log.debug "primer3 returned the following result: #{result.output_hash.inspect}"
if result.yeh?
fwds = []
reverses = []
(0...result['PRIMER_LEFT_NUM_RETURNED'].to_i).each do |pair_number|
fwds.push result["PRIMER_RIGHT_#{pair_number}_SEQUENCE"]
reverses.push result["PRIMER_LEFT_#{pair_number}_SEQUENCE"]
end
contig_name = contig.definition
f = PrimerList.new
f.contig_side = PrimerList::START_OF_CONTIG
f.primers = fwds
f.contig_name = contig_name
primer3_results.push f
r = PrimerList.new
r.contig_side = PrimerList::END_OF_CONTIG
r.primers = reverses
r.contig_name = contig_name
primer3_results.push r
else
log.error "For failing primer #{contig.definition}, primer3 result was: #{result.output_hash.inspect}"
log.error "No primers found for contig #{contig.definition}, giving up"
exit 1 unless options[:persevere]
end
end
log.info "Finished getting first round of primers, now have #{primer3_results.length} sets of primers e.g. #{primer3_results[0].inspect}"
if log.debug?
log.debug "Primer sets to be validated:"
primer3_results.each do |res|
log.debug res.inspect
end
end
failed = false
current_path = [0]
next_index_to_change = 0
primer_sets = primer3_results.collect{|s| s.primers}
while !failed and next_index_to_change < primer3_results.length
if next_index_to_change == 0
current_path[0]=0
if current_path[0]==primer_sets.length
failed = true
else
next_index_to_change += 1
end
else
current_path[next_index_to_change] ||= -1
if current_path[next_index_to_change] >= primer_sets[next_index_to_change].length
next_index_to_change -= 1
else
current_path[next_index_to_change] += 1
primer_to_test = primer_sets[next_index_to_change][current_path[next_index_to_change]]
previous_primer_list = []
(0...next_index_to_change).each do |i|
previous_primer_list.push primer_sets[i][current_path[i]]
end
failed_against_prev = false
previous_primer_list.each_with_index do |prev, i|
log.debug "Testing #{primer_to_test.inspect} against #{prev.inspect}"
if Bio::Primer3.test_primer_compatibility(primer_to_test, prev, ) == false
log.error "Found an incompatible pair of primers, unfortunately"
log.error "This route through the code has never been tested, so you'll have to take a look at the code to ensure there is no bugs. Or else run this program with different parameters. Exiting."
exit 1
failed_against_prev = true
break
else
log.debug "Compatible, cool"
end
end
if failed_against_prev
log.debug "At least one primer was incompatible, trying again"
else
log.debug "All compatible, cool. Now moving to the next index"
current_path[next_index_to_change+1] = nil unless next_index_to_change+1 >= primer_sets.length
next_index_to_change += 1
end
end
end
end
if failed
log.error "Sorry, no sets of primers satisfy the criteria"
exit 1
end
log.info "Double checking to make sure there is no incompatibilities between primer pairs"
num_compared = 0
(0...primer_sets.length).to_a.combination(2) do |array|
primer1 = primer_sets[array[0]][current_path[array[0]]]
primer2 = primer_sets[array[1]][current_path[array[1]]]
result, res = Bio::Primer3.test_primer_compatibility(primer1, primer2, , :return_result => true)
num_compared += 1
if result == false
log.error "Programming error!! There was supposed to be an OK path, but that path wasn't OK in the validation (#{primer1} and #{primer2}) were the problem"
exit 1 unless options[:persevere]
end
end
log.info "Validated #{num_compared} different pairs of primers, they don't seem to conflict with each other at all, according to primer3's check primers thing"
log.debug "in-silico PCR: making sure there are no spurious primer pairings within the contigs themselves"
ipcress_options = {:min_distance => 1, :max_distance => 10000, :mismatches => 0}
num_compared = 0
(0...primer_sets.length).to_a.combination(2) do |array|
primer1 = primer_sets[array[0]][current_path[array[0]]]
primer2 = primer_sets[array[1]][current_path[array[1]]]
primer_set = Bio::Ipcress::PrimerSet.new primer1, primer2
result = Bio::Ipcress.run primer_set, options[:contigs_file], ipcress_options
num_compared += 1
log.debug "Ipcress output:"
log.debug result.inspect
unless result.length == 0
set1 = primer3_results[array[0]]
set2 = primer3_results[array[1]]
log.error "Unanticipated products generated from primer pair #{set1.contig_name}/#{set1.contig_side}/#{primer1} and #{set2.contig_name}/#{set2.contig_side}/#{primer2}. Sorry, fail."
exit 1 unless options[:persevere]
end
end
log.info "Validated #{num_compared} different pairs of primers so that unanticipated products are not formed according to iPCRess, and there doesn't seem to be any of those. Yey."
num_compared = 0
contigs_hash = {}
contigs.each do |contig|
contigs_hash[contig.definition] = contig.seq
end
log.debug "in-silico PCR: making sure expected products are generated..."
(0...primer_sets.length).to_a.combination(2) do |array|
set1 = primer3_results[array[0]]
set2 = primer3_results[array[1]]
primer1 = set1.primers[current_path[array[0]]]
primer2 = set2.primers[current_path[array[1]]]
log.debug "Testing #{primer1} and #{primer2}: indicies #{array[0] } and #{array[1] }, contigs #{set1.contig_name} #{set1.contig_side } and #{set2.contig_name} #{set2.contig_side}"
primer_set = Bio::Ipcress::PrimerSet.new primer1, primer2
Tempfile.open('ipcress') do |tempfile|
s = PrimerList::START_OF_CONTIG
e = PrimerList::END_OF_CONTIG
f1 = contigs_hash[set1.contig_name]
r1 = ' '+Bio::Sequence::NA.new(contigs_hash[set1.contig_name]).reverse_complement.to_s.upcase
f2 = contigs_hash[set2.contig_name]
r2 = ' '+Bio::Sequence::NA.new(contigs_hash[set2.contig_name]).reverse_complement.to_s.upcase
seqs_ordered = case [set1.contig_side, set2.contig_side]
when [s,s]
log.debug('case s s'); [r1,f2]
when [s,e]
log.debug('case s e'); [r1,r2]
when [e,s]
log.debug('case e s'); [f1,f2]
when [e,e]
log.debug('case e e'); [f1,r2]
end
tempfile.puts '>test'
tempfile.puts seqs_ordered[0]
tempfile.puts 'N'*100
tempfile.puts seqs_ordered[1]
tempfile.close
if log.debug?
log.debug "Testing with iPCRess #{primer1} and #{primer2} from #{set1.contig_name} and #{set2.contig_name}, respectively"
log.debug "Input fasta file"
log.debug `cat #{tempfile.path} >/tmp/ta` if log.debug?
log.debug "Fasta file input stats:"
log.debug `seqstat #{tempfile.path}`
end
results = Bio::Ipcress.run primer_set, tempfile.path, ipcress_options
num_compared += 1
unless results.length == 1
if results.length == 0
log.error "Anticipated products not generated in the hypothetical scenario of #{primer1} and #{primer2}, from #{set1.contig_name} and #{set2.contig_name}, respectively"
exit 1 unless options[:persevere]
else
log.error "Too many PCR products generated in the hypothetical scenario of #{primer1} and #{primer2}, from #{set1.contig_name} and #{set2.contig_name}, respectively"
log.error "Specifically, these PCR products were generated:"
results.each do |res|
log.error res.inspect
end
exit 1 unless options[:persevere]
end
end
end
num_compared += 1
end
log.info "Validated #{num_compared} different pairs of primers so that the anticipated products are formed according to iPCRess, and all seem to be as expected. Yey."
num_compared = 0
if options[:contig_universe]
log.info "in-silico PCR: testing the contig universe. This could probably be sped up by only making a single call to ipcress, but oh well."
num_compared = 0
(0...primer_sets.length).to_a.combination(2) do |array|
primer1 = primer_sets[array[0]][current_path[array[0]]]
primer2 = primer_sets[array[1]][current_path[array[1]]]
primer_set = Bio::Ipcress::PrimerSet.new primer1, primer2
results = Bio::Ipcress.run primer_set, options[:contig_universe], ipcress_options
num_compared += 1
if results.length > 0
log.warn "Found #{results.length} matches between #{primer1} and #{primer2} in the contig universe, expected none."
exit 1 unless options[:persevere]
end
print '.' if log.info?
end
puts if log.info?
else
log.info "Not checking to see if primers match any other contigs not targeted are picked up by these primers, because no universe was specified."
end
log.debug "Tested #{num_compared} pairs of primers to see if anything else was hit, warnings above if any did so"
if current_path.length == contigs.length*2
log.info "Hoorah! Found an ok set of primers"
else
log.error "Unable to find a complete set of primers with the constraints given to primer3. Printing the primers that were found anyway.."
end
puts %w(Contig Side Index Primer).join("\t")
current_path.each_with_index do |primer_index, primer_set_index|
break if primer_set_index >= primer3_results.length
end_info = primer3_results[primer_set_index]
primer = end_info.primers[primer_index]
puts [
end_info.contig_name,
end_info.contig_side,
primer_index,
primer,
].join("\t")
end
end
|