Module: ChimericSeqs

Defined in:
lib/full_lengther_next/chimeric_seqs.rb

Constant Summary collapse

BEG =
0
STOP =
1
HIT =
2

Instance Method Summary collapse

Instance Method Details

#cluster_query_hits(query) ⇒ Object



211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 211

def cluster_query_hits(query)
	hits = []
	acc_hit = []
	query.hits.each do |hit|
		ind = acc_hit.index(hit.acc)
		if ind.nil?
			acc_hit << hit.acc
			hits << [hit]
		else
			hits[ind] << hit
		end
	end
	return hits		
end

#confirm_chimeras(homology_zones, db_path, ident_thresold) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 63

def confirm_chimeras(homology_zones, db_path, ident_thresold)
	acc_hit = homology_zones.map{|zone| zone[HIT].first.acc}
	seq_fasta = %x[blastdbcmd -db #{db_path} -entry #{acc_hit.join(',')}]
	seq_fasta << ">remove\nALGO\n" #Needed for clustal-omega display the dist-matrix, requires unless 3 sequences to do it

	clustal_matrix = do_clustal(seq_fasta)
	clustal_matrix.shift #Remove header
	clustal_matrix.pop #Remove false sequence 
	
	clustal_hits = []
	distances = []
	clustal_matrix.each do |line|
		fields = line.split
		fields.pop #Remove data belong to false sequence
		fields.shift #Remove prot name
		distances << fields.map! {|field| field.to_f}	
	end
	delete_positions = search_ident_prots(homology_zones, distances, ident_thresold)
	delete_zones(delete_positions, homology_zones)
end

#define_hit_limits(hits) ⇒ Object



187
188
189
190
191
192
193
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 187

def define_hit_limits(hits)
	limits=[]
	hits.each do |hit|
		limits << get_limits(hit) 
	end
	return limits
end

#define_homology_zones(query, options, query_fasta) ⇒ Object



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 150

def define_homology_zones(query, options, query_fasta)
	# Define hit limits
	#---------------------
	hits = cluster_query_hits(query) #Hsp packages
	hits_limits = define_hit_limits(hits)

	# Define homology zones
	#------------------------
	#First homology zone
	zones = [[hits_limits.first[BEG], hits_limits.first[STOP], hits.first]]
	ref_hit_beg = hits_limits.first[BEG]
	ref_hit_end = hits_limits.first[STOP]
	
	#Other homology zone
	hits_limits.each_with_index do |hit, i|
		coincidences = 0
		zones.each do |zone|
			if  hit_is_in?(zone[BEG], zone[STOP], hit) # Extender zona de homologia si coinciden en zona 
				zone[BEG] = [zone[BEG],hit[BEG]].min
				zone[STOP] = [zone[STOP],hit[STOP]].max
				coincidences+=1
			end
		end
		if coincidences == 0
			zones << [hit[BEG], hit[STOP], hits[i]]
		end
	end
	zones.sort!{|e1,e2| e1[BEG] <=> e2[BEG]}

	# Delete overlapping homology zones
	#------------------------------------
	overlapping_zones = overlapping_zones(zones)
	delete_zones(overlapping_zones, zones)

	return zones
end

#delete_zones(overlapping_zones, zones) ⇒ Object



226
227
228
229
230
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 226

def delete_zones(overlapping_zones, zones)
	overlapping_zones.each do |zone|
		zones.delete_at(zone)
	end
end

#do_clustal(seq_fasta) ⇒ Object



323
324
325
326
327
328
329
330
331
332
333
334
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 323

def do_clustal(seq_fasta)
	cmd='clustalo -i -  -o /dev/null --percent-id --full --distmat-out=/dev/stdout --force'
	clustal_matrix = nil
	IO.popen(cmd,'w+') {|clustal|
		clustal.sync = true
		clustal.write(seq_fasta)
		clustal.close_write
		clustal_matrix = clustal.readlines
		clustal.close_read
	}
	return clustal_matrix
end

#duplicate_hits(query) ⇒ Object



279
280
281
282
283
284
285
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 279

def duplicate_hits(query)
	dup_hits=[]
	query.hits.each do |hit|
		dup_hits << hit.dup
	end
	return dup_hits
end

#fragment_chimera(query, seq, hit, hit_position, hit_limits, num_homology_zones, options, db_name, cut_positions) ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 102

def fragment_chimera(query, seq, hit, hit_position, hit_limits, num_homology_zones, options, db_name, cut_positions)
	# Prepare new seq and query
	#----------------------------
	query_bak = query.dup
	query_bak.hits = hit # Here, hit is an array of hsps
	query_bak.query_def += "_split_#{hit_position}"
	seq_bak = seq.dup
	seq_bak.reset_classification
	seq_bak.clean_warnings
	seq_bak.seq_name += "_split_#{hit_position}"
	seq_bak.clean_orfs
	seq_bak.save_fasta = true
	seq_bak.ignore = false

	# Cut sequence and move hit/hsps limits
	#----------------------------------------
	if hit_position == 0 #First zone
		limit = 0
		if hit.first.q_frame < 0 #Hit reversed
			hit.first.q_frame = -1
		end
	else #Middle & last zone
		limit = cut_positions[BEG]#hit_limits[BEG]
		hit_move_limits(hit, -limit, 0) #Redefine hit limits on new sequence after cut	
		if hit.first.q_frame >= 0
			hit.first.q_frame=1
		elsif hit_position < num_homology_zones-1 #Last zone keeps his original frame because it's composed by the hit and the terminal sequence (Here hit is reversed).
			hit.first.q_frame=-1
		end
	end
	if hit_position == num_homology_zones-1 #Last zone
		seq_bak.seq_fasta = seq.seq_fasta[cut_positions[BEG]..seq.fasta_length-1]#[hit_limits[BEG]..seq.fasta_length-1]
	else	# Beginning & Middle zone
		seq_bak.seq_fasta = seq.seq_fasta[limit..cut_positions[STOP]]#[limit..hit_limits[STOP]]
	end
	seq_length = seq_bak.seq_fasta.length
	query_bak.full_query_length = seq_length
	seq_bak.fasta_length = seq_length
	hit_set_q_len(hit, seq_length)


	# Full length analisys of fragment
	#----------------------------------------	
	analiza_orf_y_fl(seq_bak, query_bak.hits, options, db_name)

	return seq_bak
end

#get_hits(query, ref_hit) ⇒ Object



255
256
257
258
259
260
261
262
263
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 255

def get_hits(query, ref_hit)
	all_hits=[]
	query.hits.each do |hit|
		if hit.acc == ref_hit.acc
			all_hits << hit
		end
	end
	return all_hits
end

#get_limits(hit) ⇒ Object



195
196
197
198
199
200
201
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 195

def get_limits(hit)
	coordenates=[]
	hit.map{|h| coordenates << h.q_beg; coordenates << h.q_end}	
	#		BEG				END		
		limits=[coordenates.min, coordenates.max]
	return limits
end

#get_limits_s(hit) ⇒ Object



203
204
205
206
207
208
209
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 203

def get_limits_s(hit)
	coordenates=[]
	hit.map{|h| coordenates << h.s_beg; coordenates << h.s_end}	
	#		BEG				END		
		limits=[coordenates.min, coordenates.max]
	return limits
end

#hit_is_in?(h_beg, h_end, hit) ⇒ Boolean

Returns:

  • (Boolean)


246
247
248
249
250
251
252
253
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 246

def hit_is_in?(h_beg, h_end, hit)
	is=false
			# CONTIENE					#OVERLAP
	if h_beg <= hit[BEG] && h_end > hit[BEG] || hit[BEG] <= h_beg && hit[STOP] > h_beg
		is=true
	end
	return is
end

#hit_move_limits(hit, q_add, s_add) ⇒ Object



306
307
308
309
310
311
312
313
314
315
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 306

def hit_move_limits(hit, q_add, s_add)
	if hit.class.to_s == 'Array'
		hit.each do |hsp|
			move_limits(hsp, q_add, s_add)
		end
	elsif hit.class.to_s == 'Hit'
		#puts "\e[35m#{hit.acc}\t#{hit.q_beg}\t#{hit.q_end}\t#{hit.s_beg}\t#{hit.s_end}\t#{hit.reversed}\e[0m"
		move_limits(hit, q_add, s_add)
	end
end

#hit_set_q_len(hit, q_len) ⇒ Object



317
318
319
320
321
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 317

def hit_set_q_len(hit, q_len)
	hit.each do |hsp|
		hsp.q_len=q_len
	end
end

#min_distance_between_homology_zones(homology_zones) ⇒ Object



266
267
268
269
270
271
272
273
274
275
276
277
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 266

def min_distance_between_homology_zones(homology_zones)
	distance=nil
	homology_zones.each_with_index do |zone,i|
		if i > 0
			local_distance=homology_zones[i][BEG] - homology_zones[i-1][STOP]
			if distance.nil? || distance > local_distance
				distance=local_distance
			end	
		end
	end
	return distance
end

#move_limits(hit, q_add, s_add) ⇒ Object



294
295
296
297
298
299
300
301
302
303
304
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 294

def move_limits(hit, q_add, s_add)
	hit.q_beg+=q_add
	hit.q_end+=q_add
	hit.s_beg+=s_add
	hit.s_end+=s_add
	if hit.class.to_s == 'ExoBlastHit' && !hit.q_frameshift.empty? #There is frameshift
		hit.q_frameshift.map!{|fs| 
			[fs.first + q_add, fs.last]
		}
	end
end

#overlapping_zones(zones) ⇒ Object



232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 232

def overlapping_zones(zones)
	delete_zones=[]
	zones.each_with_index do |zone, i|
		if i>0
			if zone[BEG]< zones[i-1][STOP]
				delete_zones << i
				delete_zones << i-1	
			end
		end
	end
	delete_zones.uniq!
	return delete_zones
end

#search_chimeras(seq, blast_query, options, db_name, db_path) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 9

def search_chimeras(seq, blast_query, options, db_name, db_path)
	
	# DETECTION
	#----------------------
	homology_zones = []
	cut_positions = []
	if blast_query.hits.length > 1 
		homology_zones = define_homology_zones(blast_query, options, seq.seq_fasta)		
		cut_positions = set_cut_positions(homology_zones) if homology_zones.length > 1
	end
	# CONFIRMATION
	#----------------------
	num_homology_zones = homology_zones.length
	if num_homology_zones > 1 && options[:chimera].include?('r')
		confirm_chimeras(homology_zones, db_path, options[:ident_thresold]) # Check if prots are differents or not
		num_homology_zones = homology_zones.length
	end

	# SPLICING
	#--------------------
	new_seqs=[]		
	if num_homology_zones > 1 #In this case the sequence is a chimera
		seq.format_chimera!
		homology_zones.each_with_index do |hom_zone, i|
			seq.hit << hom_zone[HIT].first.dup #Save hit before modified it for write output purposes
			hit_limits = get_limits(hom_zone[HIT])# Take beginning and end of hit on query, hit can be composed by unsorted or antisense hsps
			if options[:chimera].include?('c') && hit_limits[STOP]-hit_limits[BEG]> options[:min_nucleotides] 
				new_seqs << fragment_chimera(blast_query, seq, hom_zone[HIT], i, hit_limits, num_homology_zones, options, db_name, cut_positions[i])
				seq.warnings('SOLVED')
			end
		end
	else
		new_seqs = nil #Sequence isn't chimera
	end
	return new_seqs
end

#search_ident_prots(homology_zones, distances, ident_thresold) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 85

def search_ident_prots(homology_zones, distances, ident_thresold)
	delete_positions = []		
	n_homology_zones = homology_zones.length
	n_homology_zones.times do |j|
		n_homology_zones.times do |i|			
			next if i == j
			if distances[j][i] >= ident_thresold
				delete_positions << j
				delete_positions << i
			end
		end
	end
	delete_positions.uniq!
	return delete_positions
end

#set_cut_positions(homology_zones) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 46

def set_cut_positions(homology_zones)
	cut_positions = []
	last_cut = -1
	homology_zones.each_with_index do |hom_zone, i|
		if i > 0
			positions = []
			positions << last_cut + 1 # Start of fragment
			cut_position = homology_zones[i-1][STOP] + (hom_zone[BEG] - homology_zones[i-1][STOP])/2
			positions << cut_position # End of fragment
			last_cut = cut_position
			cut_positions << positions
		end
	end
	cut_positions << [last_cut, homology_zones.last[HIT].first.q_len-1]
	return cut_positions		
end

#set_limits(hit, q_beg, q_end, s_beg, s_end) ⇒ Object



287
288
289
290
291
292
# File 'lib/full_lengther_next/chimeric_seqs.rb', line 287

def set_limits(hit, q_beg, q_end, s_beg, s_end)
	hit.q_beg = q_beg
	hit.q_end = q_end
	hit.s_beg = s_beg
	hit.s_end = s_end
end