Class: UneLosHit

Inherits:
Object
  • Object
show all
Defined in:
lib/full_lengther_next/une_los_hit.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(full_hit, query_fasta) ⇒ UneLosHit

Returns a new instance of UneLosHit.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/full_lengther_next/une_los_hit.rb', line 9

# Builds one merged reference hit (@final_hit) out of the HSPs in +full_hit+ that
# share the accession of the first HSP.
#
# full_hit    - enumerable of HSP-like objects (they must respond to acc, q_frame,
#               q_beg, q_end and the other accessors used by the merge helpers).
# query_fasta - query sequence; hits_misma_id returns the version that is stored
#               in @output_seq.
#
# Side effects: sets @mismas_ids_array, @msgs, @output_seq and @final_hit. One
# [label, final_hit.q_end + 1, hit.q_beg + 1] entry is appended to @msgs for every
# HSP that gets merged.
def initialize(full_hit, query_fasta)
	hsps, query_fasta = hits_misma_id(full_hit, query_fasta)
	@mismas_ids_array = hsps
	@msgs = []
	@output_seq = query_fasta

	# Zero or one HSP: nothing to merge (shift returns nil on an empty array).
	if hsps.count <= 1
		@final_hit = hsps.shift
		return
	end

	# Order the HSPs by their start position on the query.
	hsps.sort! { |a, b| a.q_beg <=> b.q_beg }
	# The first HSP is the reference for the unigene reconstruction; shift also
	# removes it from @mismas_ids_array (same array object).
	@final_hit = hsps.shift

	hsps.each do |hit|
		if overlapping_hits?(hit)
			label = @msgs.empty? ? 'OverlapHit' : 'AndOverlapHit'
			@msgs << [label, @final_hit.q_end + 1, hit.q_beg + 1]
			overlapped_hits_query(hit)
		elsif separated_hits?(hit)
			label = @msgs.empty? ? 'SeparatedHit' : 'AndSeparatedHit'
			@msgs << [label, @final_hit.q_end + 1, hit.q_beg + 1]
			separated_hits(hit)
		end
		# HSPs that are neither overlapping nor separated are left unmerged.
	end
end

Instance Attribute Details

#final_hit ⇒ Object (readonly)

Returns the value of attribute final_hit.



7
8
9
# File 'lib/full_lengther_next/une_los_hit.rb', line 7

# Reader for @final_hit: the reference HSP chosen by the constructor, which the
# merge helpers update in place as further HSPs are joined to it (nil when the
# constructor found no HSP).
def final_hit
  return @final_hit
end

#full_prot ⇒ Object (readonly)

Returns the value of attribute full_prot.



7
8
9
# File 'lib/full_lengther_next/une_los_hit.rb', line 7

# Reader for @full_prot.
# NOTE(review): no visible method assigns @full_prot, so this presumably returns
# nil unless it is set elsewhere — confirm.
def full_prot
  return @full_prot
end

#is_ok ⇒ Object (readonly)

Returns the value of attribute is_ok.



7
8
9
# File 'lib/full_lengther_next/une_los_hit.rb', line 7

# Reader for @is_ok.
# NOTE(review): no visible method assigns @is_ok, so this presumably returns nil
# unless it is set elsewhere — confirm.
def is_ok
  return @is_ok
end

#msgs ⇒ Object (readonly)

Returns the value of attribute msgs.



7
8
9
# File 'lib/full_lengther_next/une_los_hit.rb', line 7

# Reader for @msgs: the array the constructor fills with one
# [label, position, position] entry per merged HSP (empty when nothing was merged).
def msgs
  return @msgs
end

#number_x ⇒ Object (readonly)

Returns the value of attribute number_x.



7
8
9
# File 'lib/full_lengther_next/une_los_hit.rb', line 7

# Reader for @number_x.
# NOTE(review): separated_hits only uses a *local* named number_x; no visible
# method assigns @number_x, so this presumably returns nil — confirm.
def number_x
  return @number_x
end

#output_seq ⇒ Object (readonly)

Returns the value of attribute output_seq.



7
8
9
# File 'lib/full_lengther_next/une_los_hit.rb', line 7

# Reader for @output_seq: the query sequence stored by the constructor, into
# which the merge helpers splice runs of 'n' while joining HSPs.
def output_seq
  return @output_seq
end

#q_index_start ⇒ Object (readonly)

Returns the value of attribute q_index_start.



7
8
9
# File 'lib/full_lengther_next/une_los_hit.rb', line 7

# Reader for @q_index_start.
# NOTE(review): no visible method assigns @q_index_start, so this presumably
# returns nil unless it is set elsewhere — confirm.
def q_index_start
  return @q_index_start
end

Instance Method Details

#ajust_nt(nt) ⇒ Object

Returns the number of nucleotides that must be added so that nt becomes a multiple of 3 (i.e. to keep the ORF in frame).



196
197
198
199
200
201
202
203
204
# File 'lib/full_lengther_next/une_los_hit.rb', line 196

# Returns how many nucleotides have to be added to +nt+ to reach a multiple of 3,
# so that the reading frame is kept.
#
# nt - a nucleotide count.
#
# Returns 0, 1 or 2.
def ajust_nt(nt)
	case nt % 3
	when 1 then 2
	when 2 then 1
	else 0
	end
end

#hits_misma_id(full_hit, query_fasta_ori) ⇒ Object

Builds an array containing only the hits that share the same id (the accession of the first hit).



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/full_lengther_next/une_los_hit.rb', line 156

# Collects the HSPs of +full_hit+ whose accession equals that of the first HSP.
#
# full_hit        - enumerable of HSP-like objects responding to acc, q_frame
#                   and reversed=.
# query_fasta_ori - the original query sequence; it is never modified here.
#
# Every selected HSP with a negative q_frame is flagged with reversed = true and
# the returned sequence becomes reverse_seq(query_fasta_ori, hsp).
# NOTE(review): reverse_seq is not visible here; presumably it reverse-complements
# the query (and may adjust the HSP coordinates) — confirm.
#
# Returns [selected_hsps, query_sequence]; the sequence is a copy of
# query_fasta_ori when no selected HSP has a negative frame.
def hits_misma_id(full_hit, query_fasta_ori)
	query_fasta = query_fasta_ori.dup
	misma_id = []

	full_hit.each do |h|
		next unless h.acc == full_hit.first.acc

		# The sequence is the other way round: turn it over.
		if h.q_frame < 0
			query_fasta = reverse_seq(query_fasta_ori, h)
			h.reversed = true
		end
		misma_id << h
	end

	[misma_id, query_fasta]
end

#overlapped_hits(hit) ⇒ Object

Collapses when there are HSPs in different parts of the query that are the same on the subject, so those are removed beforehand.



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/full_lengther_next/une_los_hit.rb', line 103

# Merges +hit+ into @final_hit for two HSPs that overlap on the query: the
# overlapped nucleotides of @output_seq are replaced by 'n' and the joined
# q_seq/s_seq get a run of 'x' at the junction.
#
# Original author's note (translated): it collapses when there are HSPs in
# different parts of the query that are the same on the subject, so those are
# removed beforehand.
#
# hit - HSP-like object responding to q_beg, q_end, s_beg, s_end, q_seq, s_seq
#       and q_frame.
#
# Mutates @output_seq and @final_hit (q_seq, s_seq, q_end, s_beg, s_end,
# align_len). Returns the new align_len.
def overlapped_hits(hit)
	overlapped_aas = @final_hit.s_end - hit.s_beg + 1
	overlapped_nts = @final_hit.q_end - hit.q_beg + 1

	# No gap counting is done on the alignment strings: those counts were never
	# used, and slicing q_seq from (length - overlapped_aas) yields nil when
	# overlapped_aas is negative, which made the branch below unreachable.
	absolute_overlap = 1
	if overlapped_aas < 0 # No overlap on the subject
		overlapped_aas = overlapped_aas.abs
		# Don't cut q_seq and s_seq: there is query overlap but no subject overlap.
		absolute_overlap = 0
	end

	add_nt = overlapped_nts + ajust_nt(hit.q_frame - 1)
	@output_seq = @output_seq[0..@final_hit.q_end - overlapped_nts] + 'n' * add_nt + @output_seq[@final_hit.q_end + 1..@output_seq.length - 1]

	# q_seq and s_seq are amino acid sequences.
	trimmed_aas = overlapped_aas * absolute_overlap
	final_hit_upper_bound = @final_hit.q_seq.length - 1 - trimmed_aas
	@final_hit.q_seq = @final_hit.q_seq[0..final_hit_upper_bound] + 'x' * overlapped_aas + hit.q_seq[trimmed_aas..hit.q_seq.length - 1]
	@final_hit.s_seq = @final_hit.s_seq[0..final_hit_upper_bound] + 'x' * overlapped_aas + hit.s_seq[trimmed_aas..hit.s_seq.length - 1]

	@final_hit.q_end = hit.q_end

	@final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
	@final_hit.s_end = [@final_hit.s_end, hit.s_end].max
	@final_hit.align_len = @final_hit.s_end - @final_hit.s_beg + 1
end

#overlapped_hits_query(hit) ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/full_lengther_next/une_los_hit.rb', line 64

# Merges +hit+ into @final_hit when both HSPs overlap on the query.
#
# The overlapped nucleotides of @output_seq are replaced by 'n', plus
# ajust_nt(hit.q_frame - 1) extra 'n' (frame-shift fix), and @final_hit.q_seq
# is recomputed by translating the patched sequence from @final_hit.q_beg up
# to the shifted end of +hit+.
#
# hit - HSP-like object responding to q_beg, q_end, s_beg, s_end and q_frame.
#
# Mutates @output_seq and @final_hit (q_seq, q_end, s_beg, s_end, align_len,
# q_len). Returns the new q_len.
def overlapped_hits_query(hit)
	ref = @final_hit
	ref_end = ref.q_end
	overlapped_nts = ref_end - hit.q_beg + 1
	frame_fix = ajust_nt(hit.q_frame - 1) # Fix frame-shift

	head = @output_seq[0..(ref_end - overlapped_nts)]
	tail = @output_seq[(ref_end + 1)..-1]
	@output_seq = head + 'n' * (overlapped_nts + frame_fix) + tail

	merged_end = hit.q_end + frame_fix
	ref.q_seq = @output_seq[ref.q_beg..merged_end].translate
	ref.q_end = merged_end

	ref.s_beg = [ref.s_beg, hit.s_beg].min
	ref.s_end = [ref.s_end, hit.s_end].max
	ref.align_len = ref.s_end - ref.s_beg + 1
	ref.q_len = @output_seq.length
end

#overlapping_hits?(hit) ⇒ Boolean

Returns:

  • (Boolean)


178
179
180
181
182
183
184
# File 'lib/full_lengther_next/une_los_hit.rb', line 178

# Tells whether +hit+ overlaps the end of @final_hit on the query: it must start
# at or before @final_hit.q_end and finish after it.
#
# hit - HSP-like object responding to q_beg and q_end.
#
# Returns true or false.
def overlapping_hits?(hit)
	ref_end = @final_hit.q_end
	ref_end >= hit.q_beg && ref_end < hit.q_end
end

#same_frame_hits(hit) ⇒ Object



92
93
94
95
96
97
98
99
100
101
# File 'lib/full_lengther_next/une_los_hit.rb', line 92

# Appends +hit+ to @final_hit, padding the junction with
# (hit.s_beg - @final_hit.s_end + 1) 'x' in q_seq/s_seq and three times as many
# 'n' in @output_seq.
#
# hit - HSP-like object responding to q_beg, q_end, s_beg, s_end, q_seq, s_seq.
#
# Mutates @output_seq and @final_hit (q_seq, s_seq, q_end, s_end, align_len).
# Returns the new align_len.
def same_frame_hits(hit)
	ref = @final_hit
	aa_pad = (hit.s_beg - ref.s_end) + 1
	nt_pad = aa_pad * 3
	aa_filler = 'x' * aa_pad

	ref.q_seq = ref.q_seq + aa_filler + hit.q_seq
	ref.s_seq = ref.s_seq + aa_filler + hit.s_seq

	head = @output_seq[0..(ref.q_end - nt_pad)]
	tail = @output_seq[(hit.q_beg + 1)..-1]
	@output_seq = head + 'n' * nt_pad + tail

	ref.q_end = hit.q_end
	ref.s_end = hit.s_end
	ref.align_len = hit.s_end - ref.s_beg + 1
end

#same_frame_hits_query(hit) ⇒ Object



56
57
58
59
60
61
# File 'lib/full_lengther_next/une_los_hit.rb', line 56

# Extends @final_hit up to the end of +hit+.
#
# q_seq is recomputed first, by translating @output_seq over the *current*
# q_beg..q_end range of @final_hit (i.e. before q_end is moved); afterwards
# q_end, s_end and align_len are taken from +hit+.
#
# hit - HSP-like object responding to q_end and s_end.
#
# Returns the new align_len.
def same_frame_hits_query(hit)
	ref = @final_hit
	current_span = ref.q_beg..ref.q_end
	ref.q_seq = @output_seq[current_span].translate

	ref.q_end = hit.q_end
	ref.s_end = hit.s_end
	ref.align_len = hit.s_end - ref.s_beg + 1
end

#separated_hits(hit) ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/full_lengther_next/une_los_hit.rb', line 134

# Joins +hit+ to @final_hit when the two HSPs do not touch on the query.
#
# The gap (hit.q_beg - @final_hit.q_end - 1 nucleotides, rounded up to a
# multiple of 3 through ajust_nt) is inserted as 'n' into @output_seq and as
# gap / 3 'x' between the joined q_seq and s_seq. No padding is inserted when
# the gap is not positive.
#
# hit - HSP-like object responding to q_beg, q_end, s_end, q_seq and s_seq.
#
# Mutates @output_seq and @final_hit (q_seq, s_seq, q_end, s_end, align_len).
# Returns the new align_len.
def separated_hits(hit)
	ref = @final_hit
	gap_nts = hit.q_beg - ref.q_end - 1
	gap_nts += ajust_nt(gap_nts)

	nt_filler = gap_nts > 0 ? 'n' * gap_nts : ''
	aa_filler = gap_nts > 0 ? 'x' * (gap_nts / 3) : ''

	head = @output_seq[0..(ref.q_end - 1)]
	tail = @output_seq[(hit.q_beg - 1)..-1]
	@output_seq = head + nt_filler + tail

	ref_q = ref.q_seq
	hit_q = hit.q_seq
	ref.q_seq = ref_q[0, ref_q.length] + aa_filler + hit_q[0, hit_q.length]
	ref_s = ref.s_seq
	hit_s = hit.s_seq
	ref.s_seq = ref_s[0, ref_s.length] + aa_filler + hit_s[0, hit_s.length]

	ref.q_end = hit.q_end
	ref.s_end = hit.s_end
	ref.align_len = ref.s_end - ref.s_beg + 1
end

#separated_hits?(hit) ⇒ Boolean

Returns:

  • (Boolean)


186
187
188
189
190
191
192
# File 'lib/full_lengther_next/une_los_hit.rb', line 186

# Tells whether +hit+ lies entirely after @final_hit on the query: it must both
# start and end beyond @final_hit.q_end.
#
# hit - HSP-like object responding to q_beg and q_end.
#
# Returns true or false.
def separated_hits?(hit)
	ref_end = @final_hit.q_end
	ref_end < hit.q_beg && hit.q_end > ref_end
end

#separated_hits_query(hit) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/full_lengther_next/une_los_hit.rb', line 79

# Joins +hit+ to @final_hit for HSPs that are apart on the query, working on the
# nucleotide sequence and re-translating it.
#
# separated_nts is hit.q_beg - @final_hit.q_end + 1. @output_seq keeps its first
# (@final_hit.q_end - separated_nts + 1) characters, gets
# separated_nts + ajust_nt(separated_nts) 'n' and then everything after
# @final_hit.q_end. q_seq is the translation of the patched sequence from
# @final_hit.q_beg to the shifted end of +hit+.
#
# hit - HSP-like object responding to q_beg, q_end and s_end.
#
# Mutates @output_seq and @final_hit (q_seq, q_end, s_end, align_len, q_len).
# Returns the new q_len.
def separated_hits_query(hit)
	ref = @final_hit
	ref_end = ref.q_end
	separated_nts = hit.q_beg - ref_end + 1
	frame_fix = ajust_nt(separated_nts) # Fix frame-shift

	head = @output_seq[0..(ref_end - separated_nts)]
	tail = @output_seq[(ref_end + 1)..-1]
	@output_seq = head + 'n' * (separated_nts + frame_fix) + tail

	merged_end = hit.q_end + frame_fix
	ref.q_seq = @output_seq[ref.q_beg..merged_end].translate

	ref.q_end = merged_end
	ref.s_end = hit.s_end
	ref.align_len = ref.s_end - ref.s_beg + 1
	ref.q_len = @output_seq.length
end