Class: PluginVectors

Inherits:
Plugin show all
Defined in:
lib/seqtrimnext/plugins/plugin_vectors.rb

Overview

Author: Almudena Bocinos Rioboo

Defines the main methods that are necessary to execute PluginVectors

Inherit: Plugin

Constant Summary collapse

MAX_TARGETS_SEQS =

MIN_VECTOR_SIZE=30

MAX_TO_EXTREME=(MIN_VECTOR_SIZE/2).to_i
20

Instance Attribute Summary

Attributes inherited from Plugin

#stats

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Plugin

#add_plugin_stats, #add_stats, #add_text_stats, auto_setup, #can_execute?, check_param, #execute, get_graph_filename, get_graph_title, graph_ignored?, ignored_graphs, #initialize, #merge_hits, #overlapX?, plot_setup, valid_graphs

Constructor Details

This class inherits a constructor from Plugin

Class Method Details

.check_params(params) ⇒ Object

Returns an array with the errors caused by missing parameters



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/seqtrimnext/plugins/plugin_vectors.rb', line 161

# Declares every parameter used by the vectors plugin (name, type, default
# value and help text) through +params.check_param+.
#
# @param params [#check_param] receiver of the parameter declarations; it is
#   handed the shared +errors+ array on every call
# @return [Array] the +errors+ array that was passed to each +check_param+ call
def self.check_params(params)
  errors = []

  # All declarations share the same call shape; only the last four arguments vary.
  declare = lambda do |name, type, default_value, comment|
    params.check_param(errors, name, type, default_value, comment)
  end

  declare.call('blast_evalue_vectors', 'Float', 1e-1,
               'Blast E-value used as cut-off when searching for vector fragments')

  declare.call('blast_percent_vectors', 'Integer', 90,
               'Minimum required identity (%) for a reliable vector fragment')

  declare.call('max_vector_to_end', 'Integer', 8,
               'Correct sequences could contain vectors only close to the read end (not within the sequence). The following variable indicates the number of nucleotides from the 5\' or 3\' end that are allowed for considering a vector fragment located at the end. Otherwise, the vector fragment will be qualified as internal and the sequence will be rejected')

  declare.call('min_vector_seq_presence', 'Integer', 50,
               'If a vector fragment is qualified as internal, the fragment should be long enough to be sure that it is a true vector fragment. This is the minimum length of a vector fragment that enables sequence rejection by an internal, unexpected vector')

  # The default database lives under the global formatted-DB directory.
  declare.call('vectors_db', 'DB', File.join($FORMATTED_DB_PATH, 'vectors.fasta'),
               'Vectors database path')

  # Kept as the string 'true' (not a boolean): callers compare it with to_s == 'true'.
  declare.call('middle_vector_rejects', 'String', 'true',
               'Rejects sequences with vectors in the middle')

  errors
end

Instance Method Details

#all_vector_in_linker(vector_beg, vector_end, seq) ⇒ Object



21
22
23
24
25
26
# File 'lib/seqtrimnext/plugins/plugin_vectors.rb', line 21

# Tells whether the vector range, shifted by +seq.insert_start+, lies entirely
# inside the first ActionLinker action registered on +seq+.
#
# Only the first linker returned by +seq.get_actions(ActionLinker)+ is examined.
#
# @param vector_beg [Integer] vector start, relative to the insert
# @param vector_end [Integer] vector end, relative to the insert
# @param seq [Object] sequence responding to +get_actions+ and +insert_start+
# @return [Boolean] false when there is no linker or the range is not contained
def all_vector_in_linker(vector_beg,vector_end,seq)
  linkers = seq.get_actions(ActionLinker)
  return false if linkers.count < 1

  first_linker = linkers[0]
  # Both coordinates are offset by insert_start before being compared with the linker.
  return false if vector_beg + seq.insert_start < first_linker.start_pos

  vector_end + seq.insert_start <= first_linker.end_pos
end

#do_blasts(seqs) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/seqtrimnext/plugins/plugin_vectors.rb', line 28

# Runs one batched blastn-short search of every sequence in +seqs+ against the
# configured vectors database and returns the parsed tabular result.
#
# The time spent in the search and in the parsing is recorded separately via
# +add_plugin_stats+ under 'execution_time' ('blast' and 'parse').
#
# @param seqs [#each] sequences responding to +seq_name+ and +seq_fasta+
# @return [BlastTableResult] parsed BLAST output
def do_blasts(seqs)
  db_option = "-db #{@params.get_param('vectors_db')}"
  search_options = " -task blastn-short -evalue #{@params.get_param('blast_evalue_vectors')} -perc_identity #{@params.get_param('blast_percent_vectors')} -culling_limit 1"
  blast = BatchBlast.new(db_option, 'blastn', search_options)

  $LOG.debug('BLAST:'+blast.get_blast_cmd)

  # FASTA input is passed as a flat list: header line, sequence line, header line, ...
  fasta_lines = []
  seqs.each do |seq|
    fasta_lines.push(">"+seq.seq_name, seq.seq_fasta)
  end

  started_at = Time.now
  raw_table = blast.do_blast(fasta_lines, :table, false)
  add_plugin_stats('execution_time', 'blast', Time.now - started_at)

  started_at = Time.now
  parsed_table = BlastTableResult.new(raw_table)
  add_plugin_stats('execution_time', 'parse', Time.now - started_at)

  return parsed_table
end

#exec_seq(seq, blast_query) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/seqtrimnext/plugins/plugin_vectors.rb', line 61

# Turns the BLAST hits found for +seq+ into vector actions on that sequence.
#
# Steps, in order:
# 1. Checks that +blast_query+ belongs to +seq+ (same name), raising otherwise.
# 2. Passes the hits through +merge_hits+ repeatedly until the number of
#    resulting vectors stops changing (presumably this collapses overlapping
#    hits -- confirm against Plugin#merge_hits).
# 3. For every resulting vector that is either near a read end (margin
#    parameter 10) or at least 'min_vector_seq_presence' long, creates an
#    action. A vector that is not near an end when judged with
#    'min_vector_seq_presence' is labelled 'ActionUnexpectedVector', counted in
#    the stats, and (when 'middle_vector_rejects' is 'true') rejects the
#    sequence. Any other vector is labelled 'ActionVectors'.
# 4. When 'next_generation_sequences' is 'true', vectors that lie inside an
#    ActionLinker / ActionMultipleLinker range are skipped and the created
#    actions do not cut.
#
# The action type is decided independently for each vector, so one internal
# vector does not cause later end-located vectors to be labelled as unexpected.
#
# @param seq [Object] sequence being annotated
# @param blast_query [Object] BLAST query result; must respond to +query_id+ and +hits+
# @return [Object] whatever +seq.add_actions+ returns
# @raise [RuntimeError] when the BLAST query id differs from the sequence name
def exec_seq(seq,blast_query)
  if blast_query.query_id != seq.seq_name
    raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end

  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for vectors into the sequence " 

  # First round: collect the hits as vectors and remember which vector ids were found.
  vectors = []
  vectors_ids = []
  merge_hits(blast_query.hits, vectors, vectors_ids)

  # Further rounds: keep merging until a pass no longer changes the vector count.
  loop do
    previous = vectors
    vectors = []
    merge_hits(previous, vectors)
    break if previous.count == vectors.count
  end

  actions = []
  min_vector_size = @params.get_param('min_vector_seq_presence').to_i

  vectors.each do |v|
    vector_size = v.q_end - v.q_beg + 1

    # Short fragments are only trusted when they sit at a read end.
    next unless near_to_extrem(v, seq, 10) || (vector_size >= min_vector_size)

    piro_on = @params.get_param('next_generation_sequences').to_s

    # With NGS reads, a vector hit contained in an already detected linker is ignored.
    outside_linkers = (piro_on == 'true') &&
                      !seq.range_inside_action_type?(v.q_beg, v.q_end, ActionLinker) &&
                      !seq.range_inside_action_type?(v.q_beg, v.q_end, ActionMultipleLinker)
    next unless outside_linkers || (piro_on == 'false')

    # Decide the action type for THIS vector only; it must not leak into later iterations.
    type = 'ActionVectors'

    # A big vector that is not at an extreme is an unexpected (internal) vector.
    unless near_to_extrem(v, seq, min_vector_size)
      type = 'ActionUnexpectedVector'

      if @params.get_param('middle_vector_rejects').to_s == 'true'
        seq.seq_rejected = true
        seq.seq_rejected_by_message = 'unexpected vector'
      end

      add_stats('rejected', 'unexpected_vector')
    end

    a = seq.new_action(v.q_beg, v.q_end, type)
    a.message = v.definition
    a.found_definition = vectors_ids   # every action shares the full list of vector ids
    a.reversed = v.reversed
    a.cut = false if piro_on == 'true' # vectors don't cut when piro is on

    actions.push a

    add_stats('vector_size', vector_size)
    vectors_ids.each do |vector_id|
      add_stats('vectors_ids', vector_id)
    end
  end

  seq.add_actions(actions)
end

#near_to_extrem(c, seq, min_vector_size) ⇒ Object

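Returns true when the hit c starts within half of min_vector_size of the sequence start, or ends within that distance of the sequence end. (The text originally shown here, "MAXIMUM NUMBER OF DIFFERENT ALIGNED SEQUENCES TO KEEP FROM BLAST DATABASE", appears to describe the MAX_TARGETS_SEQS constant rather than this method.)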



16
17
18
19
# File 'lib/seqtrimnext/plugins/plugin_vectors.rb', line 16

# Tells whether hit +c+ is close to either end of +seq.seq_fasta+.
#
# The allowed margin is half of +min_vector_size+ (truncated to an integer).
#
# @param c [Object] hit responding to +q_beg+ and +q_end+
# @param seq [Object] sequence responding to +seq_fasta+
# @param min_vector_size [Numeric] size from which the margin is derived
# @return [Boolean] true when the hit begins within the margin of the start,
#   or ends within the margin of the last position
def near_to_extrem(c,seq,min_vector_size)
  margin = (min_vector_size / 2).to_i

  # Close to the beginning: stepping back by the margin falls before position 0.
  return true if c.q_beg - margin < 0

  # Close to the end: stepping forward by the margin reaches the last index.
  c.q_end + margin >= seq.seq_fasta.size - 1
end