Class: PluginUserContaminants

Inherits:
Plugin
  • Object
show all
Defined in:
lib/seqtrimnext/plugins/plugin_user_contaminants.rb

Constant Summary collapse

MAX_TARGETS_SEQS =

MAXIMUM NUMBER OF DIFFERENT ALIGNED SEQUENCES TO KEEP FROM BLAST DATABASE

4

Instance Attribute Summary

Attributes inherited from Plugin

#stats

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Plugin

#add_plugin_stats, #add_stats, #add_text_stats, auto_setup, check_param, #execute, get_graph_filename, get_graph_title, graph_ignored?, ignored_graphs, #initialize, #merge_hits, #overlapX?, plot_setup, valid_graphs

Constructor Details

This class inherits a constructor from Plugin

Class Method Details

.check_params(params) ⇒ Object

Returns an array with the errors caused by missing parameters



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/seqtrimnext/plugins/plugin_user_contaminants.rb', line 141

# Declares the parameters used by this plugin and checks each of them through
# +params.check_param+.
#
# @param params [#check_param] object that receives one +check_param+ call per
#   parameter: (errors array, name, type label, default value, description)
# @return [Array] the same array that was handed to every +check_param+ call
def self.check_params(params)
  problems = []

  # One row per parameter: [name, type label, default value, description].
  # Rows are checked in the order they are listed.
  specs = [
    ['blast_evalue_user_contaminant', 'Float', 1e-10,
     'Blast E-value used as cut-off when searching for contaminations'],
    ['blast_percent_user_contaminant', 'Integer', 85,
     'Minimum required identity (%) for a reliable user contaminant match'],
    # default used to be 40
    ['min_user_contaminant_size', 'Integer', 30,
     'Minimum hit size (nt) for considering for user contaminant'],
    # empty default; it used to be File.join($FORMATTED_DB_PATH,'user_contaminant.fasta')
    ['user_contaminant_db', 'DB', "",
     'Path for user contaminant database'],
    # default used to be 'blastn'
    ['blast_task_template_user_contaminants', 'String', 'megablast',
     'Blast task template for user contaminations'],
    # default used to be ''; the value carries literal double quotes
    ['blast_extra_params_user_contaminants', 'String', '"-word_size=22"',
     'Blast extra params for user contaminations']
  ]

  specs.each do |name, type, default, description|
    params.check_param(problems, name, type, default, description)
  end

  problems
end

Instance Method Details

#can_execute?Boolean

Returns:

  • (Boolean)


36
37
38
# File 'lib/seqtrimnext/plugins/plugin_user_contaminants.rb', line 36

# Tells whether a user contaminant database has been configured.
#
# @return [Boolean] false when the 'user_contaminant_db' parameter is nil or
#   empty, true otherwise
def can_execute?
  db = @params.get_param('user_contaminant_db')
  # A nil value is treated like an empty path instead of raising
  # NoMethodError on nil.empty?.
  !(db.nil? || db.empty?)
end

#do_blasts(seqs) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/seqtrimnext/plugins/plugin_user_contaminants.rb', line 41

# Runs one batched blastn search of +seqs+ against the user contaminant
# database and wraps the tabular output in a BlastTableResult.
#
# Records two 'execution_time' plugin stats: 'blast' (time spent in do_blast)
# and 'parse' (time spent building the BlastTableResult).
#
# @param seqs [#each] sequences responding to +seq_name+ and +seq_fasta+
# @return [BlastTableResult] parsed results of the search
def do_blasts(seqs)
  # TODO - Culling limit = 2 because blast fails with this command when it is
  # given cl=1 and dust=no and a low-complexity sequence as input.
  # NOTE(review): the command below still passes -culling_limit 1 - confirm
  # which value is intended.
  task = @params.get_param('blast_task_template_user_contaminants')

  # Strip the double quotes that wrap the configured extra parameters.
  extras = @params.get_param('blast_extra_params_user_contaminants').gsub(/^\"|\"?$/, '')

  db_option = "-db #{@params.get_param('user_contaminant_db')}"
  # -max_target_seqs (MAX_TARGETS_SEQS) is not part of the command.
  search_options = " -task #{task} #{extras}" \
                   " -evalue #{@params.get_param('blast_evalue_user_contaminant')}" \
                   " -perc_identity #{@params.get_param('blast_percent_user_contaminant')}" \
                   ' -culling_limit 1'

  blast = BatchBlast.new(db_option, 'blastn', search_options)

  $LOG.debug('BLAST:'+blast.get_blast_cmd(:table))

  # Flat list alternating a ">name" header line and the sequence itself.
  query_lines = []
  seqs.each do |seq|
    query_lines.push(">"+seq.seq_name, seq.seq_fasta)
  end

  started = Time.now
  raw_table = blast.do_blast(query_lines, :table, false)
  add_plugin_stats('execution_time', 'blast', Time.now - started)

  started = Time.now
  parsed = BlastTableResult.new(raw_table)
  add_plugin_stats('execution_time', 'parse', Time.now - started)

  parsed
end

#exec_seq(seq, blast_query) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/seqtrimnext/plugins/plugin_user_contaminants.rb', line 78

# Flags the largest user-contaminant hit of +blast_query+ on +seq+.
#
# The hits are passed through +merge_hits+ repeatedly until a pass no longer
# changes the number of hits (presumably merging overlapping hits - confirm
# against Plugin#merge_hits). The hit with the longest query span then gets an
# 'ActionUserContaminant' action, a comment, a file tag and two stats entries.
#
# @param seq [Object] sequence being processed; must respond to seq_name,
#   new_action, add_comment, add_file_tag and add_actions
# @param blast_query [Object] BLAST result for +seq+; must respond to
#   query_id and hits
# @return [Object, nil] the result of seq.add_actions, or nil when no
#   contaminant hit was found
# @raise [RuntimeError] when the BLAST query id and the sequence name differ
def exec_seq(seq,blast_query)
  if blast_query.query_id != seq.seq_name
    raise "Blast and seq names does not match, blast:#{blast_query.query_id} sn:#{seq.seq_name}"
  end

  $LOG.debug "[#{self.class.to_s}, seq: #{seq.seq_name}]: looking for classify into the sequence"

  contaminants = []
  merge_hits(blast_query.hits, contaminants, nil, false)

  # Keep re-merging until a pass yields as many hits as it was given.
  loop do
    previous = contaminants
    contaminants = []
    merge_hits(previous, contaminants, nil, false)
    break if previous.count == contaminants.count
  end

  return if contaminants.empty?

  # Pick the hit with the longest query span. The previous code called the
  # non-mutating Array#sort and discarded its result, so "last" was simply
  # the last merged hit rather than the biggest one.
  # NOTE(review): 'min_user_contaminant_size' is not enforced here (it was
  # read but never used) - confirm whether small hits should be filtered out.
  biggest_contaminant = contaminants.max_by { |c| c.q_end - c.q_beg + 1 }
  hit_size = biggest_contaminant.q_end - biggest_contaminant.q_beg + 1

  # Register the action on the sequence for the region covered by the hit.
  action = seq.new_action(biggest_contaminant.q_beg, biggest_contaminant.q_end, 'ActionUserContaminant')
  action.message = biggest_contaminant.definition
  action.tag_id = biggest_contaminant.definition.gsub(' ','_')

  seq.add_comment("Contaminated: #{biggest_contaminant.definition}")

  # Tag the sequence so that it is saved to the 'with_user_contaminant' file.
  seq.add_file_tag(0, 'with_user_contaminant', :both, 10)

  add_stats('user_contaminant_size', hit_size)
  add_stats('user_contaminant_ids', biggest_contaminant.definition)

  seq.add_actions([action])
end

#near_to_extrem(c, seq, min_cont_size) ⇒ Object



17
18
19
20
# File 'lib/seqtrimnext/plugins/plugin_user_contaminants.rb', line 17

# Tells whether hit +c+ lies close to either end of +seq+.
#
# The allowed distance is half of +min_cont_size+ (truncated to an integer).
#
# @param c [Object] hit responding to q_beg and q_end
# @param seq [Object] sequence responding to seq_fasta
# @param min_cont_size [Numeric] minimum contaminant size
# @return [Boolean] true when q_beg minus the margin is negative, or q_end
#   plus the margin reaches seq_fasta.size - 1
def near_to_extrem(c,seq,min_cont_size)
  margin = (min_cont_size / 2).to_i

  # Close to the start: no need to look at the sequence length at all.
  return true if c.q_beg - margin < 0

  # Close to the end.
  last_index = seq.seq_fasta.size - 1
  c.q_end + margin >= last_index
end

#sum_hits_by_id(hits) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/seqtrimnext/plugins/plugin_user_contaminants.rb', line 22

# Adds up the query-span sizes of +hits+, grouped by hit definition.
#
# The leftover debug print of the result as JSON was removed: it wrote to
# stdout on every call and raised NoMethodError when 'json' was not loaded.
#
# @param hits [#each] hits responding to q_beg, q_end and definition
# @return [Hash] definition => summed size (q_end - q_beg + 1) of its hits
def sum_hits_by_id(hits)
  totals = {}

  hits.each do |hit|
    hit_size = hit.q_end - hit.q_beg + 1
    totals[hit.definition] = (totals[hit.definition] || 0) + hit_size
  end

  totals
end