Class: StatsReport

Inherits:
Object
  • Object
show all
Defined in:
lib/seqtrimnext_report/classes/stats_report.rb

Instance Method Summary collapse

Constructor Details

#initialize(all_params, initial_stats, stats, plugin_nts_hash, output_folder, output_latex) ⇒ StatsReport

Returns a new instance of StatsReport.



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/seqtrimnext_report/classes/stats_report.rb', line 3

# Builds the statistics section of the LaTeX report and writes it to
# <output_latex>/stats.tex.
#
# The file receives, in order: the \input lines for the graph snippets, a
# summary table of input/output reads (with linker details when paired reads
# are present), an optional MID summary, the "top five" tables for vectors,
# adapters and contaminants, and the per-plugin table of trimmed nucleotides.
#
# all_params::      not used by this method.
# initial_stats::   Hash; read here for 'smallest_sequence_size',
#                   'biggest_sequence_size' and 'nucleotide_count', and handed
#                   to get_mode / get_mean.
# stats::           Hash; stats['sequences']['count'] is mandatory. When it is
#                   missing a message is printed and exit(-1) is called.
# plugin_nts_hash:: forwarded unchanged to print_trimmed_nts_stats_table.
# output_folder::   not used by this method.
# output_latex::    directory in which stats.tex is created.
def initialize(all_params,initial_stats,stats,plugin_nts_hash,output_folder,output_latex)

  # Block form: the file is closed even when an exception (or the exit below)
  # interrupts the report generation.
  File.open(File.join(output_latex,'stats.tex'), 'w') do |output2|
    output2.puts "%!TEX root = FinalReport.tex"

    if stats['sequences'].nil? || stats['sequences']['count'].nil?
      puts "sequences info does not exist in stats.json\n"
      exit(-1)
    end

    counts = stats['sequences']['count']
    input_seqs = counts['input_count'].to_i
    rejected_seqs = counts['rejected'].to_i
    output_seqs = counts['output_seqs'].to_i

    # ------------------------------------------------ only present for paired reads
    output_seqs_paired = 0
    total_output_seqs = 0
    unless counts['output_seqs_paired'].nil?
      output_seqs_paired = counts['output_seqs_paired'].to_i
      total_output_seqs = output_seqs_paired+output_seqs
    end

    # Only present when low-complexity filtering ran (absent for genomic data).
    low_complex = 0
    unless counts['output_seqs_low_complexity'].nil?
      low_complex = counts['output_seqs_low_complexity'].to_i
    end

    # graph files ----------------------------------------------------
    # The three snippets are included unconditionally; the checks for
    # graphs/size_stats.png, graphs/qualities.png and
    # graphs/PluginExtractInserts_insert_size.png are disabled.
    output2.puts '\input{input_graph}'
    output2.puts '\input{qv_graph}'
    output2.puts '\input{output_graph}'+"\n\n"
    #------------------------------------------------------------------

    (input_mode, output_mode) = get_mode(initial_stats,stats)
    (input_mean, output_mean) = get_mean(initial_stats,stats)

    #--------------------------------------------------------------------------- build table
    output2.puts '\begin{table}[H]'
    output2.puts '\begin{center}'
    output2.puts '\begin{tabular}{l r r}'
    output2.puts " \\hline"
    # input_seqs comes from to_i, so it is never nil and the row is always written.
    output2.puts "Input reads: & total & #{input_seqs} \\\\"
    unless initial_stats['smallest_sequence_size'].nil?
      output2.puts " & Smallest read (bp) & #{initial_stats['smallest_sequence_size'].to_i} \\\\"
    end
    unless initial_stats['biggest_sequence_size'].nil?
      output2.puts " & Largest read (bp)& #{initial_stats['biggest_sequence_size'].to_i} \\\\"
    end
    output2.puts " & Mode (bp) & #{input_mode} \\\\"
    output2.puts " & Mean (bp)& #{input_mean} \\\\"

    output2.puts " \\\\ \\hline"
    output2.puts "Output results: & total & #{output_seqs} \\\\"
    output2.puts " & Rejected & #{rejected_seqs} \\\\"
    if low_complex != 0
      output2.puts " & Low complexity reads & #{low_complex} \\\\"
    end
    output2.puts " & Mode (bp)& #{output_mode} \\\\"
    output2.puts " & Mean (bp)& #{output_mean} \\\\"

    #-------------------------------------------------- only for paired reads
    output2.puts "\\\\"
    if output_seqs_paired != 0
      output2.puts " & Output paired reads & #{output_seqs_paired} \\\\"
      output2.puts " & Total output reads & #{total_output_seqs} \\\\"
      output2.puts "\\\\ \\hline"
      output2.puts "Linkers: & & \\\\"

      linker_stats = stats['PluginLinker']
      unless linker_stats.nil?
        unless linker_stats['linker_id'].nil?
          linker_stats['linker_id'].each do |linker|
            output2.puts " & #{linker[0]} & #{linker[1]} \\\\"
          end
        end
        output2.puts "\\\\ \\hline"
        unless linker_stats['without_linker'].nil?
          output2.puts "Without linkers: & total & #{linker_stats['without_linker']['0']} \\\\"
        end

        output2.puts "\\\\ \\hline"
        output2.puts "Multiple linkers: & & \\\\"

        unless linker_stats['multiple_linker_id'].nil?
          linker_stats['multiple_linker_id'].each do |linker|
            output2.puts " & #{linker[0]} & #{linker[1]} \\\\"
          end
        end
        unless linker_stats['multiple_linker_count'].nil?
          linker_stats['multiple_linker_count'].each do |linker|
            output2.puts " & With #{linker[0]} linkers & #{linker[1]} \\\\"
          end
        end
      end
    end
    #--------------------------------------------------- end paired reads

    output2.puts "\\hline"

    output2.puts '\end{tabular}'
    output2.puts '\label{table:nonlin}'
    output2.puts '\end{center}'
    output2.puts '\end{table}'+"\n\n"
    #------------------------------------------------------------------------------- end table

    #-------------------------------------------------- MIDs
    if (!stats['PluginMids'].nil?) && (!stats['PluginMids']['mid_id'].nil?)
      mid_seqs = stats['PluginMids']['mid_id']['total']
      mid_seqs_percent = sprintf("%0.3f", (mid_seqs.to_f*100/input_seqs.to_f))
      output2.puts '\noindent \begin{minipage}{\linewidth}'
      output2.puts "number of reads with MID: #{mid_seqs} \(#{mid_seqs_percent}\\%\)"+'\\\\'+'\\\\'

      # At or below 1% of the input reads a highlighted warning box is added.
      if (mid_seqs_percent.to_f <= 1)
        output2.puts '\fcolorbox{black}{yellow}{'+"\n"+'\begin{minipage}{\linewidth}{'+"\n"+'\textbf{WARNING: The number of reads with MID is so low that can be interpreted as a random finding. Your useful sequences are in the no\_MID folder, but you can also add any read classified as having a MID}'+"\n"+'}'+"\n"+'\end{minipage}'+"\n"+'}\\\\\\\\'
      end
      output2.puts '\end{minipage}'+"\n\n"
    end
    #----------------------------------------------------------------------------

    #------------------------------------------------------- make top five tables
    # [plugin section, field holding the id => count hash, caption name]
    [
      ['PluginVectors', 'vectors_ids', 'Vectors'],
      ['PluginAbAdapters', 'adapter_id', 'Adapters'],
      ['PluginContaminants', 'contaminants_ids', 'Contaminants']
    ].each do |plugin, field, title|
      next if stats[plugin].nil?
      top_hash = stats[plugin][field]
      make_a_top_five(output2, top_hash, title) unless top_hash.nil?
    end
    #--------------------------------------------------------------------------

    # For paired reads the left and right inserts are added as well. The
    # PluginExtractInserts section may be missing altogether, in which case
    # there is nothing to add (previously this raised NoMethodError).
    inserts = stats['PluginExtractInserts'] || {}
    paired_nts = 0
    ['left_insert_size', 'right_insert_size'].each do |side|
      next unless inserts[side]
      inserts[side].each do |element|
        paired_nts += element[0].to_i*element[1].to_i
      end
    end
    #-------------------------------------------------------------------------------------------------------------------

    nts_total = initial_stats['nucleotide_count']
    print_trimmed_nts_stats_table(stats, output2, plugin_nts_hash,nts_total,paired_nts)
  end

  puts "Statistic information was added to the report"

end

Instance Method Details

#get_mean(initial_stats, stats) ⇒ Object



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/seqtrimnext_report/classes/stats_report.rb', line 203

# Returns the mean read length before and after processing.
#
# The input mean is read from initial_stats['mean_of_sequence_sizes'].
# The output mean is computed from the histogram stored in
# stats['PluginExtractInserts']['insert_size'], whose keys are used as sizes
# and whose values are used as the number of reads of that size.
#
# initial_stats:: Hash, may be nil or empty.
# stats::         Hash; the 'PluginExtractInserts' section may be absent.
#
# Returns [input_mean, output_mean]. Each element is a String formatted with
# one decimal, or the Integer 0 when the value cannot be determined.
def get_mean(initial_stats,stats)
  # take the mean from initial_stats.json
  input_mean = 0
  if !initial_stats.nil? && !initial_stats.empty?
    mean = initial_stats['mean_of_sequence_sizes']
    # A missing key used to make sprintf raise; report 0 like the empty case.
    input_mean = sprintf("%0.1f", mean) unless mean.nil?
  end

  # calculate the mean using data from stats.json; a missing plugin section
  # is treated the same as a missing histogram
  histogram = (stats['PluginExtractInserts'] || {})['insert_size']
  return [input_mean, 0] if histogram.nil?

  nts_count = 0
  seqs_count = 0
  histogram.each do |size, seqs|
    # Convert once so both totals agree and string counts are accepted.
    seqs = seqs.to_i
    seqs_count += seqs
    nts_count += size.to_f*seqs
  end

  if nts_count == 0 || seqs_count == 0
    output_mean = 0
  else
    output_mean = sprintf("%0.1f", (nts_count/seqs_count))
  end

  [input_mean, output_mean]
end

#get_mode(initial_stats, stats) ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/seqtrimnext_report/classes/stats_report.rb', line 175

# Returns the mode of the read lengths before and after processing.
#
# The input mode is read from initial_stats['mode_of_sizes']. The output mode
# is whatever ScbiStats#fat_mode returns for an array built from
# stats['PluginExtractInserts']['insert_size']: index = size (key.to_i),
# element = the stored value for that size, with gaps filled with 0.
#
# initial_stats:: Hash, may be nil or empty.
# stats::         Hash; the 'PluginExtractInserts' section may be absent.
#
# Returns [input_mode, output_mode]; either element is 0 when its source data
# is not available.
def get_mode(initial_stats,stats)
  # take the mode from initial_stats.json
  if !initial_stats.nil? && !initial_stats.empty?
    input_mode = initial_stats['mode_of_sizes']
  else
    input_mode = 0
  end

  # calculate the mode using data from stats.json; a missing plugin section
  # is treated the same as a missing histogram (it used to raise NoMethodError)
  histogram = (stats['PluginExtractInserts'] || {})['insert_size']
  return [input_mode, 0] if histogram.nil?

  mode_array = []
  histogram.each do |size, seqs|
    mode_array[size.to_i] = seqs
  end
  # Sizes that never occurred are left as nil by the sparse assignment above.
  mode_array.map! { |e| e || 0 }

  # NOTE(review): an empty histogram is still handed to ScbiStats as [] --
  # confirm that fat_mode copes with it.
  [input_mode, ScbiStats.new(mode_array).fat_mode]
end

#make_a_top_five(output2, top_hash, name) ⇒ Object



234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/seqtrimnext_report/classes/stats_report.rb', line 234

# Writes a LaTeX table listing at most the five entries of top_hash with the
# highest values, in descending order.
#
# output2::  object responding to puts; receives the LaTeX source.
# top_hash:: collection of [identifier, count] pairs (a Hash in the callers
#            visible here); underscores in identifiers are escaped as \_.
# name::     label used in the caption and as the first column header.
def make_a_top_five(output2,top_hash,name)
  #-------------------------------------------------- table preamble
  [
    '\begin{table}[H]',
    '\ccaption{'+"List of the most frequent~#{name}~found among your reads"+'}',
    '\vspace{-0.5cm}',
    '\begin{center}',
    '\begin{tabular}{|p{11cm}|r|}',
    '\hline',
    "#{name} " +'& sequences \\\\ [0.5ex]',
    '\hline'
  ].each { |line| output2.puts line }

  #-------------------------------------------------- rows, highest count first
  ranked = top_hash.sort { |left, right| right[1] <=> left[1] }
  ranked.first(5).each do |entry|
    escaped_id = entry[0].gsub('_','\_')
    output2.puts "#{escaped_id} \& #{entry[1]}"+'\\\\'
  end

  #-------------------------------------------------- table closing
  # (no \label{table:top5} is emitted for this table)
  output2.puts '\hline'
  output2.puts '\end{tabular}'
  output2.puts '\end{center}'
  output2.puts '\end{table}'+"\n\n"
end


#print_trimmed_nts_stats_table(stats, output2, plugin_nts_hash, nts_total, paired_nts) ⇒ Object



263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# File 'lib/seqtrimnext_report/classes/stats_report.rb', line 263

# Writes the "nucleotides removed in every plugin" LaTeX table, followed by a
# minipage holding the warning boxes raised while building it.
#
# stats::           Hash of plugin sections; stats[plugin][field] is iterated
#                   as [size, count] pairs and summed as size*count.
# output2::         object responding to puts; receives the LaTeX source.
# plugin_nts_hash:: pairs of [row name, settings], where settings provides
#                   'plugin', 'field', 'msg', 'threshold' and 'warning'.
#                   The settings are only read, never modified.
# nts_total::       total nucleotide count used as the 100% reference.
# paired_nts::      extra nucleotides added to the PluginExtractInserts /
#                   insert_size row when greater than 0.
#
# For PluginExtractInserts a warning is raised when the percentage is at or
# below the threshold; for every other plugin when it is at or above it.
# Rows are written by descending percentage, except the 'insert_size' row,
# which always goes last in its own section.
#
# NOTE: rows are keyed by field name, so two entries sharing the same 'field'
# overwrite each other.
def print_trimmed_nts_stats_table(stats, output2, plugin_nts_hash, nts_total,paired_nts)

  nts_table_hash = {}
  insert_array = []
  warning_array = []

  plugin_nts_hash.each do |plugin|
    my_name = plugin[0]
    settings = plugin[1]
    plugin_name = settings['plugin']
    plugin_field = settings['field']
    plugin_msg = settings['msg']
    plugin_threshold = settings['threshold']
    plugin_warning = settings['warning']

    # Plugins (or fields) that produced no stats get no row at all.
    next if stats[plugin_name].nil? || stats[plugin_name][plugin_field].nil?

    count = 0
    stats[plugin_name][plugin_field].each do |element|
      count += element[0].to_i*element[1].to_i
    end

    is_insert_plugin = (plugin_name == 'PluginExtractInserts')
    if is_insert_plugin && (plugin_field == 'insert_size') && (paired_nts > 0)
      count += paired_nts
    end

    my_percent = sprintf("%0.3f", (count.to_f*100/nts_total.to_f))

    if is_insert_plugin
      triggered = (my_percent.to_f <= plugin_threshold)
      box_color = 'pink'
      boxes = insert_array
    else
      triggered = (my_percent.to_f >= plugin_threshold)
      box_color = 'yellow'
      boxes = warning_array
    end

    if triggered
      # Non-destructive gsub: the caller's message template keeps its
      # 'my_percent' placeholder for any later use.
      message = plugin_msg.gsub('my_percent',"#{my_percent}")
      boxes.push '\noindent \fcolorbox{black}{'+box_color+'}{'+"\n"+'\begin{minipage}{\linewidth}{'+"\n"+'\textbf{'+"#{plugin_warning}  #{message}"+'}'+"\n"+'}'+"\n"+'\end{minipage}'+"\n"+'}\\\\\\\\'
    else
      plugin_warning = 'OK'
    end

    nts_table_hash[plugin_field] = ["#{my_name}&#{count}&#{my_percent} \\%&#{plugin_warning}\\\\",my_percent]
  end

#-------------------------------------------------- build table
  output2.puts '\begin{table}[H]'
  output2.puts '\ccaption{Summary of nucleotides removed in every plugin.}'
  output2.puts '\begin{center}'
  output2.puts '\begin{tabular}{l r r c}'
  output2.puts '\hline'
  output2.puts 'Plugin & Nucleotides & Percent & Warnings \\\\ [0.5ex]'
  output2.puts '\hline'

  # Order rows by percentage, highest first. The comparison uses to_f because
  # to_i would truncate every value below 1% to 0 and leave them unordered.
  nts_table_ordered = nts_table_hash.sort { |a, b| b[1][1].to_f <=> a[1][1].to_f }

  nts_table_ordered.each do |element|
    if (element[0] != 'insert_size')
      output2.puts element[1][0]
    end
  end

  output2.puts '\hline'
  if (!nts_table_hash['insert_size'].nil?)
    output2.puts nts_table_hash['insert_size'][0]
  end
  output2.puts '\hline'
  output2.puts '\end{tabular}'
  output2.puts '\label{table:nonlin}'
  output2.puts '\end{center}'
  output2.puts '\end{table}'+"\n\n"
#-------------------------------------------------- end table

  output2.puts '\noindent \begin{minipage}{\textwidth}'

  output2.puts insert_array.join("\n")
  output2.puts warning_array.join("\n")

  output2.puts '\end{minipage}'+"\n\n"

end