Module: ViennaRna::Utils

Defined in:
lib/vienna_rna/modules/utils.rb

Class Method Summary

Class Method Details

.fastas_from_file(path) ⇒ Object



4
5
6
7
# File 'lib/vienna_rna/modules/utils.rb', line 4

# Reads every entry of the flat file at +path+ into an array.
#
# The file is opened through the block form of Bio::FlatFile.auto so that the
# underlying handle is released once all entries have been read, instead of
# staying open until garbage collection.
#
# @param path [String] location of the file handed to Bio::FlatFile.auto
# @return [Array] the entries yielded by the flat file, in file order
def fastas_from_file(path)
  entries = nil

  Bio::FlatFile.auto(path) do |flat_file|
    # Force it to not be lazy: materialize every entry while the file is open.
    entries = flat_file.to_enum.to_a
  end

  entries
end

.histogram(data, title = "", options = {}) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/vienna_rna/modules/utils.rb', line 87

# Bins +data+ and draws the counts as a box plot via +plot+.
#
# Each bin is centred on a value x and counts the elements in the half-open
# interval [x - bin_size / 2, x + bin_size / 2).
#
# The caller's +options+ hash is left untouched: the method works on a shallow
# copy, so the same hash (including :bin_size) can be reused across calls.
#
# @param data [Enumerable<Numeric>] values to bin; must not be empty
# @param title [String] plot title
# @param options [Hash] :bin_size (default 1) is consumed here; :filename
#   switches :output to "file"; :plot settings are merged with the title,
#   yrange, xtics and style computed here (computed values win); the rest is
#   passed on to +plot+
# @raise [ArgumentError] if +data+ is empty or :bin_size is not positive
# @return [Object] whatever +plot+ returns
def histogram(data, title = "", options = {})
  options  = options.dup
  bin_size = options.delete(:bin_size) || 1
  raise ArgumentError, "bin_size must be positive" unless bin_size > 0

  lowest, highest = data.minmax
  raise ArgumentError, "data must not be empty" if lowest.nil?

  half   = bin_size / 2.0
  lower  = (lowest - half).floor
  upper  = (highest + half).ceil
  groups = (lower + half).step(upper, bin_size).map do |center|
    [center, data.count { |value| value >= center - half && value < center + half }]
  end

  options[:output] = "file" if options[:filename]
  options[:plot]   = (options[:plot] || {}).merge(
    title:  title,
    # Leave 10% headroom above the tallest bin.
    yrange: "[0:#{groups.map(&:last).max * 1.1}]",
    xtics:  "#{[bin_size, 5].max}",
    style:  "fill solid 0.5 border"
  )

  plot([{ x: groups.map(&:first), y: groups.map(&:last), style: "boxes" }], options)
end

.plot(data, options = {}) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/vienna_rna/modules/utils.rb', line 33

# Renders one or more 2D data series through Gnuplot.
#
# @param data [Array<Hash>] one hash per series; reads :x and :y plus the
#   optional :style (defaults to "points"), :color and :title keys
# @param options [Hash] reads :output, :filename, :dimensions and :plot;
#   every key/value pair in :plot is sent to the Gnuplot::Plot object
# @return [Object] whatever Gnuplot.open returns
def plot(data, options = {})
  Gnuplot.open do |io|
    Gnuplot::Plot.new(io) do |chart|
      chart.autoscale

      # Only an :output value matching /file/i redirects the plot to a PNG file.
      case options[:output]
      when /file/i
        image_size = options[:dimensions] || "800,600"
        chart.output(options[:filename])
        chart.terminal("png size %s" % image_size)
      end

      (options[:plot] || {}).each do |setting, value|
        chart.send(setting, value)
      end

      datasets = data.map do |series|
        Gnuplot::DataSet.new([series[:x], series[:y]]) do |dataset|
          dataset.with      = series[:style] || "points"
          dataset.linecolor = "rgb '#{series[:color]}'" if series[:color]

          if series[:title]
            dataset.title = series[:title]
          else
            dataset.notitle
          end
        end
      end

      chart.data = datasets
    end
  end
end

.quick_overlay(data, title = "", options = {}) ⇒ Object



122
123
124
125
126
127
128
# File 'lib/vienna_rna/modules/utils.rb', line 122

# Overlays several point series on one plot, each drawn with "linespoints"
# unless overridden per series.
#
# Expected shape of +data+:
#   [{ data: [[x_0, y_0], [x_1, y_1], ...], label: "Label" }, { data: [[x_0, y_0], [x_1, y_1], ...] }]
# A series may also carry an :options hash, which is merged over the generated
# series settings (title, x, y, style).
#
# The caller's +options+ hash is not modified; a new hash is built and passed
# on to +plot+.
#
# @param data [Array<Hash>] series descriptions as shown above
# @param title [String] plot title, merged into options[:plot]
# @param options [Hash] forwarded to +plot+; :filename switches :output to "file"
# @return [Object] whatever +plot+ returns
def quick_overlay(data, title = "", options = {})
  plot_options = options.merge(plot: (options[:plot] || {}).merge(title: title))
  plot_options[:output] = "file" if plot_options[:filename]

  series = data.map do |entry|
    points = entry[:data]

    {
      title: entry[:label],
      x:     points.map(&:first),
      y:     points.map(&:last),
      style: "linespoints"
    }.merge(entry[:options] || {})
  end

  plot(series, plot_options)
end

.quick_plot(data, title = "", options = {}) ⇒ Object



118
119
120
# File 'lib/vienna_rna/modules/utils.rb', line 118

# Plots a single point series by wrapping it in the one-element series list
# that quick_overlay expects.
#
# @param data [Array<Array>] the [x, y] pairs of the series
# @param title [String] forwarded to quick_overlay
# @param options [Hash] forwarded to quick_overlay
# @return [Object] whatever quick_overlay returns
def quick_plot(data, title = "", options = {})
  single_series = [{ data: data }]

  quick_overlay(single_series, title, options)
end

.regress(x, y, degree) ⇒ Object



25
26
27
28
29
30
31
# File 'lib/vienna_rna/modules/utils.rb', line 25

# Fits a polynomial of the given degree to the points (x[i], y[i]) by solving
# the normal equations (X^T X)^-1 X^T y, where each row of X holds the powers
# 0..degree of one x value.
#
# @param x [Array<Numeric>] sample positions
# @param y [Array<Numeric>] sample values, one per entry of +x+
# @param degree [Integer] highest power in the fitted polynomial
# @return [Array<Numeric>] coefficients ordered from power 0 up to +degree+
def regress(x, y, degree)
  exponents  = (0..degree).map(&:to_f)
  design     = Matrix.rows(x.map { |value| exponents.map { |exponent| value ** exponent } })
  design_t   = design.transpose
  response   = Matrix.column_vector(y)
  solution   = (design_t * design).inverse * design_t * response

  # The solution is a single-column matrix; return that column as a plain array.
  solution.column(0).to_a
end

.roc(data, title = "", options = {}) ⇒ Object



104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/vienna_rna/modules/utils.rb', line 104

# Plots a ROC curve for two score lists and puts the area under the curve
# into the plot title.
#
# data = [[true_score_1, true_score_2, ...], [false_score_1, false_score_2, ...]]
#
# Scores from data[0] are labelled 1 and scores from data[1] are labelled -1
# before being handed to ROC.curve_points.
#
# The generated title is merged into any caller-supplied options[:plot]
# settings (rather than replacing them), and the caller's +options+ hash is
# not modified.
#
# @param data [Array<Array<Numeric>>] the two score lists described above
# @param title [String] prefix of the plot title; "AUC: <area>" is appended
# @param options [Hash] forwarded to +plot+; :filename switches :output to "file"
# @return [Object] whatever +plot+ returns
def roc(data, title = "", options = {})
  labelled  = data[0].map { |score| [score, 1] } + data[1].map { |score| [score, -1] }
  roc_curve = ROC.curve_points(labelled)

  # Area under the curve, one segment at a time: the rectangle below the lower
  # endpoint plus the triangle between the two endpoints.
  area = roc_curve.each_cons(2).inject(0) do |sum, (a, b)|
    delta_x, delta_y = b[0] - a[0], b[1] - a[1]
    sum + (delta_x * delta_y / 2 + delta_x * [a[1], b[1]].min)
  end

  plot_settings = (options[:plot] || {}).merge(title: "%s %s %.4f" % [title, "AUC:", area])
  plot_options  = options.merge(plot: plot_settings)
  plot_options[:output] = "file" if plot_options[:filename]

  plot([{ x: roc_curve.map(&:first), y: roc_curve.map(&:last), style: "lines" }], plot_options)
end

.splot(data, options = {}) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/vienna_rna/modules/utils.rb', line 60

# Renders a single 3D data set through Gnuplot's splot.
#
# Expected shape of +data+:
#   [[x_1, y_1, z_1], [x_2, y_2, z_2], ...]
#
# When writing to a file the PNG size comes from options[:dimensions]
# (default "800,600"), matching the behaviour of +plot+.
#
# @param data [Array<Array>] points as [x, y, z] triples
# @param options [Hash] reads :output, :filename, :dimensions, :plot and
#   :style (data set style, defaults to "lines"); every key/value pair in
#   :plot is sent to the Gnuplot::SPlot object
# @return [Object] whatever Gnuplot.open returns
def splot(data, options = {})
  # Turn the list of points into three parallel lists: all x, all y, all z.
  orthogonal_data = data.each_with_object([[], [], []]) do |point, axes|
    axes.zip(point) { |axis, coordinate| axis << coordinate }
  end

  Gnuplot.open do |gnuplot|
    Gnuplot::SPlot.new(gnuplot) do |surface|
      surface.autoscale

      # Only an :output value matching /file/i redirects the plot to a PNG file.
      case options[:output]
      when /file/i
        surface.output(options[:filename])
        surface.terminal("png size %s" % (options[:dimensions] || "800,600"))
      end

      (options[:plot] || {}).each do |setting, value|
        surface.send(setting, value)
      end

      surface.data = [
        Gnuplot::DataSet.new(orthogonal_data) do |dataset|
          dataset.with = options[:style] || "lines"
        end
      ]
    end
  end
end

.write_fastas!(fastas, directory, base_name, group_size = 10) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/vienna_rna/modules/utils.rb', line 9

# Writes +fastas+ to numbered FASTA files, +group_size+ records per file.
#
# Files are named "<base_name>_<index>.fa" inside +directory+, with the index
# counting groups from 0. A file that already exists is never overwritten; a
# warning is printed and that group is skipped.
#
# Uses File.exist? because File.exists? was removed in Ruby 3.2.
#
# @param fastas [Enumerable] objects responding to #fasta, whose result
#   responds to #definition and #seq
# @param directory [String] target directory for the files
# @param base_name [String] file name prefix
# @param group_size [Integer] number of records written to each file
# @return [Object] the result of iterating the slices with each_with_index
def write_fastas!(fastas, directory, base_name, group_size = 10)
  fastas.each_slice(group_size).each_with_index do |fasta_group, i|
    path = File.join(directory, "#{base_name}_#{i}.fa")

    if File.exist?(path)
      puts "Warning: file '#{path}' exists. Skipping."
      next
    end

    File.open(path, "w") do |file|
      fasta_group.each do |folding|
        file.write(">%s\n%s\n" % [folding.fasta.definition, folding.fasta.seq])
      end
    end
  end
end