Module: ZetaHunter

Defined in:
lib/zeta_hunter/version.rb,
lib/zeta_hunter.rb,
lib/zeta_hunter/error/error.rb

Overview

Copyright 2015 - 2017 Ryan Moore Contact: [email protected]

This file is part of ZetaHunter.

ZetaHunter is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

ZetaHunter is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with ZetaHunter. If not, see <www.gnu.org/licenses/>.

Defined Under Namespace

Modules: Error

Constant Summary collapse

VERSION =
"0.1.0"

Instance Method Summary collapse

Instance Method Details

#calc_auto_otu_sim(otus, dists, default_sim) ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/zeta_hunter.rb', line 89

# Summarizes, for every OTU, the mean and minimum within-OTU similarity.
#
# Similarity is derived from pairwise distances as 100 - (distance * 100),
# rounded to the nearest integer. The mean uses the average distance over
# all 2-combinations of the OTU's sequences; the minimum uses the largest
# of those distances.
#
# @param otus [Hash] OTU => collection of sequences belonging to that OTU
# @param dists [Hash] nested lookup where dists[s1][s2] is the distance
#   between s1 and s2
# @param default_sim [Object] value stored for both :mean and :min when an
#   OTU holds exactly one sequence
# @return [Hash] OTU => { mean: ..., min: ... }
def calc_auto_otu_sim(otus, dists, default_sim)
  otus.each_with_object({}) do |(otu, seqs), sim_by_otu|
    sim_by_otu[otu] =
      if seqs.count == 1
        # A singleton has no pairs to compare, so fall back to the default.
        { mean: default_sim, min: default_sim }
      else
        pair_dists = seqs.combination(2).map do |first, second|
          dists[first][second]
        end

        avg_dist = pair_dists.reduce(:+) / pair_dists.count

        {
          mean: (100 - (avg_dist * 100)).round,
          min: (100 - (pair_dists.max * 100)).round
        }
      end
  end
end

#clean_str(str) ⇒ Object



135
136
137
# File 'lib/zeta_hunter.rb', line 135

# Produces an identifier-like version of +str+.
#
# Leading and trailing whitespace is removed, every run of characters that
# are neither alphanumeric nor an underscore becomes a single underscore,
# and any resulting run of underscores is collapsed to one.
#
# @param str [String] the text to clean
# @return [String] a new cleaned string
def clean_str(str)
  trimmed = str.strip
  underscored = trimmed.gsub(/[^\p{Alnum}_]+/, "_")
  underscored.squeeze("_")
end

#find_otu_sim(auto_otu_sim, type, seq2otu, seq) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/zeta_hunter.rb', line 117

# Looks up the requested similarity figure for the OTU that +seq+ belongs to.
#
# @param auto_otu_sim [Hash] OTU => hash keyed by :mean and :min
# @param type [Symbol] which figure to return; must be :mean or :min
# @param seq2otu [Hash] sequence => OTU
# @param seq [Object] the sequence whose OTU figure is wanted
# @return [Object] the value stored at auto_otu_sim[otu][type]
# @raise [Error::ArgumentError] if +type+ is neither :mean nor :min, or if
#   +seq+ is not a key of +seq2otu+
# @raise [Error::StandardError] if the OTU of +seq+ is not a key of
#   +auto_otu_sim+
def find_otu_sim(auto_otu_sim, type, seq2otu, seq)
  known_type = type == :mean || type == :min
  raise Error::ArgumentError, "Incorrect type (#{type})" unless known_type

  unless seq2otu.key?(seq)
    raise Error::ArgumentError, "seq '#{seq}' is not in seq2otu.keys"
  end

  otu = seq2otu[seq]

  unless auto_otu_sim.key?(otu)
    raise Error::StandardError, "otu '#{otu}' is not in auto_otu_sim.keys"
  end

  sims_for_otu = auto_otu_sim[otu]
  sims_for_otu[type]
end

#otus_from_otu_info_file(info_f) ⇒ Object

Given the DB seqs info file, return two hash tables: OTU => seqs and seq => OTU.



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/zeta_hunter.rb', line 69

# Reads a tab-separated info file and builds two lookup tables from it.
#
# Lines starting with "#" are treated as comments and skipped, as are lines
# that are empty or whitespace-only (for example a trailing blank line).
# On every other line the first tab-separated field is the accession and the
# second is the OTU; any further fields are ignored.
#
# @param info_f [String] path to the info file
# @return [Array(Hash, Hash)] a two-element array:
#   otu2seqs (OTU => array of accessions, in file order) and
#   seq2otu (accession => OTU)
def otus_from_otu_info_file(info_f)
  otu2seqs = {}
  seq2otu = {}

  # Block form guarantees the handle is closed, even if parsing raises.
  File.open(info_f) do |file|
    file.each_line do |line|
      next if line.start_with?("#")
      # A blank line would otherwise register a nil accession under a nil OTU.
      next if line.strip.empty?

      acc, otu = line.chomp.split("\t")

      seq2otu[acc] = otu
      (otu2seqs[otu] ||= []) << acc
    end
  end

  [otu2seqs, seq2otu]
end

#parse_dist_file(fname) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/zeta_hunter.rb', line 23

# Parses a tab-separated, lower-triangular distance file into a symmetric
# all-vs-all lookup table.
#
# The first line is read as the number of sequences. Each later line holds a
# sequence name followed by its distances to the sequences on the preceding
# lines, in file order. Every sequence gets a self-distance of 0.0, and each
# distance is stored in both directions. Blank lines after the first line
# are skipped.
#
# Calls +abort+ (terminating the process) when the number of parsed sequences
# differs from the declared count, or when any sequence does not end up with
# exactly that many distance entries.
#
# @param fname [String] path to the distance file
# @return [Hash] name => { other_name => distance (Float) }
def parse_dist_file(fname)
  all_v_all_dists = {}
  seqs = []
  num_seqs = -1

  # Block form guarantees the handle is closed, even if parsing raises.
  File.open(fname, "rt") do |file|
    file.each_line.with_index do |line, idx|
      if idx.zero?
        num_seqs = line.chomp.to_i
        next
      end

      # A blank (e.g. trailing) line has no name field to read.
      next if line.strip.empty?

      seq, *these_dists = line.chomp.split("\t")
      seq = seq.strip
      seqs << seq

      row = { seq => 0.0 }
      all_v_all_dists[seq] = row

      these_dists.each_with_index do |raw_dist, dist_i|
        dist = raw_dist.to_f
        # The i-th distance on a line refers to the i-th sequence in the file.
        other_seq = seqs[dist_i]
        row[other_seq] = dist
        all_v_all_dists[other_seq][seq] = dist
      end
    end
  end

  unless all_v_all_dists.count == num_seqs
    abort "Dists count must equal num_seqs"
  end

  complete = all_v_all_dists.each_value.all? { |row| row.count == num_seqs }
  abort "The values of dists are incorrect" unless complete

  all_v_all_dists
end