Class: BigSimon::Utils

Inherits:
Object
  • Object
show all
Defined in:
lib/big_simon/utils.rb

Overview

TODO:

The methods in this class don’t have unit tests yet.

Note:

Records with a duplicate ID are skipped; only the first record seen with a given ID is kept.

Class Method Summary collapse

Class Method Details

.check_file!(fname) ⇒ Object



5
6
7
8
# File 'lib/big_simon/utils.rb', line 5

# Aborts through Rya::AbortIf.abort_if when +fname+ is given but does not
# name an existing file. A nil or false +fname+ never triggers the abort,
# because the condition short-circuits to a falsy value.
#
# @param fname [String, nil] path to check
# @return [Object] whatever Rya::AbortIf.abort_if returns
def self.check_file!(fname)
  missing = fname && !File.exist?(fname)
  message = "#{fname} doesn't exist!  Try big_simon --help for help."

  Rya::AbortIf.abort_if(missing, message)
end

.check_opt!(opts, arg) ⇒ Object



10
11
12
13
# File 'lib/big_simon/utils.rb', line 10

# Aborts through Rya::AbortIf.abort_unless, with a usage hint, unless the
# +:"<arg>_given"+ entry of +opts+ is truthy.
#
# @param opts [#fetch] parsed command-line options (a Hash-like object)
# @param arg [Symbol, String] option name; underscores are shown as dashes
#   in the message (e.g. +:out_dir+ is reported as +--out-dir+)
# @return [Object] whatever Rya::AbortIf.abort_unless returns
def self.check_opt!(opts, arg)
  # An absent "<arg>_given" key is treated as "option not given": default to
  # false so the user gets the friendly abort message instead of a KeyError.
  given = opts.fetch(:"#{arg}_given", false)

  Rya::AbortIf.abort_unless given,
                            "You must specify --#{arg.to_s.tr('_', '-')}.  Try big_simon --help for help."
end

.rcode(fnames) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/big_simon/utils.rb', line 15

# Builds the text of an R script: a fixed preamble that loads reshape2,
# gplots and RColorBrewer and defines +draw.heatmap+, followed by one
# +draw.heatmap(infile, outfile)+ call per entry of +fnames+.
#
# The generated +draw.heatmap+ reads a tab-separated table with a header,
# casts it to a host-by-virus matrix of +score+ values, and writes a
# heatmap PDF to the output file.
#
# @param fnames [Enumerable] pairs of +[in_fname, out_fname]+
# @return [String] the R source code
def self.rcode fnames
  # NOTE: %Q processes escapes, so the "\t" below reaches R as a literal tab
  # character inside the string, which R accepts as the separator.
  functions = %Q|
  library(reshape2)
library(gplots)
library(RColorBrewer)

file.join <- function(...) {
paste(..., sep="/")
}

draw.heatmap <- function(infname, outfname) {
dat <- read.table(infname, header=T, sep="\t")

wide.dat <- dcast(dat, host ~ virus, value.var="score")

hosts <- wide.dat[, 1]
scores <- wide.dat[, 2:ncol(wide.dat)]
scores.numeric <- apply(scores, 2, as.numeric)

scores.matrix <- as.matrix(scores.numeric)

rownames(scores.matrix) <- hosts

palette <- "YlOrBr"
col <- colorRampPalette(brewer.pal(n=9, palette))(n = 25)
size <- 0.75

pdf(outfname, height=5, width=8)

heatmap.2(scores.matrix,
          trace="none", ## Disable those wonky lines.
          col=col, ## Set the color.

          ## Size opts
          margins=c(11, 11), cexRow=size, cexCol=size,

          ## Key labeling
          key.xlab="Score")

invisible(dev.off())
}

|

  # File names end up inside double-quoted R string literals, so backslashes
  # and double quotes must be backslash-escaped to keep the script valid.
  r_escape = ->(path) { path.to_s.gsub(/[\\"]/) { |char| "\\#{char}" } }

  drawing = fnames.map do |in_fname, out_fname|
    %Q{

draw.heatmap("#{r_escape.call(in_fname)}", "#{r_escape.call(out_fname)}")
}
  end.join

  [functions, drawing].join "\n"
end

.scale_log_likelihood(ll) ⇒ Object



69
70
71
# File 'lib/big_simon/utils.rb', line 69

# Converts a log-likelihood into a score by exponentiating it and
# subtracting the result from one.
#
# @param ll [Numeric] log-likelihood value
# @return [Float] +1 - exp(ll)+
def self.scale_log_likelihood(ll)
  likelihood = Math.exp(ll)

  1 - likelihood
end

.set_up_tmp_dirs(fastas, tmpdir, which) ⇒ Object

Note:

Every sequence written to the temporary FASTA files is also renamed to its new ID.



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/big_simon/utils.rb', line 74

# Writes every record of the given FASTA files to its own file in +tmpdir+
# (created if needed), renaming each sequence to +"<which>_<n>"+ where +n+
# counts kept records from zero across all input files.
#
# A record whose ID was already seen is skipped with a logged warning, so
# only the first record for each ID is kept.
#
# @param fastas [Enumerable<String>] FASTA file names to read
# @param tmpdir [String] directory that receives the +"<new_id>.fa"+ files
# @param which [#to_s] prefix used to build the new IDs
# @return [Array] +[name_map, all_ids, seq_lengths]+: new ID => original ID,
#   the Set of original IDs kept, and new ID => sequence length
def self.set_up_tmp_dirs(fastas, tmpdir, which)
  Object::FileUtils.mkdir_p(tmpdir)

  original_ids   = Set.new
  new_to_orig_id = {}
  length_by_id   = {}
  kept_count     = 0

  fastas.each do |fasta|
    ParseFasta::SeqFile.open(fasta).each_record do |record|
      if original_ids.include?(record.id)
        Rya::AbortIf.logger.warn { "#{record.id} was seen more than one time!  Duplicate organism IDs are not allowed, so we will only keep the first one." }
      else
        original_ids << record.id

        renamed = "#{which}_#{kept_count}"
        kept_count += 1

        new_to_orig_id[renamed] = record.id
        length_by_id[renamed]   = record.seq.length

        # One single-record FASTA file per kept sequence, named after the new ID.
        File.open(File.join(tmpdir, "#{renamed}.fa"), "w") do |out|
          out.puts ">#{renamed}\n#{record.seq}" # TODO HERE
        end
      end
    end
  end

  [new_to_orig_id, original_ids, length_by_id]
end

.strip_suffix(fname) ⇒ Object



109
110
111
# File 'lib/big_simon/utils.rb', line 109

# Removes a trailing +.fasta+ or +.fa+ extension from a file name.
#
# The dot is matched literally and the pattern is anchored to the very end
# of the string, so names that merely end in the letters "fa" or "fasta"
# (e.g. "alfalfa") are returned unchanged.
#
# @param fname [String] file name or path
# @return [String] a copy of +fname+ without the FASTA extension
def self.strip_suffix(fname)
  fname.sub(/\.(?:fasta|fa)\z/, "")
end