Class: Matrix

Inherits:
Object
  • Object
show all
Extended by:
Resource
Defined in:
lib/rbbt/expression_old/matrix.rb

Constant Summary collapse

MATRIX_DIR =
Matrix.root.find

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data, identifiers, labels = nil, key_field = nil, organism = nil, log2 = false, channel = false) ⇒ Matrix

Returns a new instance of Matrix.



29
30
31
32
33
34
35
36
37
38
39
# File 'lib/rbbt/expression_old/matrix.rb', line 29

# Wraps an expression data file together with its identifier and label sources.
#
# `data` is asked to `produce` itself first when it responds to that message.
# The sample names are then taken from the header fields of the data file, and
# the label table is opened with TSV.open only when `labels` is not nil.
#
# @param data expression values file (or an object responding to `produce`)
# @param identifiers stored as given
# @param labels source handed to TSV.open; nil leaves @labels unassigned
# @param key_field stored as given
# @param organism stored as given
# @param log2 stored as given (defaults to false)
# @param channel stored as given (defaults to false)
def initialize(data, identifiers, labels = nil, key_field = nil, organism = nil, log2 = false, channel = false)
  data.produce if data.respond_to?(:produce)

  # The header of the data file lists one field per sample.
  header = TSV::Parser.new(Open.open(data))

  @data        = data
  @samples     = header.fields
  @labels      = TSV.open(labels) unless labels.nil?
  @identifiers = identifiers
  @key_field   = key_field
  @organism    = organism
  @log2        = log2
  @channel     = channel
end

Instance Attribute Details

#channel ⇒ Object

Returns the value of attribute channel.



28
29
30
# File 'lib/rbbt/expression_old/matrix.rb', line 28

# Reader for @channel, which the constructor assigns from its `channel`
# argument (false unless the caller passes a value).
def channel
  @channel
end

#data ⇒ Object

Returns the value of attribute data.



28
29
30
# File 'lib/rbbt/expression_old/matrix.rb', line 28

# Reader for @data, the `data` argument given to the constructor (the
# constructor calls `produce` on it first when it responds to that message).
def data
  @data
end

#identifiers ⇒ Object

Returns the value of attribute identifiers.



28
29
30
# File 'lib/rbbt/expression_old/matrix.rb', line 28

# Reader for @identifiers, stored unchanged from the constructor's
# `identifiers` argument.
def identifiers
  @identifiers
end

#key_field ⇒ Object

Returns the value of attribute key_field.



28
29
30
# File 'lib/rbbt/expression_old/matrix.rb', line 28

# Reader for @key_field, stored unchanged from the constructor's `key_field`
# argument (nil unless the caller passes a value).
def key_field
  @key_field
end

#labels ⇒ Object

Returns the value of attribute labels.



28
29
30
# File 'lib/rbbt/expression_old/matrix.rb', line 28

# Reader for @labels: the result of TSV.open on the constructor's `labels`
# argument, or nil when no labels were given.
def labels
  @labels
end

#log2 ⇒ Object

Returns the value of attribute log2.



28
29
30
# File 'lib/rbbt/expression_old/matrix.rb', line 28

# Reader for @log2, stored unchanged from the constructor's `log2` argument
# (false unless the caller passes a value).
def log2
  @log2
end

#organism ⇒ Object

Returns the value of attribute organism.



28
29
30
# File 'lib/rbbt/expression_old/matrix.rb', line 28

# Reader for @organism, stored unchanged from the constructor's `organism`
# argument (nil unless the caller passes a value).
def organism
  @organism
end

#samples ⇒ Object

Returns the value of attribute samples.



28
29
30
# File 'lib/rbbt/expression_old/matrix.rb', line 28

# Reader for @samples, which the constructor fills with the header fields
# that TSV::Parser reports for the data file.
def samples
  @samples
end

Class Method Details

.geo_matrix_for(gds, key_field = nil, organism = nil) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/rbbt/expression_old/matrix.rb', line 15

# Builds a Matrix for a GEO dataset.
#
# Looks up the dataset's values and samples files, reads its 'info.yaml' to
# learn the platform, and uses that platform's codes file as the identifiers.
# log2 is switched on only when the dataset's :value_type is "count".
#
# @param gds key used to look the dataset up in GEO
# @param key_field forwarded to Matrix.new
# @param organism forwarded to Matrix.new
# @return [Matrix]
def self.geo_matrix_for(gds, key_field = nil, organism = nil)
  values_file  = GEO[gds].values.produce.find
  samples_file = GEO[gds].samples.produce.find

  info       = GEO[gds]['info.yaml'].produce.yaml
  codes_file = GEO[info[:platform]].codes.produce.find
  needs_log2 = ["count"].include?(info[:value_type])

  Matrix.new(values_file, codes_file, samples_file, key_field, organism, needs_log2)
end

Instance Method Details

#average_label(value, field = nil) ⇒ Object



69
70
71
72
73
# File 'lib/rbbt/expression_old/matrix.rb', line 69

# Averages the samples carrying a given label value.
#
# Finds the samples labelled `value` (optionally restricted to `field`), drops
# the ones missing from this matrix, and hands the rest to average_samples.
#
# @param value label value to look for
# @param field label field passed through to find_samples
# @return whatever average_samples returns for the selected samples
def average_label(value, field = nil)
  average_samples(remove_missing(find_samples(value, field)))
end

#average_samples(samples) ⇒ Object



51
52
53
54
55
56
57
# File 'lib/rbbt/expression_old/matrix.rb', line 51

# Computes the average of the given samples and returns the path of the
# persisted result.
#
# The result path is derived from the matrix file and the sample list, under
# the 'averaged_samples' directory of Matrix::MATRIX_DIR. The averaging itself
# is delegated to Expression.average_samples inside the Persist.persist block.
#
# @param samples sample names to average
# @return the persistence path of the averaged TSV
def average_samples(samples)
  # Resolve the matrix file once: every matrix_file call recomputes its own
  # persistence path and goes through Persist.persist again.
  matrix = matrix_file

  path = Persist.persistence_path(matrix, {:dir => File.join(Matrix::MATRIX_DIR, 'averaged_samples')}, {:samples => samples})
  Persist.persist(data, :tsv, :file => path, :no_load => true, :check => [matrix]) do
    Expression.average_samples(matrix, samples)
  end
  path
end

#barcode(path = nil, factor = 2) ⇒ Object



75
76
77
78
79
80
81
82
# File 'lib/rbbt/expression_old/matrix.rb', line 75

# Writes a barcode of the matrix and returns the path it was written to.
#
# When no `path` is given, one is derived from the matrix file and `factor`
# under the 'barcode' directory of Matrix::MATRIX_DIR. The previous default
# referenced `main` and `contrast`, which do not exist in this method, so
# calling it without a path raised NameError; it also reused the
# 'sample_differences' directory and ignored `factor` in the cache key.
#
# Expression.barcode receives the target path and writes to it itself, which is
# why the persist block returns nil.
#
# @param path [String, nil] explicit output path; derived when nil
# @param factor passed through to Expression.barcode (defaults to 2)
# @return the path of the barcode file
def barcode(path = nil, factor = 2)
  # Resolve once; each matrix_file call recomputes its persistence path.
  matrix = matrix_file

  path ||= Persist.persistence_path(matrix, {:dir => File.join(Matrix::MATRIX_DIR, 'barcode')}, {:factor => factor})
  Persist.persist(data, :tsv, :file => path, :no_load => true, :check => [matrix]) do
    Expression.barcode(matrix, path, factor)
    nil
  end
  path
end

#find_samples(value, field = nil) ⇒ Object



59
60
61
62
63
# File 'lib/rbbt/expression_old/matrix.rb', line 59

# Lists the keys of the label table whose entry matches `value`.
#
# An entry matches when it equals `value`, or, for Array entries, when `value`
# appears anywhere in the flattened array.
#
# @param value label value to look for
# @param field passed to `labels.select` (nil by default)
# @return the keys of the matching label entries
def find_samples(value, field = nil)
  matching = labels.select(field) do |_sample, label|
    case label
    when Array then label.flatten.include?(value)
    else label == value
    end
  end
  matching.keys
end

#label_differences(main, contrast = nil, field = nil) ⇒ Object



92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/rbbt/expression_old/matrix.rb', line 92

# Compares the samples labelled `main` against a contrast group.
#
# The contrast group is the samples labelled `contrast` when one is given;
# otherwise it is every labelled sample that is not in the main group. Both
# groups are filtered through remove_missing before being compared.
#
# @param main label value selecting the main group
# @param contrast label value selecting the contrast group (optional)
# @param field label field passed through to find_samples
# @return whatever sample_differences returns for the two groups
def label_differences(main, contrast = nil, field = nil)
  main_group     = find_samples(main, field)
  contrast_group = contrast ? find_samples(contrast, field) : labels.keys - main_group

  sample_differences(remove_missing(main_group), remove_missing(contrast_group))
end

#matrix_file(path = nil) ⇒ Object



41
42
43
44
45
46
47
48
49
# File 'lib/rbbt/expression_old/matrix.rb', line 41

# Builds the processed matrix for this dataset and returns the path it is
# persisted at.
#
# Without an explicit `path`, one is derived from the data file plus the
# identifiers, labels, key field and organism, under Matrix::MATRIX_DIR. The
# matrix is loaded through Expression.load_matrix; when its key field is
# "Ensembl Gene ID" it is restricted to Organism.sanctioned_genes(organism).
#
# @param path [String, nil] explicit output path; derived when nil
# @return the path of the persisted matrix
def matrix_file(path = nil)
  unless path
    cache_key = {:identifiers => identifiers, :labels => labels, :key_field => key_field, :organism => organism}
    path = Persist.persistence_path(data, {:dir => Matrix::MATRIX_DIR}, cache_key)
  end

  Persist.persist(data, :tsv, :file => path, :check => [data], :no_load => true) do
    loaded = Expression.load_matrix(data, identifiers, key_field, organism)
    if loaded.key_field == "Ensembl Gene ID"
      loaded.select(:key => Organism.sanctioned_genes(organism).list)
    else
      loaded
    end
  end

  path
end

#random_forest_importance(main, contrast = nil, field = nil, options = {}) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/rbbt/expression_old/matrix.rb', line 125

# Runs a random forest in R to score how well `features` separate the `main`
# samples from a contrast group, and returns the importance table.
#
# The contrast group is the samples labelled `contrast` when given, otherwise
# every labelled sample not in the main group; both groups are filtered through
# remove_missing. The R script loads the matrix file, keeps the requested
# features that are present as row names, labels main samples as Class 1 and the
# rest as Class 0, and writes `rf$importance` to a temporary file that is then
# opened as a single-valued float TSV.
#
# @param main label value selecting the main group
# @param contrast label value selecting the contrast group (optional)
# @param field label field passed through to find_samples
# @param options [Hash] only :features is read (taken via Misc.process_options)
# @return the result of Persist.persist for the importance table
def random_forest_importance(main, contrast = nil, field = nil, options = {})
  features = Misc.process_options(options, :features) || []

  # Resolve once; each matrix_file call recomputes its persistence path and
  # goes through Persist.persist again.
  matrix = matrix_file

  path = Persist.persistence_path(matrix, {:dir => File.join(Matrix::MATRIX_DIR, 'random_forest_importance')}, {:main => main, :contrast => contrast, :field => field, :features => features})
  Persist.persist(data, :tsv, :file => path, :no_load => false, :check => [matrix]) do
    main_samples     = find_samples(main, field)
    contrast_samples = contrast ? find_samples(contrast, field) : labels.keys - main_samples

    main_samples     = remove_missing(main_samples)
    contrast_samples = remove_missing(contrast_samples)

    TmpFile.with_file do |result|
      # Sample and feature names are spliced into the script as quoted R
      # strings, so names containing a single quote would break the script.
      R.run <<-EOF
library(randomForest);
orig = rbbt.tsv('#{matrix}');
main = c('#{main_samples * "', '"}')
contrast = c('#{contrast_samples * "', '"}')
features = c('#{features * "', '"}')

features = intersect(features, rownames(orig));
data = t(orig[features, c(main, contrast)])
data = cbind(data, Class = 0)
data[main, "Class"] = 1

rf = randomForest(factor(Class) ~ ., data, na.action = na.exclude)
rbbt.tsv.write(rf$importance, filename='#{ result }', key.field = '#{@key_field}')
      EOF

      TSV.open(result, :type => :single, :cast => :to_f)
    end
  end
end

#remove_missing(samples) ⇒ Object



65
66
67
# File 'lib/rbbt/expression_old/matrix.rb', line 65

# Keeps only the entries of `samples` that are also in @samples (the header
# fields read from the data file by the constructor).
# NOTE(review): presumably both are Arrays, in which case `&` returns the
# common elements in @samples order without duplicates; confirm.
def remove_missing(samples)
  @samples & samples
end

#sample_differences(main, contrast) ⇒ Object



84
85
86
87
88
89
90
# File 'lib/rbbt/expression_old/matrix.rb', line 84

# Computes the differential comparison between two groups of samples and
# returns the path of the persisted result.
#
# The result path is derived from the matrix file, both sample lists and the
# log2/channel settings, under the 'sample_differences' directory of
# Matrix::MATRIX_DIR. The comparison itself is delegated to
# Expression.differential inside the Persist.persist block.
#
# @param main sample names of the main group
# @param contrast sample names of the contrast group
# @return the persistence path of the differences TSV
def sample_differences(main, contrast)
  # Resolve the matrix file once: every matrix_file call recomputes its own
  # persistence path and goes through Persist.persist again.
  matrix = matrix_file

  path = Persist.persistence_path(matrix, {:dir => File.join(Matrix::MATRIX_DIR, 'sample_differences')}, {:main => main, :contrast => contrast, :log2 => log2, :channel => channel})
  Persist.persist(data, :tsv, :file => path, :no_load => true, :check => [matrix]) do
    Expression.differential(matrix, main, contrast, log2, channel)
  end
  path
end

#signature_set(field, cast = nil) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/rbbt/expression_old/matrix.rb', line 107

# Builds a table with one signature column per label value and returns the path
# it is persisted at.
#
# Starts from the matrix file opened with no fields, then, for every distinct
# label value (sorted), computes label_differences for that value, extracts
# `field` from it through Signature.tsv_field, names the column after the label
# value and attaches it. A label value whose signature fails with a
# StandardError is logged and skipped; interrupts, exits and other
# non-StandardError exceptions are no longer swallowed.
#
# @param field field to extract from each differences file
# @param cast cast passed to TSV.open and Signature.tsv_field (optional)
# @return the persistence path of the signature set
def signature_set(field, cast = nil)
  # Resolve once; each matrix_file call recomputes its persistence path.
  matrix = matrix_file

  path = Persist.persistence_path(matrix, {:dir => File.join(Matrix::MATRIX_DIR, 'signature_set')}, {:field => field, :cast => cast})
  Persist.persist(data, :tsv, :file => path, :no_load => true, :check => [matrix]) do
    signatures = TSV.open(matrix, :fields => [], :type => :list, :cast => cast)
    labels.values.flatten.uniq.sort.each do |value|
      begin
        s = Signature.tsv_field(label_differences(value), field, cast)
        s.fields = [value]
        signatures.attach s
      rescue StandardError => e
        # Best effort: one failing label must not abort the whole set, but the
        # reason is worth keeping in the log.
        Log.warn("Signature for #{ value } did not compute: #{e.class}: #{e.message}")
      end
    end
    signatures
  end
  path
end