Module: Study

Extended by:
Entity, Resource
Includes:
LocalPersist
Defined in:
lib/rbbt/entity/study.rb,
lib/rbbt/entity/study/cnv.rb,
lib/rbbt/entity/study/cnv.rb,
lib/rbbt/entity/study/snp.rb,
lib/rbbt/entity/study/snp.rb,
lib/rbbt/entity/study/samples.rb,
lib/rbbt/entity/study/cnv/genes.rb,
lib/rbbt/entity/study/genotypes.rb,
lib/rbbt/entity/study/methylation.rb,
lib/rbbt/entity/study/methylation.rb,
lib/rbbt/entity/study/knowledge_base.rb,
lib/rbbt/entity/study/genotypes/genes.rb,
lib/rbbt/entity/study/genotypes/mutations.rb,
lib/rbbt/entity/study/genotypes/knowledge_base.rb

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Class Attribute Details

.knowledge_base ⇒ Object

Returns the value of attribute knowledge_base.



4
5
6
# File 'lib/rbbt/entity/study/knowledge_base.rb', line 4

# Reader for the +@knowledge_base+ instance variable of the receiver.
#
# @return [Object, nil] the stored value, or +nil+ when it was never assigned
def knowledge_base
  instance_variable_get(:@knowledge_base)
end

.study_dir ⇒ Object

Returns the value of attribute study_dir.



49
50
51
# File 'lib/rbbt/entity/study.rb', line 49

# Reader for the +@study_dir+ instance variable of the receiver.
#
# @return [Object, nil] the stored value, or +nil+ when it was never assigned
def study_dir
  instance_variable_get(:@study_dir)
end

.study_registry ⇒ Object

Returns the value of attribute study_registry.



4
5
6
# File 'lib/rbbt/entity/study/knowledge_base.rb', line 4

# Reader for the +@study_registry+ instance variable of the receiver.
#
# @return [Object, nil] the stored value, or +nil+ when it was never assigned
def study_registry
  instance_variable_get(:@study_registry)
end

Instance Attribute Details

#dir ⇒ Object

Returns the value of attribute dir.



64
65
66
# File 'lib/rbbt/entity/study.rb', line 64

# Reader for the +@dir+ instance variable of the receiver.
#
# @return [Object, nil] the stored value, or +nil+ when it was never assigned
def dir
  instance_variable_get(:@dir)
end

#knowledge_base ⇒ Object

Returns the value of attribute knowledge_base.



14
15
16
# File 'lib/rbbt/entity/study/knowledge_base.rb', line 14

# Reader for the +@knowledge_base+ instance variable of this study.
#
# @return [Object, nil] the stored value, or +nil+ when it was never assigned
def knowledge_base
  instance_variable_get(:@knowledge_base)
end

#organism ⇒ Object

Returns the value of attribute organism.



122
123
124
# File 'lib/rbbt/entity/study.rb', line 122

# Reader for the +@organism+ instance variable of this study.
#
# @return [Object, nil] the stored value, or +nil+ when it was never assigned
def organism
  instance_variable_get(:@organism)
end

#watson ⇒ Object

Returns the value of attribute watson.



93
94
95
# File 'lib/rbbt/entity/study/genotypes.rb', line 93

# Reader for the +@watson+ instance variable of this study.
#
# @return [Object, nil] the stored value, or +nil+ when it was never assigned
def watson
  instance_variable_get(:@watson)
end

#workflow(&block) ⇒ Object

Returns the value of attribute workflow.



64
65
66
# File 'lib/rbbt/entity/study.rb', line 64

# Reader for the +@workflow+ instance variable of this study.
#
# @return [Object, nil] the stored value, or +nil+ when it was never assigned
def workflow
  instance_variable_get(:@workflow)
end

Class Method Details

.annotation_repo ⇒ Object



86
87
88
# File 'lib/rbbt/entity/study.rb', line 86

# Memoized lookup of +Rbbt.var.cache.annotation_repo.find+.
#
# The lookup runs on the first call (and again only while the cached value is
# +nil+ or +false+); later calls return the cached result.
#
# @return [Object] the value returned by +find+
def self.annotation_repo
  return @annotation_repo if @annotation_repo

  @annotation_repo = Rbbt.var.cache.annotation_repo.find
end

.extended(base) ⇒ Object



90
91
92
93
94
95
96
97
98
# File 'lib/rbbt/entity/study.rb', line 90

# Hook run with the object that has just been extended.
#
# Gives +base+ its own clone of +StudyWorkflow+ (pointing back at +base+),
# evaluates the optional +setup.rb+ found in +base.dir+ in the context of
# +base+, and finally sets +base.local_persist_dir+ to
# +base.dir.var.cache.persistence.find+.
#
# NOTE(review): setup.rb is evaluated with +instance_eval+; this assumes the
# study directory is trusted content.
#
# @param base [Object] the extended object; must respond to +dir+,
#   +workflow=+, +workflow+ and +local_persist_dir=+
# @return [Object] the value assigned to +base.local_persist_dir+
def self.extended(base)
  setup_file = File.join(base.dir, 'setup.rb')
  base.workflow = StudyWorkflow.clone
  base.workflow.study = base
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  base.instance_eval(Open.read(setup_file), setup_file) if File.exist?(setup_file)
  base.local_persist_dir = base.dir.var.cache.persistence.find
end

.studies ⇒ Object



100
101
102
103
# File 'lib/rbbt/entity/study.rb', line 100

# Lists the studies found directly under +study_dir+.
#
# Every sub-directory of +study_dir+ (resolved with +find+ when it is a
# +Path+) is taken in sorted order and its basename is passed to
# +Study.setup+.
#
# @return [Array] the results of +Study.setup+, one per sub-directory
def self.studies
  root = Path === study_dir ? study_dir.find : study_dir
  study_paths = Dir.glob(File.join(root, '*')).select { |entry| File.directory?(entry) }
  study_paths.sort.map { |path| Study.setup(File.basename(path)) }
end

Instance Method Details

#cnv_cohort ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/rbbt/entity/study/cnv.rb', line 61

# Memoized hash of CNV lists keyed by sample name.
#
# For each entry of +cnv_files+ the file basename is used as the sample name
# (passed to +Sample.setup+ together with this study), and the file's lines
# are sorted and passed to +CNV.setup+ with +organism+.
#
# @return [Hash] sample name => sorted list of lines from that sample's file
def cnv_cohort
  return @cnv_cohort unless @cnv_cohort.nil?

  @cnv_cohort = {}
  cnv_files.each do |path|
    sample_name = File.basename(path)
    Sample.setup(sample_name, self)
    sample_cnvs = Open.read(path).split("\n").sort
    CNV.setup(sample_cnvs, organism)
    @cnv_cohort[sample_name] = sample_cnvs
  end
  @cnv_cohort
end

#cnv_files ⇒ Object



57
58
59
# File 'lib/rbbt/entity/study/cnv.rb', line 57

# Lists everything matching "*" under the located +cnv+ entry of +dir+.
#
# @return [Object] the result of +glob("*")+ on +dir.cnv.find+
def cnv_files
  cnv_dir = dir.cnv.find
  cnv_dir.glob("*")
end

#cohort ⇒ Object



103
104
105
106
107
108
109
# File 'lib/rbbt/entity/study/genotypes.rb', line 103

# Memoized list of genotypes, one per entry of +genotype_files+.
#
# Each file's lines are sorted and handed to +GenomicMutation.setup+ together
# with the file basename, +organism+ and +watson+. The resulting array is
# extended with +Genotype::Cohort+ before being cached.
#
# @return [Array] the per-file results of +GenomicMutation.setup+
def cohort
  return @cohort if @cohort

  genotypes = genotype_files.map do |path|
    sample_name = File.basename(path)
    mutations = Open.read(path).split("\n").sort
    GenomicMutation.setup(mutations, sample_name, organism, watson)
  end
  genotypes.extend Genotype::Cohort
  @cohort = genotypes
end

#genotype_files ⇒ Object



99
100
101
# File 'lib/rbbt/entity/study/genotypes.rb', line 99

# Lists everything matching "*" under the +genotypes+ entry of +dir+.
#
# NOTE(review): unlike the cnv/snp/methylation listings this does not call
# +find+ before +glob+; confirm whether that difference is intentional.
#
# @return [Object] the result of +glob("*")+ on +dir.genotypes+
def genotype_files
  genotypes_dir = dir.genotypes
  genotypes_dir.glob("*")
end

#has_cnv? ⇒ Boolean

Returns:

  • (Boolean)


53
54
55
# File 'lib/rbbt/entity/study/cnv.rb', line 53

# Tells whether the +cnv+ entry of +dir+ exists.
#
# @return [Boolean] the result of +dir.cnv.exists?+
def has_cnv?
  cnv_path = dir.cnv
  cnv_path.exists?
end

#has_genotypes? ⇒ Boolean

Returns:

  • (Boolean)


89
90
91
# File 'lib/rbbt/entity/study/genotypes.rb', line 89

# Tells whether the +genotypes+ entry of +dir+ exists.
#
# @return [Boolean] the result of +dir.genotypes.exists?+
def has_genotypes?
  genotypes_path = dir.genotypes
  genotypes_path.exists?
end

#has_methylation? ⇒ Boolean

Returns:

  • (Boolean)


12
13
14
# File 'lib/rbbt/entity/study/methylation.rb', line 12

# Tells whether the +methylation+ entry of +dir+ exists.
#
# @return [Boolean] the result of +dir.methylation.exists?+
def has_methylation?
  methylation_path = dir.methylation
  methylation_path.exists?
end

#has_snp? ⇒ Boolean

Returns:

  • (Boolean)


9
10
11
# File 'lib/rbbt/entity/study/snp.rb', line 9

# Tells whether the +snp+ entry of +dir+ exists.
#
# @return [Boolean] the result of +dir.snp.exists?+
def has_snp?
  snp_path = dir.snp
  snp_path.exists?
end

#job(task, *args) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
# File 'lib/rbbt/entity/study.rb', line 66

# Creates a job of this study's workflow and tags it with the study.
#
# Accepts +job(task)+, +job(task, name)+, +job(task, inputs)+ and
# +job(task, name, inputs)+. When no name is given (or it is +:self+ /
# +"self"+) the study itself is used as the job name. The +:organism+ and
# +:watson+ inputs default to the values found in +metadata+; explicit
# +inputs+ override them.
#
# @param task [Object] task identifier handed to +workflow.job+
# @param args [Array] optional job name followed by an optional inputs Hash
# @return [Object] the step returned by +workflow.job+, with +@study+ set to
#   this study
def job(task, *args)
  name, inputs = args
  # A lone Hash argument is the inputs, not the job name.
  if inputs.nil? && Hash === name
    inputs = name
    name = nil
  end
  name = self if name.nil? || name == :self || name == "self"

  # The defaults must be read from the study metadata; a bare `[:organism]`
  # is just a one-element array literal, not a lookup.
  defaults = { :organism => metadata[:organism], :watson => metadata[:watson] }
  step = workflow.job(task, name, defaults.merge(inputs || {}))
  step.instance_variable_set(:@study, self)
  step
end

#match_samples(list) ⇒ Object



46
47
48
49
50
51
52
53
# File 'lib/rbbt/entity/study/samples.rb', line 46

# Expands a list of samples with the other samples of the same donors.
#
# Looks for a field of +sample_info+ whose name matches /donor\s+id/i. When
# one exists, the donors of the given samples are collected and every sample
# selected by those donors is added to the list. When the study has no sample
# information, or no donor id field, the list is returned untouched.
#
# @param list [Array] sample identifiers
# @return [Array] +list+ itself, or the de-duplicated expanded list as
#   returned by +annotate+ on the donor samples
def match_samples(list)
  info = sample_info
  # sample_info is nil when the study has no samples file.
  return list if info.nil?

  donor_id_field = info.fields.find { |field| field =~ /donor\s+id/i }
  return list unless donor_id_field

  donors = info.select(list).slice(donor_id_field).values.compact.flatten
  donor_samples = info.select(donors).keys
  donor_samples.annotate((list + donor_samples).uniq)
end

#matrices ⇒ Object



131
132
133
# File 'lib/rbbt/entity/study.rb', line 131

# Names of the matrices available for this study.
#
# @return [Array] the +basename+ of every entry matching '*' under
#   +dir.matrices+
def matrices
  matrix_entries = dir.matrices.glob('*')
  matrix_entries.map(&:basename)
end

#matrix(type, format = "Ensembl Gene ID", organism = nil) ⇒ Object



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/rbbt/entity/study.rb', line 135

# Builds the +Matrix+ object for one of the study matrices.
#
# The matrix directory is +dir.matrices[type]+. Its +data+ and +samples+
# entries are used when they exist; the identifiers come from the matrix's
# own +identifiers+ entry or, failing that, from
# +Organism.identifiers(organism)+. When the matrix has no +samples+ entry
# the study-wide +dir.samples+ is used if present.
#
# @param type [#to_s] name of the matrix (must be listed by +matrices+)
# @param format [String] identifier format handed to +Matrix.new+
# @param organism [Object, nil] organism; defaults to +metadata[:organism]+
# @return [Matrix]
# @raise [RuntimeError] when the study has no matrices, +type+ is nil, or
#   +type+ is not one of +matrices+
def matrix(type, format = "Ensembl Gene ID", organism = nil)
  organism = metadata[:organism] if organism.nil?
  # `defined? matrices.empty?` is truthy for any list, so the original guard
  # could never fire; test for emptiness directly.
  raise "No matrices defined for study #{ self }" if matrices.empty?
  raise "No type specified" if type.nil?
  type = type.to_s
  raise "No matrix #{ type } defined for study #{ self }" unless matrices.include? type

  matrix_dir = dir.matrices[type]
  data = matrix_dir.data.find if matrix_dir.data.exists?
  identifiers =
    if matrix_dir.identifiers.exists?
      matrix_dir.identifiers.find
    else
      Organism.identifiers(organism).find
    end
  samples = matrix_dir.samples.find if matrix_dir.samples.exists?
  # Use `exists?`, the predicate every other path check here relies on.
  samples = dir.samples.find if samples.nil? && dir.samples.exists?
  Matrix.new(data, identifiers, samples, format, organism)
end

#matrix_file(name) ⇒ Object



127
128
129
# File 'lib/rbbt/entity/study.rb', line 127

# Locates the entry called +name+ under +dir.matrices+.
#
# @param name [#to_s] matrix name
# @return [Object] the result of +find+ on +dir.matrices[name.to_s]+
def matrix_file(name)
  matrices_dir = dir.matrices
  matrices_dir[name.to_s].find
end

#metadata ⇒ Object



113
114
115
# File 'lib/rbbt/entity/study.rb', line 113

# Memoized contents of the study's "metadata.yaml".
#
# The parsed YAML (as returned by +dir["metadata.yaml"].yaml+) is extended
# with +IndiferentHash+ and cached in +@metadata+.
#
# @return [Object] the parsed metadata, extended with +IndiferentHash+
def metadata
  @metadata ||= (dir["metadata.yaml"].yaml.extend IndiferentHash)
end

#methylation_cohort ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/rbbt/entity/study/methylation.rb', line 20

# Memoized hash of methylation lists keyed by sample name.
#
# For each entry of +methylation_files+ the file basename is used as the
# sample name (passed to +Sample.setup+ together with this study), and the
# file's lines are sorted and passed to +Methylation.setup+ with +organism+.
#
# @return [Hash] sample name => sorted list of lines from that sample's file
def methylation_cohort
  return @methylation_cohort unless @methylation_cohort.nil?

  @methylation_cohort = {}
  methylation_files.each do |path|
    sample_name = File.basename(path)
    Sample.setup(sample_name, self)
    sample_methylations = Open.read(path).split("\n").sort
    Methylation.setup(sample_methylations, organism)
    @methylation_cohort[sample_name] = sample_methylations
  end
  @methylation_cohort
end

#methylation_files ⇒ Object



16
17
18
# File 'lib/rbbt/entity/study/methylation.rb', line 16

# Lists everything matching "*" under the located +methylation+ entry of +dir+.
#
# @return [Object] the result of +glob("*")+ on +dir.methylation.find+
def methylation_files
  methylation_dir = dir.methylation.find
  methylation_dir.glob("*")
end

#sample_info ⇒ Object



28
29
30
31
# File 'lib/rbbt/entity/study/samples.rb', line 28

# Memoized table loaded from the study's +samples+ entry.
#
# The table is +dir.samples.tsv+, with its +entity_options+ set to
# +{:study => self}+ before it is cached.
#
# @return [Object, nil] the table, or +nil+ when +dir.samples+ does not exist
def sample_info
  if dir.samples.exists?
    return @sample_info if @sample_info

    table = dir.samples.tsv
    table.entity_options = {:study => self }
    @sample_info = table
  else
    nil
  end
end

#samples ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/rbbt/entity/study/samples.rb', line 33

# Memoized list of the study's samples.
#
# The names come from the keys of +sample_info+ or, when there is no sample
# information, from the +jobname+ of every member of +cohort+. The list is
# passed to +Sample.setup+ with this study and has its +study+ set to it.
#
# @return [Array] the sample list
def samples
  return @samples unless @samples.nil?

  @samples = sample_info.nil? ? self.cohort.map(&:jobname) : sample_info.keys
  Sample.setup(@samples, self)
  @samples.study = self
  @samples
end

#snp_cohort ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/rbbt/entity/study/snp.rb', line 17

# Memoized hash of SNP lists keyed by sample name.
#
# For each entry of +snp_files+ the file basename is used as the sample name
# (passed to +Sample.setup+ together with this study), and the file's lines
# are sorted and passed to +SNP.setup+.
#
# @return [Hash] sample name => sorted list of lines from that sample's file
def snp_cohort
  return @snp_cohort unless @snp_cohort.nil?

  @snp_cohort = {}
  snp_files.each do |path|
    sample_name = File.basename(path)
    Sample.setup(sample_name, self)
    sample_snps = Open.read(path).split("\n").sort
    SNP.setup(sample_snps)
    @snp_cohort[sample_name] = sample_snps
  end
  @snp_cohort
end

#snp_files ⇒ Object



13
14
15
# File 'lib/rbbt/entity/study/snp.rb', line 13

# Memoized listing of everything matching "*" under the located +snp+ entry
# of +dir+.
#
# @return [Object] the cached result of +dir.snp.find.glob("*")+
def snp_files
  return @snp_files if @snp_files

  @snp_files = dir.snp.find.glob("*")
end

#snp_index ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/rbbt/entity/study/snp.rb', line 34

# Builds (through +local_persist_tsv+, under the name "SNP2Samples") an index
# from SNP identifier to the samples whose SNP file mentions it.
#
# Every line of every file in +snp_files+ is read as "snp" or "snp:allele";
# the sample name is the file's basename. For each SNP the entries
# ("sample" or "sample:allele") are accumulated in one tab-separated string.
# The block then marks +data+ as a flat TSV with key field "RS ID" and field
# "Sample".
#
# NOTE(review): the persistence options ask for the :clean serializer while
# the block later sets :list on +data+ — confirm which one is intended.
#
# @return [Object] whatever +local_persist_tsv+ returns for this block
def snp_index
  local_persist_tsv("SNP2Samples", "SNP2Samples", {}, :persist => true, :serializer => :clean) do |data|

    # Loaded only when the index actually has to be built.
    require 'progress-monitor'
    Progress.monitor "SNP files", :stack_depth => 0
    snp_files.each do |file|
      file = file.to_s
      sample = File.basename file
      File.open(file.to_s) do |f|
        while line = f.gets
          snp = line.strip
          # Lines are "snp" or "snp:allele"; allele is nil when absent.
          snp, allele = snp.split ":"
          snp_str = data[snp]

          # First occurrence starts a fresh string; later ones get a tab
          # separator. `+=` builds a new string rather than mutating the one
          # returned by data[snp].
          if snp_str.nil?
            snp_str = ""
          else
            snp_str += "\t"
          end

          if allele
            snp_str << sample << ":" << allele
          else
            snp_str << sample
          end
          # Write the accumulated string back into the store.
          data[snp] = snp_str
        end
      end
    end

    # Describe the accumulated data as a TSV before handing it back.
    TSV.setup data
    data.key_field = "RS ID"
    data.fields = ["Sample"]
    data.type = :flat
    data.serializer = :list
    data
  end
end

#users ⇒ Object



117
118
119
# File 'lib/rbbt/entity/study.rb', line 117

# Memoized list of users declared in the study metadata.
#
# Reads the +:users+ entry of +metadata+. A bare +[:users]+ is an array
# literal that is always truthy, which would make the empty-list fallback
# unreachable.
#
# @return [Object] +metadata[:users]+, or an empty Array when it is missing
def users
  @users ||= metadata[:users] || []
end