Class: Cohort

Inherits:
Object
  • Object
show all
Defined in:
lib/pets/cohort.rb

Constant Summary collapse

@@ont =
{}

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeCohort



39
40
41
42
43
44
# File 'lib/pets/cohort.rb', line 39

# Creates an empty cohort: no extra attributes, no per-record variants,
# no profiles, and a variant index built from an empty feature list.
def initialize
  @extra_attr = {}
  @vars = {}
  @profiles = {}
  @var_idx = Genomic_Feature.new([])
end

Class Attribute Details

.act_ontObject

Which ontology to use for ontology-related operations.



7
8
9
# File 'lib/pets/cohort.rb', line 7

# Reader for @act_ont. Other methods of this class use the returned value
# as the key for looking up the active ontology in @@ont.
def act_ont
  @act_ont
end

Instance Attribute Details

#profilesObject

Returns the value of attribute profiles.



10
11
12
# File 'lib/pets/cohort.rb', line 10

# Reader for @profiles, the hash that maps each record id to its list of
# terms (populated by add_record).
def profiles
  @profiles
end

Class Method Details

.get_ontology(ont_id) ⇒ Object



12
13
14
# File 'lib/pets/cohort.rb', line 12

# Fetches the ontology stored in @@ont under +ont_id+.
#
# @param ont_id the key the ontology was stored under (see load_ontology)
# @return whatever @@ont holds for that key
def self.get_ontology(ont_id)
  @@ont[ont_id]
end

.load_ontology(ont_name, ont_file, excluded_terms_file = nil) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/pets/cohort.rb', line 16

# Builds an Ontology object and stores it in @@ont under +ont_name+.
#
# When +ont_file+ contains '.json' an empty Ontology is created and the file
# is read into it; otherwise the Ontology is built directly from the file,
# optionally passing the terms listed in +excluded_terms_file+ as
# removable_terms.
#
# @param ont_name key used to store the ontology in @@ont
# @param ont_file [String] path to the ontology file
# @param excluded_terms_file [String, nil] optional path read with
#   read_excluded_ont_file
# @return the ontology that was stored
def self.load_ontology(ont_name, ont_file, excluded_terms_file = nil)
  if ont_file.include?('.json')
    loaded = Ontology.new
    loaded.read(ont_file)
  elsif excluded_terms_file.nil?
    loaded = Ontology.new(file: ont_file, load_file: true)
  else
    loaded = Ontology.new(file: ont_file, load_file: true, removable_terms: read_excluded_ont_file(excluded_terms_file))
  end
  @@ont[ont_name] = loaded
end

.read_excluded_ont_file(file) ⇒ Object



31
32
33
34
35
36
37
# File 'lib/pets/cohort.rb', line 31

# Reads a plain-text file and returns its lines with the trailing line
# terminator removed.
#
# File.foreach is used so the file handle is closed once the last line has
# been read (a bare File.open(...).each leaves it open until GC).
#
# @param file [String] path of the file to read
# @return [Array<String>] one entry per line, in file order
def self.read_excluded_ont_file(file)
  File.foreach(file).map(&:chomp)
end

Instance Method Details

#add_gen_feat(id, feat_array) ⇒ Object

[chr1, start1, stop1],[chr1, start1, stop1]


83
84
85
# File 'lib/pets/cohort.rb', line 83

# Wraps +feat_array+ in a new Genomic_Feature and stores it in @vars under
# +id+, replacing any previous entry for that id.
def add_gen_feat(id, feat_array) # [[chr1, start1, stop1],[chr1, start1, stop1]]
  @vars[id] = Genomic_Feature.new(feat_array)
end

#add_record(rec, extra_attr = nil) ⇒ Object

id, [profile], [[chr1, start1, stop1],[chr1, start1, stop1]]


46
47
48
49
50
51
# File 'lib/pets/cohort.rb', line 46

# Registers one record in the cohort.
#
# +rec+ is destructured as [id, [profile], [[chr1, start1, stop1], ...]].
# Each part is stored only when it is not nil: profile terms are converted
# to symbols, +extra_attr+ is kept as given, and the regions are handed to
# add_gen_feat.
def add_record(rec, extra_attr = nil)
  rec_id, rec_terms, rec_regions = rec
  @profiles[rec_id] = rec_terms.map(&:to_sym) unless rec_terms.nil?
  @extra_attr[rec_id] = extra_attr unless extra_attr.nil?
  return if rec_regions.nil?
  add_gen_feat(rec_id, rec_regions)
end

#check(hard = false) ⇒ Object

OLD format_patient_data



123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/pets/cohort.rb', line 123

# Validates every profile against the active ontology (OLD format_patient_data).
#
# With +hard+ set, each profile is passed through the ontology's
# clean_profile_hard and no rejected terms are collected. Otherwise
# check_ids is used and the rejected codes are reported on STDERR.
# Records left without terms are dropped from both @profiles and @vars.
#
# @return [Array] two values: the unique rejected terms and the ids of the
#   removed records
def check(hard=false) # OLD format_patient_data
  ontology = @@ont[Cohort.act_ont]
  all_rejected_terms = []
  discarded_ids = []
  @profiles.each do |rec_id, rec_terms|
    if hard
      kept_terms = ontology.clean_profile_hard(rec_terms)
      unknown_terms = []
    else
      kept_terms, unknown_terms = ontology.check_ids(rec_terms)
    end
    unless unknown_terms.empty?
      STDERR.puts "WARNING: record #{rec_id} has the unknown CODES '#{unknown_terms.join(',')}'. Codes removed."
      all_rejected_terms.concat(unknown_terms)
    end
    if kept_terms.empty?
      discarded_ids << rec_id
    else
      @profiles[rec_id] = kept_terms
    end
  end
  @profiles.reject! { |rec_id, _terms| discarded_ids.include?(rec_id) }
  @vars.reject! { |rec_id, _regions| discarded_ids.include?(rec_id) }
  return all_rejected_terms.uniq, discarded_ids
end

#compare_profiles(options = {}) ⇒ Object



203
204
205
206
207
# File 'lib/pets/cohort.rb', line 203

# Delegates to compare_profiles on the active ontology, forwarding
# +options+ as keyword arguments, and returns its result.
def compare_profiles(options={})
  @@ont[Cohort.act_ont].compare_profiles(**options)
end

#compute_term_list_and_childsObject



165
166
167
168
# File 'lib/pets/cohort.rb', line 165

# Delegates to compute_term_list_and_childs on the active ontology and
# returns whatever that call returns.
def compute_term_list_and_childs
  @@ont[Cohort.act_ont].compute_term_list_and_childs
end

#delete(id) ⇒ Object



53
54
55
56
# File 'lib/pets/cohort.rb', line 53

# Removes the record +id+ from @profiles and then from @vars.
# Returns the value removed from @vars (nil when there was none).
def delete(id)
  [@profiles, @vars].map { |store| store.delete(id) }.last
end

#each_profileObject



95
96
97
98
99
# File 'lib/pets/cohort.rb', line 95

# Yields every (id, profile) pair stored in @profiles, in hash order.
def each_profile
  @profiles.each_pair { |rec_id, rec_terms| yield(rec_id, rec_terms) }
end

#each_varObject



101
102
103
104
105
# File 'lib/pets/cohort.rb', line 101

# Yields every (id, variant info) pair stored in @vars, in hash order.
def each_var
  @vars.each_pair { |rec_id, rec_vars| yield(rec_id, rec_vars) }
end

#export_phenopackets(output_folder, genome_assembly, vcf_index: nil) ⇒ Object



255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
# File 'lib/pets/cohort.rb', line 255

# Writes one phenopacket JSON file per profile into +output_folder+,
# named "<id>.json".
#
# Each phenopacket carries the shared metaData block, a subject (id plus the
# :sex extra attribute, or 'UNKNOWN_SEX' when absent), and one
# phenotypicFeatures entry per term with its label translated by the active
# ontology. Every feature is written with the "Congenital onset" class.
#
# @param output_folder [String] directory that receives the JSON files
# @param genome_assembly value written as "genomeAssembly" in htsFiles
# @param vcf_index optional lookup from record id to a VCF path (presumably a
#   Hash; only include? and [] are used). When it includes the record id an
#   htsFiles entry is added to that phenopacket.
def export_phenopackets(output_folder, genome_assembly, vcf_index: nil)
  ont = @@ont[Cohort.act_ont]
  # Shared by every exported phenopacket.
  meta_data = {
    "createdBy" => "PETS",
    "resources" => [{
      "id" => "hp",
      "name" => "human phenotype ontology",
      "namespacePrefix" => "HP",
      "url" => "http://purl.obolibrary.org/obo/hp.owl",
#       "version" => "2018-03-08",
      "iriPrefix" => "http://purl.obolibrary.org/obo/HP_"
    }]
  }

  @profiles.each do |id, terms|
    phenopacket = {metaData: meta_data}
    query_sex = @extra_attr.dig(id, :sex)
    sex = query_sex.nil? ? 'UNKNOWN_SEX' : query_sex
    phenopacket[:subject] = {
      id: id,
      sex: sex
    }
    phenopacket[:phenotypicFeatures] = terms.map do |term|
      {
        type: { id: term, label: ont.translate_id(term)},
        classOfOnset: {"id" => "HP:0003577", "label" => "Congenital onset"}
      }
    end
    if !vcf_index.nil? && vcf_index.include?(id)
      phenopacket[:htsFiles] = [{
        "uri" => "file:/" + vcf_index[id],
        "description" => id,
        "htsFormat" => "VCF",
        "genomeAssembly" => genome_assembly,
        "individualToSampleIdentifiers" => { "patient1" => id }
      }]
    end
    File.open(File.join(output_folder, id.to_s + ".json"), "w") { |f| f.write JSON.pretty_generate(phenopacket) }
  end
end

#filter_by_term_number(n_terms) ⇒ Object



70
71
72
# File 'lib/pets/cohort.rb', line 70

# Keeps only the records whose profile has at least +n_terms+ terms,
# using select_by_profile! to apply the filter.
def filter_by_term_number(n_terms)
  select_by_profile! do |_rec_id, rec_terms|
    rec_terms.length >= n_terms
  end
end

#generate_cluster_regions(meth, tag, lim) ⇒ Object



223
224
225
# File 'lib/pets/cohort.rb', line 223

# Forwards +meth+, +tag+ and +lim+ unchanged to generate_cluster_regions on
# the variant index (@var_idx) and returns its result.
def generate_cluster_regions(meth, tag, lim)
  @var_idx.generate_cluster_regions(meth, tag, lim)
end

#get_dataset_specifity_index(type) ⇒ Object



197
198
199
200
201
# File 'lib/pets/cohort.rb', line 197

# Delegates to get_dataset_specifity_index on the active ontology, passing
# +type+ through, and returns its result.
def get_dataset_specifity_index(type)
  @@ont[Cohort.act_ont].get_dataset_specifity_index(type)
end

#get_general_profile(thr = 0) ⇒ Object

TODO: move functionality to semtools



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/pets/cohort.rb', line 107

# Builds a cohort-wide profile from the terms shared across records.
#
# Counts the occurrences of each term over all profiles, keeps the terms
# whose count divided by the number of records is at least +thr+, and
# returns that list after the active ontology's clean_profile_hard.
def get_general_profile(thr=0) # TODO: move functionality to semtools
  occurrences = Hash.new(0)
  each_profile do |_rec_id, rec_terms|
    rec_terms.each { |term| occurrences[term] += 1 }
  end
  n_records = @profiles.length
  frequent_terms = occurrences.select { |_term, n| n.fdiv(n_records) >= thr }.keys
  @@ont[Cohort.act_ont].clean_profile_hard(frequent_terms)
end

#get_ic_analysisObject



178
179
180
181
182
183
# File 'lib/pets/cohort.rb', line 178

# Collects information-content values from the active ontology.
#
# @return [Array] four values: the two results of
#   get_observed_ics_by_onto_and_freq (IC for terms) followed by the two
#   results of get_profiles_resnik_dual_ICs (IC for profiles)
def get_ic_analysis
  ontology = @@ont[Cohort.act_ont]
  term_onto_ic, term_freq_ic = ontology.get_observed_ics_by_onto_and_freq # IC for TERMS
  prof_onto_ic, prof_freq_ic = ontology.get_profiles_resnik_dual_ICs # IC for PROFILES
  [term_onto_ic, term_freq_ic, prof_onto_ic, prof_freq_ic]
end

#get_profile(id) ⇒ Object



87
88
89
# File 'lib/pets/cohort.rb', line 87

# Returns the profile stored for +id+ in @profiles (nil when absent).
def get_profile(id)
  @profiles[id]
end

#get_profile_length_at_percentile(perc = 50, increasing_sort: false) ⇒ Object



191
192
193
194
195
# File 'lib/pets/cohort.rb', line 191

# Delegates to get_profile_length_at_percentile on the active ontology.
#
# +perc+ is passed positionally. The previous `perc=perc` inside the call was
# a self-assignment, not a keyword argument, and only worked by accident.
#
# @param perc percentile forwarded to the ontology (default 50)
# @param increasing_sort forwarded as the increasing_sort keyword
# @return the value returned by the ontology
def get_profile_length_at_percentile(perc=50, increasing_sort: false)
  ont = @@ont[Cohort.act_ont]
  ont.get_profile_length_at_percentile(perc, increasing_sort: increasing_sort)
end

#get_profile_ontology_distribution_tablesObject



170
171
172
173
174
175
176
# File 'lib/pets/cohort.rb', line 170

# Fetches the two distribution tables from the active ontology and prepends
# a header row to each (in place) before returning them.
#
# @return [Array] the levels table and the distribution-percentage table
def get_profile_ontology_distribution_tables
  levels_table, percentage_table = @@ont[Cohort.act_ont].get_profile_ontology_distribution_tables
  levels_table.unshift(["level", "ontology", "cohort"])
  percentage_table.unshift(["level", "ontology", "weighted cohort", "uniq terms cohort"])
  [levels_table, percentage_table]
end

#get_profile_redundancyObject



153
154
155
156
157
# File 'lib/pets/cohort.rb', line 153

# Delegates to get_profile_redundancy on the active ontology and returns the
# first two values it yields: profile sizes and parental terms per profile.
def get_profile_redundancy
  sizes, parental_terms = @@ont[Cohort.act_ont].get_profile_redundancy
  [sizes, parental_terms]
end

#get_profiles_mean_sizeObject



185
186
187
188
189
# File 'lib/pets/cohort.rb', line 185

# Delegates to get_profiles_mean_size on the active ontology and returns
# its result.
def get_profiles_mean_size
  @@ont[Cohort.act_ont].get_profiles_mean_size
end

#get_profiles_terms_frequency(options = {}) ⇒ Object



159
160
161
162
163
# File 'lib/pets/cohort.rb', line 159

# Delegates to get_profiles_terms_frequency on the active ontology and
# returns its result.
#
# +options+ is splatted into keyword arguments; see
# https://www.ruby-lang.org/en/news/2019/12/12/separation-of-positional-and-keyword-arguments-in-ruby-3-0/
def get_profiles_terms_frequency(options={})
  @@ont[Cohort.act_ont].get_profiles_terms_frequency(**options)
end

#get_vars(id) ⇒ Object



91
92
93
# File 'lib/pets/cohort.rb', line 91

# Returns the variant information stored for +id+ in @vars (nil when absent).
def get_vars(id)
  @vars[id]
end

#get_vars_sizes(summary = false) ⇒ Object



215
216
217
218
219
220
221
# File 'lib/pets/cohort.rb', line 215

# Returns size information from the variant index: get_summary_sizes when
# +summary+ is truthy, get_sizes otherwise.
def get_vars_sizes(summary=false)
  summary ? @var_idx.get_summary_sizes : @var_idx.get_sizes
end

#index_varsObject

equivalent to process_patient_data



209
210
211
212
213
# File 'lib/pets/cohort.rb', line 209

# Merges the variants of every record into the shared variant index,
# tagging each merge with the record id (equivalent to process_patient_data).
def index_vars
  each_var { |rec_id, rec_vars| @var_idx.merge(rec_vars, rec_id) }
end

#link2ont(ont_id) ⇒ Object



149
150
151
# File 'lib/pets/cohort.rb', line 149

# Hands this cohort's @profiles to the ontology stored in @@ont under
# +ont_id+ by calling its load_profiles, and returns that call's result.
def link2ont(ont_id)
  @@ont[ont_id].load_profiles(@profiles)
end

#remove_incomplete_recordsObject

Remove records that lack variants or phenotypes.



74
75
76
77
78
79
80
81
# File 'lib/pets/cohort.rb', line 74

# Removes records that lack variants or phenotypes.
#
# A record is kept only when it has an entry in @profiles and a non-empty
# entry in @vars. Membership is tested with hash lookups instead of
# Array#include?, so the cost stays linear in the number of records.
#
# @return the result of the final @vars.select! (nil when nothing was removed
#   from @vars)
def remove_incomplete_records
  @profiles.select! do |id, _prof|
    regs = @vars[id]
    !regs.nil? && regs.length > 0
  end
  # @profiles now holds exactly the complete ids.
  @vars.select! { |id, _regs| @profiles.key?(id) }
end

#save(output_file, mode = :default, translate = false) ⇒ Object



227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/pets/cohort.rb', line 227

# Writes the cohort to +output_file+ as tab-separated text, one line per
# (record, variant) pair. Records without variants get a single line with
# '-' placeholders for chr/start/stop.
#
# Modes:
#   :default  => id, terms (joined by '|'), chr, start, stop
#   :paco     => header line, then id, chr, start, stop, terms (joined by '|')
#
# The header test used to compare against the String 'paco' while the row
# format compared against the Symbol :paco, so the header was never written
# together with paco rows. The mode is now normalised to a Symbol once, so
# both spellings select the same format.
#
# @param output_file [String] destination path (overwritten)
# @param mode [Symbol, String] :default or :paco
# @param translate when truthy, terms are passed through the active
#   ontology's translate_ids before writing
def save(output_file, mode = :default, translate = false)
  mode = mode.to_sym if mode.respond_to?(:to_sym)
  File.open(output_file, 'w') do |f|
    f.puts "id\tchr\tstart\tstop\tterms" if mode == :paco
    ont = @@ont[Cohort.act_ont]
    @profiles.each do |id, terms|
      terms, _rejected = ont.translate_ids(terms) if translate
      id_variants = @vars[id]
      variants = []
      if id_variants.nil? || id_variants.length == 0
        variants << ['-', '-', '-']
      else
        id_variants.each do |chr, reg|
          variants << [chr, reg[:start], reg[:stop]]
        end
      end
      variants.each do |var|
        if mode == :default
          f.puts "#{id}\t#{terms.join('|')}\t#{var.join("\t")}"
        elsif mode == :paco
          f.puts "#{id}\t#{var.join("\t")}\t#{terms.join('|')}"
        else
          abort('Wrong save mode option, please try default or paco')
        end
      end
    end
  end
end

#select_by_profile!Object



58
59
60
61
62
# File 'lib/pets/cohort.rb', line 58

# Keeps only the records for which the given block returns a truthy value.
# The block receives (id, profile). Variants of discarded records are
# removed from @vars as well.
#
# Surviving ids are checked with Hash#key? rather than keys.include?, which
# made the second pass quadratic.
#
# @yield [id, profile] each stored profile
# @return the result of the final @vars.select! (nil when @vars is unchanged)
def select_by_profile!
  @profiles.select! { |id, profile| yield(id, profile) }
  @vars.select! { |id, _var| @profiles.key?(id) }
end

#select_by_var!Object



64
65
66
67
68
# File 'lib/pets/cohort.rb', line 64

# Keeps only the records for which the given block returns a truthy value.
# The block receives (id, variant info). Profiles of discarded records are
# removed from @profiles as well.
#
# Surviving ids are checked with Hash#key? rather than keys.include?, and
# the block parameters are named for what they actually hold (the original
# had "profile" and "var" swapped).
#
# @yield [id, var] each stored variant entry
# @return the result of the final @profiles.select! (nil when @profiles is
#   unchanged)
def select_by_var!
  @vars.select! { |id, var| yield(id, var) }
  @profiles.select! { |id, _profile| @vars.key?(id) }
end