Class: Cohort
- Inherits:
-
Object
- Object
- Cohort
- Defined in:
- lib/pets/cohort.rb
Constant Summary collapse
- @@ont =
{}
Class Attribute Summary collapse
-
.act_ont ⇒ Object
Which ontology to use for ontology-related operations.
Instance Attribute Summary collapse
-
#profiles ⇒ Object
Returns the value of attribute profiles.
Class Method Summary collapse
- .get_ontology(ont_id) ⇒ Object
- .load_ontology(ont_name, ont_file, excluded_terms_file = nil) ⇒ Object
- .read_excluded_ont_file(file) ⇒ Object
Instance Method Summary collapse
-
#add_gen_feat(id, feat_array) ⇒ Object
[[chr1, start1, stop1],[chr1, start1, stop1]].
-
#add_record(rec, extra_attr = nil) ⇒ Object
[id, [profile], [[chr1, start1, stop1],[chr1, start1, stop1]]].
-
#check(hard = false) ⇒ Object
OLD format_patient_data.
- #compare_profiles(options = {}) ⇒ Object
- #compute_term_list_and_childs ⇒ Object
- #delete(id) ⇒ Object
- #each_profile ⇒ Object
- #each_var ⇒ Object
- #export_phenopackets(output_folder, genome_assembly, vcf_index: nil) ⇒ Object
- #filter_by_term_number(n_terms) ⇒ Object
- #generate_cluster_regions(meth, tag, lim) ⇒ Object
- #get_dataset_specifity_index(type) ⇒ Object
-
#get_general_profile(thr = 0) ⇒ Object
TODO: move functionality to semtools.
- #get_ic_analysis ⇒ Object
- #get_profile(id) ⇒ Object
- #get_profile_length_at_percentile(perc = 50, increasing_sort: false) ⇒ Object
- #get_profile_ontology_distribution_tables ⇒ Object
- #get_profile_redundancy ⇒ Object
- #get_profiles_mean_size ⇒ Object
- #get_profiles_terms_frequency(options = {}) ⇒ Object
- #get_vars(id) ⇒ Object
- #get_vars_sizes(summary = false) ⇒ Object
-
#index_vars ⇒ Object
equivalent to process_patient_data.
-
#initialize ⇒ Cohort
constructor
A new instance of Cohort.
- #link2ont(ont_id) ⇒ Object
-
#remove_incomplete_records ⇒ Object
Remove records that lack vars or phenotypes.
- #save(output_file, mode = :default, translate = false) ⇒ Object
- #select_by_profile! ⇒ Object
- #select_by_var! ⇒ Object
Constructor Details
#initialize ⇒ Cohort
39 40 41 42 43 44 |
# File 'lib/pets/cohort.rb', line 39
# Creates an empty cohort: no profiles, no vars, no extra attributes, and an
# empty Genomic_Feature used as the index that #index_vars fills.
#
# @return [Cohort] a new instance of Cohort
def initialize
  @profiles = {}
  @vars = {}
  @extra_attr = {}
  @var_idx = Genomic_Feature.new([])
end
Class Attribute Details
.act_ont ⇒ Object
Which ontology to use for ontology-related operations.
7 8 9 |
# File 'lib/pets/cohort.rb', line 7
# Which ontology to use for ontology-related operations.
#
# @return [Object] the current value of @act_ont
def act_ont
  @act_ont
end
Instance Attribute Details
#profiles ⇒ Object
Returns the value of attribute profiles.
10 11 12 |
# File 'lib/pets/cohort.rb', line 10
# Returns the value of attribute profiles.
#
# @return [Object] the current value of @profiles
def profiles
  @profiles
end
Class Method Details
.get_ontology(ont_id) ⇒ Object
12 13 14 |
# File 'lib/pets/cohort.rb', line 12
# Looks up an ontology in the class-wide @@ont registry.
#
# @param ont_id [Object] key under which the ontology was stored
# @return [Object, nil] the entry stored in @@ont for ont_id
def self.get_ontology(ont_id)
  @@ont[ont_id]
end
.load_ontology(ont_name, ont_file, excluded_terms_file = nil) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/pets/cohort.rb', line 16
# Builds an Ontology from ont_file and stores it in @@ont under ont_name.
#
# When the file name contains '.json', an empty Ontology is created and the
# file is loaded through Ontology#read; excluded_terms_file is not consulted
# on that path. Otherwise the Ontology is constructed with load_file: true,
# passing the excluded terms as removable_terms when a file is given.
#
# @param ont_name [Object] key used to store the ontology in @@ont
# @param ont_file [String] path of the ontology file
# @param excluded_terms_file [String, nil] optional file with one term per line
# @return [Object] the ontology that was stored
def self.load_ontology(ont_name, ont_file, excluded_terms_file = nil)
  if ont_file.include?('.json')
    ont = Ontology.new
    ont.read(ont_file)
  elsif excluded_terms_file.nil?
    ont = Ontology.new(file: ont_file, load_file: true)
  else
    ont = Ontology.new(file: ont_file, load_file: true, removable_terms: read_excluded_ont_file(excluded_terms_file))
  end
  @@ont[ont_name] = ont
end
.read_excluded_ont_file(file) ⇒ Object
31 32 33 34 35 36 37 |
# File 'lib/pets/cohort.rb', line 31
# Reads a file of excluded terms, one per line.
#
# Uses File.foreach so the file handle is closed once reading finishes
# (the previous File.open(file).each form left it open).
#
# @param file [String] path of the file to read
# @return [Array<String>] each line of the file with its line ending removed
def self.read_excluded_ont_file(file)
  File.foreach(file).map(&:chomp)
end
Instance Method Details
#add_gen_feat(id, feat_array) ⇒ Object
[[chr1, start1, stop1],[chr1, start1, stop1]]
83 84 85 |
# File 'lib/pets/cohort.rb', line 83
# Stores a Genomic_Feature built from feat_array under the given id,
# replacing any previous entry in @vars.
#
# @param id [Object] record identifier
# @param feat_array [Array] [[chr1, start1, stop1],[chr1, start1, stop1]]
# @return [Object] the Genomic_Feature that was stored
def add_gen_feat(id, feat_array)
  @vars[id] = Genomic_Feature.new(feat_array)
end
#add_record(rec, extra_attr = nil) ⇒ Object
[id, [profile], [[chr1, start1, stop1],[chr1, start1, stop1]]]
46 47 48 49 50 51 |
# File 'lib/pets/cohort.rb', line 46
# Adds one record to the cohort. Each part is stored only when it is not nil.
#
# @param rec [Array] [id, [profile], [[chr1, start1, stop1],[chr1, start1, stop1]]]
# @param extra_attr [Object, nil] extra attributes stored under the record id
def add_record(rec, extra_attr = nil)
  id, profile, vars = rec
  @profiles[id] = profile.map(&:to_sym) unless profile.nil? # terms are kept as symbols
  @extra_attr[id] = extra_attr unless extra_attr.nil?
  add_gen_feat(id, vars) unless vars.nil?
end
#check(hard = false) ⇒ Object
OLD format_patient_data
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
# File 'lib/pets/cohort.rb', line 123
# Validates every profile against the active ontology (OLD format_patient_data).
#
# With hard set, each profile is replaced by ont.clean_profile_hard(terms) and
# no rejected codes are collected. Otherwise ont.check_ids splits the terms
# into kept and rejected codes, and a warning is written to STDERR for each
# record that has rejected codes. Records left with no terms are removed from
# both @profiles and @vars.
#
# @param hard [Boolean] use clean_profile_hard instead of check_ids
# @return [Array] [unique rejected terms, ids of the removed records]
def check(hard = false)
  ont = @@ont[Cohort.act_ont]
  rejected_terms = []
  rejected_recs = []
  @profiles.each do |id, terms|
    if hard
      terms = ont.clean_profile_hard(terms)
      rejec_terms = []
    else
      terms, rejec_terms = ont.check_ids(terms)
    end
    unless rejec_terms.empty?
      STDERR.puts "WARNING: record #{id} has the unknown CODES '#{rejec_terms.join(',')}'. Codes removed."
      rejected_terms.concat(rejec_terms)
    end
    if terms.empty?
      rejected_recs << id
    else
      @profiles[id] = terms # existing key, so safe to reassign while iterating
    end
  end
  @profiles.select! { |id, _record| !rejected_recs.include?(id) }
  @vars.select! { |id, _record| !rejected_recs.include?(id) }
  return rejected_terms.uniq, rejected_recs
end
#compare_profiles(options = {}) ⇒ Object
203 204 205 206 207 |
# File 'lib/pets/cohort.rb', line 203
# Delegates profile comparison to the active ontology.
#
# @param options [Hash] forwarded as keyword arguments to ont.compare_profiles
# @return [Object] whatever ont.compare_profiles returns
def compare_profiles(options = {})
  ont = @@ont[Cohort.act_ont]
  similarities = ont.compare_profiles(**options)
  return similarities
end
#compute_term_list_and_childs ⇒ Object
165 166 167 168 |
# File 'lib/pets/cohort.rb', line 165
# Delegates to the active ontology's compute_term_list_and_childs.
#
# @return [Object] the value returned by the ontology call; the local names
#   suggest a pair (suggested_childs, term_with_childs_ratio)
def compute_term_list_and_childs
  ont = @@ont[Cohort.act_ont]
  suggested_childs, term_with_childs_ratio = ont.compute_term_list_and_childs
end
#delete(id) ⇒ Object
53 54 55 56 |
# File 'lib/pets/cohort.rb', line 53
# Removes the record with the given id from @profiles and @vars.
# NOTE(review): @extra_attr is not touched here; confirm that is intended.
#
# @param id [Object] record identifier
# @return [Object, nil] the entry removed from @vars, or nil if there was none
def delete(id)
  @profiles.delete(id)
  @vars.delete(id)
end
#each_profile ⇒ Object
95 96 97 98 99 |
# File 'lib/pets/cohort.rb', line 95
# Iterates over the stored profiles.
#
# @yield [id, profile] each record id with its profile
def each_profile
  @profiles.each do |id, profile|
    yield(id, profile)
  end
end
#each_var ⇒ Object
101 102 103 104 105 |
# File 'lib/pets/cohort.rb', line 101
# Iterates over the stored vars.
#
# @yield [id, var_info] each record id with its entry in @vars
def each_var
  @vars.each do |id, var_info|
    yield(id, var_info)
  end
end
#export_phenopackets(output_folder, genome_assembly, vcf_index: nil) ⇒ Object
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 |
# File 'lib/pets/cohort.rb', line 255
# Writes one JSON file per profile into output_folder, named "<id>.json".
#
# Each file holds the shared metaData, a subject (id plus the :sex extra
# attribute, or 'UNKNOWN_SEX' when absent), and one phenotypic feature per
# term with its label from the active ontology. An htsFiles entry is added
# when vcf_index includes the record id.
#
# The unused `variants` list that was built after the file write has been
# dropped; it never reached the output.
#
# @param output_folder [String] directory receiving the JSON files
# @param genome_assembly [Object] value written as "genomeAssembly"
# @param vcf_index [#include?, #[], nil] maps record ids to VCF paths
def export_phenopackets(output_folder, genome_assembly, vcf_index: nil)
  ont = @@ont[Cohort.act_ont]
  metaData = {
    "createdBy" => "PETS",
    "resources" => [{
      "id" => "hp",
      "name" => "human phenotype ontology",
      "namespacePrefix" => "HP",
      "url" => "http://purl.obolibrary.org/obo/hp.owl",
      # "version" => "2018-03-08",
      "iriPrefix" => "http://purl.obolibrary.org/obo/HP_"
    }]
  }

  @profiles.each do |id, terms|
    phenopacket = {metaData: metaData}
    query_sex = @extra_attr.dig(id, :sex)
    sex = query_sex.nil? ? 'UNKNOWN_SEX' : query_sex
    phenopacket[:subject] = {
      id: id,
      sex: sex
    }
    phenotypicFeatures = []
    terms.each do |term|
      term_name = ont.translate_id(term)
      phenotypicFeatures << {
        type: { id: term, label: term_name},
        classOfOnset: {"id" => "HP:0003577", "label" => "Congenital onset"}
      }
    end
    phenopacket[:phenotypicFeatures] = phenotypicFeatures
    if !vcf_index.nil? && vcf_index.include?(id)
      htsFiles = []
      htsFiles << {
        "uri" => "file:/" + vcf_index[id],
        "description" => id,
        "htsFormat" => "VCF",
        "genomeAssembly" => genome_assembly,
        "individualToSampleIdentifiers" => { "patient1" => id }
      }
      phenopacket[:htsFiles] = htsFiles
    end
    File.open(File.join(output_folder, id.to_s + ".json"), "w") { |f| f.write JSON.pretty_generate(phenopacket) }
  end
end
#filter_by_term_number(n_terms) ⇒ Object
70 71 72 |
# File 'lib/pets/cohort.rb', line 70
# Keeps only the records whose profile has at least n_terms terms.
#
# @param n_terms [Integer] minimum profile length to keep
def filter_by_term_number(n_terms)
  select_by_profile! { |_id, profile| profile.length >= n_terms }
end
#generate_cluster_regions(meth, tag, lim) ⇒ Object
223 224 225 |
# File 'lib/pets/cohort.rb', line 223
# Delegates to @var_idx.generate_cluster_regions with the same arguments.
#
# @param meth [Object] forwarded unchanged
# @param tag [Object] forwarded unchanged
# @param lim [Object] forwarded unchanged
# @return [Object] whatever the index returns
def generate_cluster_regions(meth, tag, lim)
  @var_idx.generate_cluster_regions(meth, tag, lim)
end
#get_dataset_specifity_index(type) ⇒ Object
197 198 199 200 201 |
# File 'lib/pets/cohort.rb', line 197
# Delegates to the active ontology's get_dataset_specifity_index.
#
# @param type [Object] forwarded unchanged to the ontology
# @return [Object] the value returned by the ontology
def get_dataset_specifity_index(type)
  ont = @@ont[Cohort.act_ont]
  ont.get_dataset_specifity_index(type)
end
#get_general_profile(thr = 0) ⇒ Object
TODO: move functionality to semtools
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/pets/cohort.rb', line 107
# Builds a cohort-wide profile from the terms whose frequency among the
# records is at least thr, then passes it through the active ontology's
# clean_profile_hard.
# TODO: move functionality to semtools
#
# @param thr [Numeric] minimum fraction of records that must carry a term
# @return [Object] the result of ont.clean_profile_hard on the selected terms
def get_general_profile(thr = 0)
  term_count = Hash.new(0)
  each_profile do |_id, prof|
    prof.each do |term|
      term_count[term] += 1
    end
  end
  records = @profiles.length
  general_profile = []
  term_count.each do |term, count|
    general_profile << term if count.fdiv(records) >= thr
  end
  ont = @@ont[Cohort.act_ont]
  return ont.clean_profile_hard(general_profile)
end
#get_ic_analysis ⇒ Object
178 179 180 181 182 183 |
# File 'lib/pets/cohort.rb', line 178
# Collects information-content values from the active ontology.
#
# @return [Array] onto_ic, freq_ic, onto_ic_profile, freq_ic_profile
def get_ic_analysis
  ont = @@ont[Cohort.act_ont]
  onto_ic, freq_ic = ont.get_observed_ics_by_onto_and_freq # IC for TERMS
  onto_ic_profile, freq_ic_profile = ont.get_profiles_resnik_dual_ICs # IC for PROFILES
  return onto_ic, freq_ic, onto_ic_profile, freq_ic_profile
end
#get_profile(id) ⇒ Object
87 88 89 |
# File 'lib/pets/cohort.rb', line 87
# Fetches the profile stored for a record.
#
# @param id [Object] record identifier
# @return [Object, nil] the entry of @profiles for id
def get_profile(id)
  @profiles[id]
end
#get_profile_length_at_percentile(perc = 50, increasing_sort: false) ⇒ Object
191 192 193 194 195 |
# File 'lib/pets/cohort.rb', line 191
# Delegates to the active ontology's get_profile_length_at_percentile.
#
# The positional argument is now passed as plain `perc`; the previous
# `perc=perc` was a local self-assignment, not a keyword argument.
#
# @param perc [Numeric] percentile forwarded to the ontology
# @param increasing_sort [Boolean] forwarded as the increasing_sort keyword
# @return [Object] the value returned by the ontology
def get_profile_length_at_percentile(perc = 50, increasing_sort: false)
  ont = @@ont[Cohort.act_ont]
  ont.get_profile_length_at_percentile(perc, increasing_sort: increasing_sort)
end
#get_profile_ontology_distribution_tables ⇒ Object
170 171 172 173 174 175 176 |
# File 'lib/pets/cohort.rb', line 170
# Fetches the two distribution tables from the active ontology and prepends
# a header row to each of them (the tables are modified in place by unshift).
#
# @return [Array] ontology_levels, distribution_percentage
def get_profile_ontology_distribution_tables
  ont = @@ont[Cohort.act_ont]
  ontology_levels, distribution_percentage = ont.get_profile_ontology_distribution_tables
  ontology_levels.unshift(["level", "ontology", "cohort"])
  distribution_percentage.unshift(["level", "ontology", "weighted cohort", "uniq terms cohort"])
  return ontology_levels, distribution_percentage
end
#get_profile_redundancy ⇒ Object
153 154 155 156 157 |
# File 'lib/pets/cohort.rb', line 153
# Delegates to the active ontology's get_profile_redundancy.
#
# @return [Array] profile_sizes, parental_terms_per_profile
def get_profile_redundancy
  ont = @@ont[Cohort.act_ont]
  profile_sizes, parental_terms_per_profile = ont.get_profile_redundancy
  return profile_sizes, parental_terms_per_profile
end
#get_profiles_mean_size ⇒ Object
185 186 187 188 189 |
# File 'lib/pets/cohort.rb', line 185
# Delegates to the active ontology's get_profiles_mean_size.
#
# @return [Object] the value returned by the ontology
def get_profiles_mean_size
  ont = @@ont[Cohort.act_ont]
  ont.get_profiles_mean_size
end
#get_profiles_terms_frequency(options = {}) ⇒ Object
159 160 161 162 163 |
# File 'lib/pets/cohort.rb', line 159
# Delegates to the active ontology's get_profiles_terms_frequency.
#
# The hash is splatted explicitly because Ruby 3.0 separates positional and
# keyword arguments:
# https://www.ruby-lang.org/en/news/2019/12/12/separation-of-positional-and-keyword-arguments-in-ruby-3-0/
#
# @param options [Hash] forwarded as keyword arguments to the ontology
# @return [Object] the value returned by the ontology
def get_profiles_terms_frequency(options = {})
  ont = @@ont[Cohort.act_ont]
  term_stats = ont.get_profiles_terms_frequency(**options)
  return term_stats
end
#get_vars(id) ⇒ Object
91 92 93 |
# File 'lib/pets/cohort.rb', line 91
# Fetches the vars stored for a record.
#
# @param id [Object] record identifier
# @return [Object, nil] the entry of @vars for id
def get_vars(id)
  @vars[id]
end
#get_vars_sizes(summary = false) ⇒ Object
215 216 217 218 219 220 221 |
# File 'lib/pets/cohort.rb', line 215
# Returns size information from the variant index.
#
# @param summary [Boolean] when truthy, call get_summary_sizes on the index;
#   otherwise call get_sizes
# @return [Object] whatever the chosen index method returns
def get_vars_sizes(summary = false)
  summary ? @var_idx.get_summary_sizes : @var_idx.get_sizes
end
#index_vars ⇒ Object
equivalent to process_patient_data
209 210 211 212 213 |
# File 'lib/pets/cohort.rb', line 209
# Merges every record's vars into @var_idx, passing the record id as the
# second argument to merge (equivalent to process_patient_data).
def index_vars
  each_var do |id, var|
    @var_idx.merge(var, id)
  end
end
#link2ont(ont_id) ⇒ Object
149 150 151 |
# File 'lib/pets/cohort.rb', line 149
# Hands this cohort's profiles to the ontology stored in @@ont under ont_id
# by calling its load_profiles.
#
# @param ont_id [Object] key of the ontology in @@ont
# @return [Object] whatever load_profiles returns
def link2ont(ont_id)
  @@ont[ont_id].load_profiles(@profiles)
end
#remove_incomplete_records ⇒ Object
Remove records that lack vars or phenotypes
74 75 76 77 78 79 80 81 |
# File 'lib/pets/cohort.rb', line 74
# Removes records that lack vars or phenotypes: a record is kept only when it
# has an entry in @profiles and a @vars entry whose length is greater than 0.
def remove_incomplete_records
  ids_with_vars = @vars.select { |_id, regs| regs.length > 0 }.keys
  full_ids = ids_with_vars & @profiles.keys
  @profiles.select! { |id, _prof| full_ids.include?(id) }
  @vars.select! { |id, _var| full_ids.include?(id) }
end
#save(output_file, mode = :default, translate = false) ⇒ Object
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
# File 'lib/pets/cohort.rb', line 227
# Writes the cohort to a tab-separated file, one line per record variant.
# Records without vars get a single line with '-' placeholders.
#
# Line layout by mode:
#   :default -> id, terms joined by '|', chr, start, stop
#   :paco    -> id, chr, start, stop, terms joined by '|', after a header line
#
# The header guard now tests the Symbol :paco, matching the per-line mode
# checks. It used to test the String 'paco', so the header was never written
# for a valid paco save.
#
# @param output_file [String] path of the file to write
# @param mode [Symbol] :default or :paco; any other value aborts on the
#   first line written
# @param translate [Boolean] replace terms with ont.translate_ids output
def save(output_file, mode = :default, translate = false)
  File.open(output_file, 'w') do |f|
    f.puts "id\tchr\tstart\tstop\tterms" if mode == :paco
    ont = @@ont[Cohort.act_ont]
    @profiles.each do |id, terms|
      terms, rejected = ont.translate_ids(terms) if translate
      id_variants = @vars[id]
      variants = []
      if id_variants.nil? || id_variants.length == 0
        variants << ['-', '-', '-']
      else
        id_variants.each do |chr, reg|
          variants << [chr, reg[:start], reg[:stop]]
        end
      end
      variants.each do |var|
        if mode == :default
          f.puts "#{id}\t#{terms.join('|')}\t#{var.join("\t")}"
        elsif mode == :paco
          f.puts "#{id}\t#{var.join("\t")}\t#{terms.join('|')}"
        else
          abort('Wrong save mode option, please try default or paco')
        end
      end
    end
  end
end
#select_by_profile! ⇒ Object
58 59 60 61 62 |
# File 'lib/pets/cohort.rb', line 58
# Keeps only the records for which the block returns a truthy value, then
# drops from @vars every id that no longer has a profile.
#
# @yield [id, profile] each record id with its profile
# @yieldreturn [Boolean] truthy to keep the record
def select_by_profile!
  @profiles.select! { |id, profile| yield(id, profile) }
  @vars.select! { |id, _var| @profiles.key?(id) }
end
#select_by_var! ⇒ Object
64 65 66 67 68 |
# File 'lib/pets/cohort.rb', line 64
# Keeps only the @vars entries for which the block returns a truthy value,
# then drops from @profiles every id that no longer has vars.
#
# @yield [id, var] each record id with its entry in @vars
# @yieldreturn [Boolean] truthy to keep the record
def select_by_var!
  @vars.select! { |id, var| yield(id, var) }
  @profiles.select! { |id, _profile| @vars.key?(id) }
end