Top Level Namespace

Defined Under Namespace

Modules: BinManager, CdIncluder, ClassDataUtilities, ColName, DataUtilities, ImmunoscoreTest, StringToMongo

Classes: Array, Cd, Classification, CtTile, DataClassifier, DefinensCSV, Density, Histogram, ImTile, ImmunoScoreResults, NilClass, Original, StatResults, Statistic, String, Test

Constant Summary

JSON_CLASS_MAPPER =

Mapping from Definiens file type to the corresponding database entity class.

{:ct_tile=>CtTile,
:im_tile=>ImTile,
:classification=>Classification,
:original=>Original,
:statistic=>Statistic,
:density=>Density,
:histogram=>Histogram}
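
For example, a search-result hash tagged with :type can be mapped to its MongoMapper class like this (a minimal sketch; the data_set hash is hypothetical):

  data_set = {:type => :ct_tile, :case_n => "RS-SP03-0001", :cd_type => "CD3"}  # hypothetical dataset
  JSON_CLASS_MAPPER[data_set[:type]]   # => CtTile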

Instance Method Summary

Instance Method Details

#cd3?(file_path) ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/exporter.rb', line 16

def cd3? file_path
  not (file_path !~ /_CD3_/)
end

#cd8?(file_path) ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/exporter.rb', line 20

def cd8? file_path
  not (file_path !~ /_CD8_/)
end
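
Both predicates simply test whether the marker token appears in the file path, e.g. (hypothetical path):

  cd8? "/data/RS_SP03_0001_CD8_Statistics.csv"   # => true
  cd3? "/data/RS_SP03_0001_CD8_Statistics.csv"   # => false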

#csv_clean_all(match_pattern = "*.csv", root_directory_path = BASE_DIR) ⇒ Object



# File 'lib/semicolon_cleaner.rb', line 58

def csv_clean_all match_pattern="*.csv", root_directory_path=BASE_DIR
  csv_matches=Dir.glob "#{File.absolute_path root_directory_path}/**/#{match_pattern}"
  csv_matches.each do |cm|
    begin
      DefinensCSV.new cm
    rescue
      puts "FAILED on #{cm}"
    end
  end
end

#csv_headers_to_keys(file_path) ⇒ Object

Utility function to create MongoMapper keys.

Takes the file_path of the CSV file.

Prints the keys in MongoMapper format.
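
Example (sketch; the file and the printed headers are hypothetical):

  csv_headers_to_keys "statistics.csv"
  # prints one line per sorted header, e.g.:
  #   key :area, String
  #   key :cell_count, String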



# File 'lib/analyzer.rb', line 607

def csv_headers_to_keys file_path
  CSV.table(file_path).headers.sort!.each do |x|
      puts "key :#{x}, String"
  end
end

#csv_to_mongo(file_name = "test.csv", mongo_class = TestCsv) ⇒ Object

Loads a CSV file into a Mongo class. The Mongo class needs to exist already.
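
Example (a sketch; TestData is a hypothetical MongoMapper class that already defines a key for every CSV header):

  csv_to_mongo "measurements.csv", TestData   # hypothetical file and class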



# File 'lib/analyzer.rb', line 525

def csv_to_mongo file_name="test.csv",mongo_class=TestCsv
  t=CSV.table file_name
  t.each_with_index do |row,i|
    m=mongo_class.new
    t.headers.each do |header|
      m[header]=row[header]
    end
    m.save
    puts "#{i}: #{mongo_class.count}"
  end
end

#export_clean_allObject



# File 'lib/exporter.rb', line 12

def export_clean_all
  `rm -rf #{EXPORT_DIR}/*`
end

#export_file(case_dir, file_path) ⇒ Object



# File 'lib/exporter.rb', line 24

def export_file case_dir,file_path
  if cd8? file_path
    Dir.mkdir(case_dir+"/CD8")  unless Dir.exist?(case_dir+"/CD8")
    fh=File.new(case_dir+"/CD8/"+File.basename(file_path),"w")
  else
    Dir.mkdir(case_dir+"/CD3")  unless Dir.exist?(case_dir+"/CD3")
    fh=File.new(case_dir+"/CD3/"+File.basename(file_path),"w")
  end

end

#export_mongoObject

Exports a new directory structure containing only the relevant files.



# File 'lib/exporter.rb', line 36

def export_mongo
  # defined in immunoscore_results_loader
  JSON_CLASS_MAPPER.values.each do |mm_class|
    mm_class.all.each do |mm_object|
      puts "#{mm_object}: #{mm_object[:case_n]}"
      next if mm_object[:case_n].nil?
      case_dir=(EXPORT_DIR+"/"+ mm_object[:case_n])
      Dir.mkdir case_dir unless Dir.exist?(case_dir)
      fh=export_file case_dir, mm_object[:path]
      fh.write(mm_object[:data_load])
      fh.close
    end
  end
end

#find_classificationObject



# File 'lib/immunoscore_results_loader.rb', line 97

def find_classification
  find_files(file_name_ending="*Classification.jpg").map do |x|
    x.merge({:type => :classification})
  end
end

#find_ctObject

Special find functions.



# File 'lib/immunoscore_results_loader.rb', line 79

def find_ct 
  find_files(file_name_ending="*image*CT*.jpg").map{|x| x[:path]}.map do |path|
    case_n=get_case_n path
    cd =pull_cd(path)
    tile=pull_ct path
    {:cd_type=>cd, :path=>path, :tile=>tile, :case_n=>case_n, :type=>:ct_tile}
  end
end

#find_densityObject



# File 'lib/immunoscore_results_loader.rb', line 120

def find_density
   find_files(file_name_ending="*densitymap*.jpg").map do |x|
    path=x[:path]
    case_n=get_case_n path
    new_old=path.gsub(".jpg","")[-3..-1]
    {:path=>path, :case_n=>case_n, :new_old=>new_old, 
      :cd_type=>pull_cd(path),:type=>:density}
  end
end

#find_files(file_name_ending = "*Classification.jpg", base_dir = BASE_DIR) ⇒ Object

Core search function: globs for files matching file_name_ending under base_dir and returns an array of hashes with :case_n, :cd_type, and :path.
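
A sketch of a call and the shape of the hashes it returns (paths and case numbers are hypothetical):

  find_files "*Statistics.csv", BASE_DIR
  # => [{:case_n=>"RS-SP03-0001", :cd_type=>"CD3", :path=>"/data/.../Statistics.csv"}, ...]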



# File 'lib/immunoscore_results_loader.rb', line 62

def find_files file_name_ending="*Classification.jpg", base_dir=BASE_DIR
  results=[]
  Dir.glob("#{base_dir}/**/#{file_name_ending}").each do |full_path|
    puts "am  working on #{full_path}"
    directory_name=full_path.split("\/")[-4]
    case_name=get_case_n full_path
    cd=directory_name.split("_")[2]
    results << {:case_n=>case_name,
                :cd_type =>cd,
                :path=>full_path}
  end
  results
end

#find_histogramObject



# File 'lib/immunoscore_results_loader.rb', line 130

def find_histogram
  find_files(file_name_ending="*histogram.jpg").map do |x|
    path=x[:path]
    case_n=get_case_n path
    cd=pull_cd(path)
    {:path=>path, :case_n=>case_n,:type=>:histogram, :cd_type=>cd}
  end
end

#find_imObject



# File 'lib/immunoscore_results_loader.rb', line 88

def find_im
  find_files(file_name_ending="*image*IM*.jpg").map{|x| x[:path]}.map do |path|
    case_n=get_case_n path
    cd =pull_cd(path)
    tile=pull_im path
    {:cd_type=>cd, :path=>path, :tile=>tile, :case_n=>case_n,:type=>:im_tile}
  end
end

#find_originalObject



# File 'lib/immunoscore_results_loader.rb', line 103

def find_original
  find_files(file_name_ending="*Original.jpg").map do |x|
     x.merge({:type => :original})
   end
end

#find_statisticsObject



# File 'lib/immunoscore_results_loader.rb', line 109

def find_statistics
  find_files(file_name_ending="*Statistics.csv").map do |x|
    puts x
    {:case_n=> x[:case_n],
     :path=> x[:path],
     :cd_type=>pull_cd(x[:path]),
    :type=> :statistic}
  end
end

#get_case_n(path) ⇒ Object

Case identification: matches file names starting with RS, followed by either - or _, followed by SP03, followed by either - or _, followed by the case number. Matching of the block number is not included.
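
For example (hypothetical path):

  get_case_n "/export/images/RS-SP03-0001_Original.jpg"
  # => "RS-SP03-0001"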



# File 'lib/immunoscore_results_loader.rb', line 55

def get_case_n path
  match=path.match(/(RS[_-].?.?.?.?.[-_]\d*)/i)
  match[0] if match
end

#is_semicolon?(file_path) ⇒ Boolean

Checks whether a file is a semicolon-delimited CSV by counting ';' and ',' in the header line.

Returns:

  • (Boolean)


# File 'lib/analyzer.rb', line 203

def is_semicolon? file_path
  csv=CSV.read(file_path)
  if (csv and csv.count !=0)
    puts file_path
    header=csv[0][0]
    return false if header==nil
    return header.count(";") > header.count(",")
  end
end

#is_tab?(file_path) ⇒ Boolean

Checks whether a file is tab-delimited by counting tab and ',' characters in the header line.

Returns:

  • (Boolean)


# File 'lib/analyzer.rb', line 219

def is_tab? file_path
  csv=CSV.read(file_path)
  if (csv and csv.count !=0)
    puts file_path
    header=csv[0][0]
    return false if header==nil
    return header.count("\t") > header.count(",")
  end
end

#load_table(file_path) ⇒ Object

Factory for new tables. Takes care of Definiens quirks, stores the file path on the table, removes nil headers, and deals with tab-formatted files.
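
Example (sketch; the path is hypothetical):

  t = load_table "RS-SP03-0001_Statistics.csv"   # hypothetical Definiens export
  t.headers      # cleaned, comma-separated headers
  t.file_path    # the (possibly rewritten) file path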



# File 'lib/analyzer.rb', line 475

def load_table file_path
  if is_semicolon?(file_path) 
    file_path=remove_semicolon(file_path) 
  elsif is_tab?(file_path) 
    file_path=remove_tabs(file_path) 
  end
  remove_nil_headers file_path
  c=CSV.table file_path
  c.file_path=file_path
  begin
    c.data_classifier=DataClassifier.new file_path
  rescue
    c.data_classifier=false
  end
  c
end

#make_class_name(file_path) ⇒ Object



# File 'lib/analyzer.rb', line 286

def make_class_name file_path
  File.basename((file_path).gsub(".","_").gsub("@","").gsub("%","").gsub("-","")).camelize

end

#make_html(reporting_order = [ :histogram, :original, :density, :ct_tile, :im_tile, :statistic, :classification]) ⇒ Object



# File 'lib/exporter.rb', line 54

def make_html reporting_order=[
                :histogram,
                :original,
                :density,
                :ct_tile,
                :im_tile,
                :statistic,
                :classification]
  all_cases=[]
  ImmunoScoreResults.all.each do |i|
    case_summary=[]
    i.cd.sort.each do |slide_cd|
      reporting_order.each do |feature|
        slide_cd.public_send(feature).sort!{|a,b| a.path<=>b.path}.each do |report|
          case_summary<< report[:path]
        end
      end
    end
    all_cases << case_summary
  end
  all_cases
end

#make_mongo_class(class_name) ⇒ Object

Creates a MongoMapper document class at runtime and stores it in an instance variable named after class_name.
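
Example (sketch; the class name is hypothetical):

  klass = make_mongo_class "SurvivalData"
  klass.ancestors.include?(MongoMapper::Document)   # => true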



# File 'lib/analyzer.rb', line 48

def make_mongo_class class_name
  self.instance_variable_set "@#{class_name}", Class.new
  c=self.instance_variable_get "@#{class_name}"
  c.class_eval do
    include MongoMapper::Document
  end
  c
end

#make_query(data_set) ⇒ Object

Builds a query to ensure that entries are not recreated in the database; only paths and blobs are updated.
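
For a density data set, for example, the query keys off the case number, CD type, and new/old tag (hypothetical values):

  make_query({:case_n => "RS-SP03-0001", :cd_type => "CD8", :new_old => "new", :type => :density})
  # => {:case_n=>"RS-SP03-0001", :cd_type=>"CD8", :new_old=>"new"}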



# File 'lib/immunoscore_results_loader.rb', line 160

def make_query data_set
  if data_set.has_key?(:new_old)
    query={:case_n => data_set[:case_n],:cd_type=>data_set[:cd_type], :new_old=>data_set[:new_old]}
  elsif data_set.has_key?(:tile)
    query={:case_n => data_set[:case_n],:cd_type=>data_set[:cd_type], :tile=>data_set[:tile]}
  else
    query={:case_n => data_set[:case_n],:cd_type=>data_set[:cd_type]}
  end
  query
end

#mm_clean_allObject

Resets the database: deletes all documents from every collection.



# File 'lib/data_struct.rb', line 276

def mm_clean_all
  [ImmunoScoreResults, Cd, Histogram, Density, Statistic, Original, Classification, ImTile, CtTile].each do |mm_class|
    mm_class.delete_all
  end
end

#mongo_load_all(search_results = search_all(), json_class_mapper = JSON_CLASS_MAPPER) ⇒ Object

Loads datasets into their Mongo classes.

If an entry already exists, it is updated rather than recreated.
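
Typical use is with the defaults, which re-scan the file tree and load or update every dataset (a sketch):

  mongo_load_all                   # uses search_all() and JSON_CLASS_MAPPER
  mongo_load_all find_density      # load or update only the density maps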



# File 'lib/immunoscore_results_loader.rb', line 177

def mongo_load_all search_results=search_all(), json_class_mapper=JSON_CLASS_MAPPER
  search_results.each_with_index do |data_set,i|
    puts "#{i}: #{data_set}"
    # look up the target class by symbol key, falling back to a string key
    mm_class=(json_class_mapper[data_set[:type]] or json_class_mapper[data_set["type"]])
    puts "mm_class=#{mm_class}"
    query=make_query data_set
    puts "query= #{query}"
    if mm_class.where(query).all.empty?
      mm_object=mm_class.create data_set
      puts "mm created #{mm_object}"
    else
      # upserts if entry pre-existing
      mm_class.set(query, data_set, :upsert => true )
      mm_object=mm_class.where(data_set).find_one
      puts "upsert updated #{mm_object}"
    end
    mm_object.get_cd
    mm_object.save
  end
  puts "\n\n\n"
  puts "finished uploading to database"
end

#name_split(text_string) ⇒ Object

Splits a name string into its components; always returns an array.
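
For example (hypothetical names):

  name_split "Smith, John"   # => ["Smith", "John"]
  name_split "Smith"         # => ["Smith"]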



# File 'lib/analyzer.rb', line 87

def name_split text_string
  if text_string.index " " or text_string.index ","
    # split on any run of whitespace and/or commas
    return text_string.split(/[\s,]+/)
  else
    return [text_string]
  end
end

#names_cleaner(text, names) ⇒ Object

Removes names from surgical pathology report text.
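
Example (sketch; the text and name are hypothetical):

  names_cleaner "Seen by Dr. Smith.", "Smith"
  # => "Seen by Dr. ."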



# File 'lib/analyzer.rb', line 99

def names_cleaner text, names

  names.map!{|z| split_if_space z}.flatten! if names.class==Array
  names=name_split names if names.class==String
  names.each do |n|

    r=Regexp.new(n, Regexp::IGNORECASE)  
    text.gsub! r,""
    puts "cleaned #{n}"
  end
  text
end

#prompt(*args) ⇒ Object



# File 'lib/analyzer.rb', line 17

def prompt(*args)
    print(*args)
    gets.strip
end

#pull_cd(path) ⇒ Object

Regex utilities for extracting the CD type and the CT/IM tile number from file paths.
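
For example (hypothetical paths):

  pull_cd "/export/RS-SP03-0001/CD8/densitymap.jpg"   # => "CD8"
  pull_ct "/export/RS-SP03-0001/image_CT4.jpg"        # => "CT4"
  pull_im "/export/RS-SP03-0001/image_IM2.jpg"        # => "IM2"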



# File 'lib/immunoscore_results_loader.rb', line 37

def pull_cd path
  path.match(/.*(CD[8|3]).*/)[1] if path.match(/.*(CD[8|3]).*/)
end

#pull_ct(path) ⇒ Object



# File 'lib/immunoscore_results_loader.rb', line 41

def pull_ct path
  pattern=/.*(CT[1|2|3|4|5|6|7|8])\.jpg/
  path.match(pattern)[1] if path.match(pattern)
end

#pull_im(path) ⇒ Object



# File 'lib/immunoscore_results_loader.rb', line 46

def pull_im path
  pattern=/.*(IM[1|2|3|4|5|6|7|8])\.jpg/
  path.match(pattern)[1] if path.match(pattern)
end

#remove_nil_headers(file_path) ⇒ Object

Removes empty (nil-header) columns from CSV files; also deals with some Windows encoding issues if needed.



# File 'lib/analyzer.rb', line 452

def remove_nil_headers file_path
  begin
    c=CSV.read(file_path,:headers => true)
  rescue
    c=CSV.read(file_path,:headers => true,  :encoding => 'windows-1251:utf-8')
  end
  if c.headers.include? nil
    c.by_col!
    while c.headers.index(nil) != nil
      c.delete(c.headers.index(nil))
    end
    fh=File.new file_path, "w"
    fh.write c.to_csv
    fh.close
  end
end

#remove_semicolon(file_path) ⇒ Object

Replaces semicolon delimiters with commas (; => ,).



# File 'lib/analyzer.rb', line 236

def remove_semicolon file_path
  puts "removing semicolons in #{file_path}"
  c=CSV.table file_path, :col_sep=> ";"
  fh=File.new file_path, "w"
  fh.write c.to_csv
  fh.close
  return file_path
end

#remove_tabs(file_path) ⇒ Object

Replaces tab delimiters with commas (\t => ,).



# File 'lib/analyzer.rb', line 247

def remove_tabs file_path
  puts "removing tabs in #{file_path}"
  c=CSV.table file_path, :col_sep=> "\t"
  fh=File.new file_path, "w"
  fh.write c.to_csv
  fh.close
  ext=File.extname file_path
  if ext==".xls" 
    `cp #{file_path} #{file_path.gsub ext,".csv"}` 
    return file_path.gsub ext,".csv"
  else
    return file_path
  end
end

#search_allObject

Merges all the JSON structures coming from the search functions. Uses some metaprogramming: calls all of the find_* functions listed above.
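
Sketch of typical use:

  results = search_all              # flattened array of dataset hashes
  results.map{|r| r[:type]}.uniq    # e.g. [:histogram, :density, :statistic, :original, :classification, :im_tile, :ct_tile]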



# File 'lib/immunoscore_results_loader.rb', line 142

def search_all
  r=[]
  [:find_histogram,:find_density,:find_statistics,:find_original, :find_classification, :find_im,:find_ct].each do |m|
    r << self.send(m)
    puts "Done with #{m}"
  end
  r.flatten
end

#show(graphic_data) ⇒ Object



# File 'lib/immunoscore_results_loader.rb', line 206

def show graphic_data
  tf=Tempfile.new ["temp",".jpg"]
  tf.write graphic_data
  tf.close
  puts "#{tf.path}"
  fork do
    `open #{tf.path}`
  end
  Process.wait
  tf.unlink
end

#val_to_csv(val_name, file_name) ⇒ Object



# File 'lib/analyzer.rb', line 648

def val_to_csv val_name, file_name
  Case.find_all_by_validation_name(val_name).mongo_to_csv(file_name)
  puts "saved #{Case.find_all_by_validation_name(val_name)} in #{File.absolute_path file_name}"
end

#write_stats_csv(file_path) ⇒ Object

Aggregates into a single spreadsheet all the CSV/statistics file entries present in the database.
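
Sketch (the output path is hypothetical):

  write_stats_csv "all_statistics.csv"
  # writes one header row followed by one data row per Statistic document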



# File 'lib/mongo_aggregator.rb', line 94

def write_stats_csv file_path
  # csv_array collects the shared header row plus one data row per Statistic entry
  csv_array=[] << Statistic.all[0].data_load.to_s.split("\n")[0]
  Statistic.all.each do |stat_entry|
    csv_array << stat_entry.data_load.to_s.split("\n")[1]
  end
  fh=File.new(file_path,"w")
  csv_array.each{|l| fh.write(l+"\n")}
  fh.close
end