Module: OboParser::Utilities

Defined in:
lib/obo_parser/utilities.rb,
lib/obo_parser/utilities/viz.rb,
lib/obo_parser/utilities/helpers.rb

Defined Under Namespace

Modules: Helpers, Viz

Constant Summary collapse

HOMOLONTO_HEADER =

Two column correspondences and translation tools

%{
format-version: 1.2
auto-generated-by: obo_parser
default-namespace: fix_me

[Typedef]
id: OGEE:has_member
name: has_member
is_a: OBO_REL:relationship
def: "C has_member C', C is an homology group and C' is a biological object" []
comment: "We leave open the possibility that an homology group is a biological object. Thus, an homology group C may have C' has_member, with C' being an homology group."
is_transitive: true
is_anti_symmetric: true

}

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.arrayify_pairs(options = {}) ⇒ Array

Takes a two column input file, references it to two ontologies, and returns an array of [[id1, id2], [id3, idn] …].

Example use

file = File.read('HAO_TGMA_list.txt')
col1_obo = File.read('hao.obo')
col2_obo = File.read('tgma.obo')

OboParser::Utilities.arrayify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)

Parameters:

  • options (Hash) (defaults to: {})

    options.

  • data (Symbol)

    the two column data file.

  • col1_obo (Symbol)

    the OBO file referenced in the first column

  • col2_obo (Symbol)

    the OBO file referenced in the second column

Returns:

  • (Array)

    an array of [[id string, id string], …]



328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
# File 'lib/obo_parser/utilities.rb', line 328

# Converts a two column, tab delimited correspondence list into an Array of
# id pairs.
#
# Each row of :data is split on tabs and both cells are stripped. A cell that
# contains a colon is taken to be an id already and is kept untouched; any
# other cell is treated as a label and looked up in the term_hash of the
# ontology for its column. Rows with fewer than two cells are skipped.
#
# A label that is not present in term_hash is passed through unchanged so the
# caller can still see (and report) the value that failed to resolve.
#
# @param options [Hash]
#   :data        - the two column data as a String
#   :col1_obo    - the OBO file (as a String) referenced by the first column
#   :col2_obo    - the OBO file (as a String) referenced by the second column
#   :index_start - accepted for backwards compatibility; not used by this method
# @return [Array] an array of [[id string, id string], ...]
def self.arrayify_pairs(options = {})
  opt = {
    :data => nil,
    :col1_obo => nil,
    :col2_obo => nil,
    :index_start => 0
  }.merge!(options)

  c1obo = parse_obo_file(opt[:col1_obo])
  c2obo = parse_obo_file(opt[:col2_obo])

  # 'FOO:123' style values are ids, everything else is a label like 'head'.
  to_id = lambda do |value, obo|
    value.include?(':') ? value : (obo.term_hash[value] || value)
  end

  pairs = []

  opt[:data].split(/\n/).each do |row|
    c1, c2 = row.split(/\t/).map(&:strip)
    next if c1.nil? || c2.nil?

    # Push the translated values; the previous implementation computed them
    # and then stored the raw cells instead.
    pairs << [to_id.call(c1, c1obo), to_id.call(c2, c2obo)]
  end

  pairs
end

.column_translate(options = {}) ⇒ String

Takes a two column input file, references it to two ontologies, and provides a report.

Example use

file = File.read('HAO_TGMA_list.txt')
col1_obo = File.read('hao.obo')
col2_obo = File.read('tgma.obo')

OboParser::Utilities.column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)

Output types

There are several output report types

:xls - Translates the columns in the data_file to the option passed in :translate_to, the first matching against col1_obo, the second against col2_obo.  Returns an Excel file.
:homolonto - Generates a homolonto compatible file to STDOUT
:cols - Prints a two column format to STDOUT

Parameters:

  • options (Hash) (defaults to: {})

    options.

  • data (Symbol)

    the two column data file.

Returns:

  • (String)

    the translation in tab delimited format.



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/obo_parser/utilities.rb', line 128

# Translates a two column, tab delimited correspondence list against two
# ontologies and emits a report.
#
# With :translate_to => :id (the default) labels are replaced by ids via
# term_hash; with any other value ids are replaced by labels via id_hash.
# A cell containing a colon is considered to be an id.
#
# Output modes (:output):
#   :cols      - prints "value1<TAB>value2" per row to STDOUT
#   :xls       - writes "<:output_filename>.xls" with the translated values and
#                their term stanzas (requires the Spreadsheet library)
#   :homolonto - prints HOMOLONTO_HEADER followed by one stanza per row to
#                STDOUT; forces :translate_to => :id
#
# Rows with fewer than two cells print an empty line and are otherwise skipped.
#
# @param options [Hash] see the defaults below
# @return [true]
def self.column_translate(options = {})
  opt = {
    :data => nil,
    :col1_obo => nil,
    :col2_obo => nil,
    :translate_to => :id,        # also :label
    :output => :cols,            # also :xls, :homolonto, :parent_match
    :parent_match_to => :is_a,   # only used when :output == :parent_match
    :output_filename => 'foo',
    :index_start => 0
  }.merge!(options)

  c1obo = parse_obo_file(opt[:col1_obo])
  c2obo = parse_obo_file(opt[:col2_obo])

  case opt[:output]
  when :xls
    Spreadsheet.client_encoding = 'UTF-8'
    book = Spreadsheet::Workbook.new
    sheet = book.create_worksheet
  when :homolonto
    # Work on a copy: appending to HOMOLONTO_HEADER itself would permanently
    # grow the shared constant on every call (and fail if it is frozen).
    s = HOMOLONTO_HEADER.dup
    opt[:translate_to] = :id # force this in this mode
  end

  # Translates one cell against the ontology of its column.
  translate = lambda do |value, obo|
    is_id = value.include?(':')
    if opt[:translate_to] == :id
      is_id ? value : obo.term_hash[value]
    else
      is_id ? obo.id_hash[value] : value
    end
  end

  # Row counter; doubles as the worksheet row and the homolonto stanza id.
  i = opt[:index_start]

  opt[:data].split(/\n/).each do |row|
    i += 1
    c1, c2 = row.split(/\t/).map(&:strip)

    if c1.nil? || c2.nil?
      puts
      next
    end

    v1 = translate.call(c1, c1obo)
    v2 = translate.call(c2, c2obo)

    case opt[:output]
    when :cols
      puts "#{v1}\t#{v2}"
    when :xls
      sheet[i, 0] = v1
      sheet[i, 1] = OboParser::Utilities.term_stanza_from_file(v1, opt[:col1_obo])
      sheet[i, 2] = v2
      sheet[i, 3] = OboParser::Utilities.term_stanza_from_file(v2, opt[:col2_obo])
    when :homolonto
      s << OboParser::Utilities.homolonto_stanza(i, c1obo.id_hash[v1], v1, v2)
      s << "\n\n"
    end
  end

  case opt[:output]
  when :xls
    book.write "#{opt[:output_filename]}.xls"
  when :homolonto
    puts s + "\n"
  end

  true
end

.cytoscapify(options = {}) ⇒ Object

Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape

Example use

OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])

TODO: @return File1, File2, Filen

Parameters:

  • ontologies (Symbol)

    a Hash of #read files as values, keys as working names

  • properties (Symbol)

    an Array of properties like [‘is_a’, ‘part_of’]



468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
# File 'lib/obo_parser/utilities.rb', line 468

# Writes two Cytoscape input files into the current working directory:
#
#   nodes.tab - one line per term: id, name and the ontology's working name
#   edges.eda - one line per relationship whose type is listed in :properties:
#               term id, "(relationship)", related id
#
# Fields are tab separated. Existing files with those names are overwritten.
#
# @param options [Hash]
#   :ontologies - a Hash of working name => OBO file contents (String)
#   :properties - an Array of relationship names, e.g. ['is_a', 'part_of']
# @return [Boolean] false (and nothing written) when either option is empty,
#   true otherwise
def self.cytoscapify(options = {})
  opt = {
    :ontologies => {},
    :properties => []
  }.merge!(options)

  return false if opt[:properties].empty?
  return false if opt[:ontologies].empty?

  # Block form guarantees both handles are closed even if parsing or writing
  # raises part way through.
  File.open("nodes.tab", "w+") do |nodes|
    File.open("edges.eda", "w+") do |edges|
      opt[:ontologies].each do |name, source|
        parse_obo_file(source).terms.each do |term|
          nodes.write([term.id.value, term.name.value, name].join("\t") + "\n")

          term.relationships.each do |rel, id|
            next unless opt[:properties].include?(rel)
            edges.write([term.id.value, "(#{rel})", id].join("\t") + "\n")
          end
        end
      end
    end
  end

  true
end

.dump_comparison_by_id(cutoff = 0, files = []) ⇒ String

Summarizes labels used by id in a two column tab delimited format. Providing a cutoff will report only those ids with more than `cutoff` distinct labels. Does not (yet) include reference to synonyms; this could be easily extended.

Example use

of1 = File.read('foo1.obo')
of2 = File.read('foo2.obo')
of3 = File.read('foo3.obo')
of4 = File.read('foo4.obo')

OboParser::Utilities.dump_comparison_by_id(0,[of1, of2, of3, of4])

Parameters:

  • cutoff (Integer) (defaults to: 0)

    only Term ids with > cutoff labels will be reported

  • files (Array) (defaults to: [])

    an Array of read files

Returns:

  • (String)

    the translation in tab delimited format



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/obo_parser/utilities.rb', line 20

# Prints, for every term id found in the given OBO files, the distinct labels
# attached to that id, as "id<TAB>label1, label2" lines on STDOUT, sorted by id.
# Only ids with more than +cutoff+ distinct labels are printed.
#
# @param cutoff [Integer] minimum (exclusive) number of distinct labels
# @param files [Array] OBO file contents, one String per ontology
# @return [String, Array] '' when no files are given, otherwise the sorted
#   list of all ids that were examined (the report itself goes to STDOUT)
def self.dump_comparison_by_id(cutoff = 0, files = [])
  return '' if files.empty?

  ontologies = files.map { |contents| parse_obo_file(contents) }

  # id => every label seen for it, in file order
  labels_by_id = {}
  ontologies.each do |ontology|
    ontology.id_hash.each do |id, label|
      (labels_by_id[id] ||= []) << label
    end
  end

  labels_by_id.keys.sort.each do |id|
    distinct = labels_by_id[id].uniq
    puts "#{id}\t#{distinct.join(', ')}" if distinct.size > cutoff
  end
end

.homolonto_stanza(id, name, *members) ⇒ String

Returns a HomolOnto Stanza

Parameters:

  • id (String)

    an externally tracked id for the id: tag like ‘00001’

  • name (String)

    a name for the name: tag

  • members (Array)

    a Array of 2 or more members for the relationship: has_member tag like [‘FOO:123’, ‘BAR:456’]

Returns:

  • (String)

    the stanza requested



447
448
449
450
451
452
453
454
455
456
457
# File 'lib/obo_parser/utilities.rb', line 447

# Builds a HomolOnto [Term] stanza.
#
# @param id [String] externally tracked id, emitted as "id: HOG:<id>"
# @param name [String] value for the name: tag
# @param members [Array] two or more member ids; each becomes a
#   "relationship: has_member <id>" line
# @return [String] the newline-joined stanza (no trailing newline), or the
#   literal 'NOT ENOUGH RELATIONSHIPS' when fewer than two members are given
def self.homolonto_stanza(id, name, *members)
  return 'NOT ENOUGH RELATIONSHIPS' unless members.length >= 2

  header = ['[Term]', "id: HOG:#{id}", "name: #{name}"]
  member_lines = members.map { |member| "relationship: has_member #{member}" }

  (header + member_lines).join("\n")
end

.set_comparison(options = {}) ⇒ String | Array

Provides a set comparison between two sets of correspondences. Finds correspondences that are shared, in left only, or in right only.

Example use

data1 = File.read('input/hao_fbbt_vetted_list2.txt')     
data2 = File.read('input/hao_fbbt_from_bioportal.txt')   
col1_obo = File.read('hao.obo')
col2_obo = File.read('tgma.obo')

OboParser::Utilities.set_comparison(:data1 => data1 , :data2 => data2, :col1_obo => col1_obo, :col2_obo => col2_obo, :return => :text ) # => a text report

or

foo = OboParser::Utilities.set_comparison(:data1 => data1 , :data2 => data2, :col1_obo => col1_obo, :col2_obo => col2_obo, :return => :array )

“Report” types

Use :return to return different values

:array - Returns an Array
:text - Generates 4 column report (id, label, id, label) to STDOUT

Parameters:

  • options (Hash) (defaults to: {})

    options.

  • data1 (Symbol)

    two columns (tab) with a correspondance between ontologies 1 and 2

  • data2 (Symbol)

    as data1, a second set of correspondances

  • col1_obo (Symbol)

    the OBO file corresponding to the first column of data1 and data2

  • col2_obo (Symbol)

    the OBO file corresponding to the second column of data1 and data2

  • return (Symbol)

    the value to return

Returns:

  • (String | Array)


398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
# File 'lib/obo_parser/utilities.rb', line 398

# Compares two correspondence lists (each a two column, tab delimited String)
# and reports which pairs occur in both, only in the first (left) or only in
# the second (right).
#
# Every reported entry is a four element row:
#   [id from column 1, its id_hash value in col1_obo,
#    id from column 2, its id_hash value in col2_obo]
#
# @param options [Hash]
#   :data1, :data2 - the two correspondence lists
#   :col1_obo      - OBO file contents for the first column of both lists
#   :col2_obo      - OBO file contents for the second column of both lists
#   :return        - :array (default) or :text
# @return [Hash, Array, nil] for :array a Hash with the keys :in_left,
#   :in_right and :in_both; for :text the report is printed to STDOUT and the
#   list of section names is returned; nil for any other :return value
def self.set_comparison(options = {})
  opt = {
    :data1 => nil,
    :data2 => nil,
    :col1_obo => nil,
    :col2_obo => nil,
    :return => :array # also :text
  }.merge!(options)

  th1 = parse_obo_file(opt[:col1_obo]).id_hash
  th2 = parse_obo_file(opt[:col2_obo]).id_hash

  # Compare the pairs themselves. Keying on the concatenated string
  # "#{id1}#{id2}" made distinct pairs such as ["A:1", "2B:3"] and
  # ["A:12", "B:3"] indistinguishable.
  pairs1 = OboParser::Utilities.arrayify_pairs(:data => opt[:data1], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo]).map { |p| [p[0], p[1]] }.uniq
  pairs2 = OboParser::Utilities.arrayify_pairs(:data => opt[:data2], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo]).map { |p| [p[0], p[1]] }.uniq

  to_row = lambda { |pair| [pair[0], th1[pair[0]], pair[1], th2[pair[1]]] }

  result = {:in_left => [], :in_right => [], :in_both => []}

  result[:in_both]  = (pairs1 & pairs2).map(&to_row)
  result[:in_left]  = (pairs1 - pairs2).map(&to_row)
  result[:in_right] = (pairs2 - pairs1).map(&to_row)

  case opt[:return]
  when :array
    return result
  when :text
    [:in_both, :in_left, :in_right].each do |t|
      puts "--- #{t.to_s} (#{result[t].size}) ---"
      result[t].each do |r|
        puts r.join("\t")
      end
    end
  else
    return nil
  end
end

.shared_labels(files = []) ⇒ String

Returns all labels found in all passed ontologies. Does not yet include synonyms. Caution: strips adult, embryonic, larval from labels (comment to remove)

Example use

of1 = File.read('fly_anatomy.obo')	
of2 = File.read('hao.obo')	
of3 = File.read('mosquito_anatomy.obo')	

OboParser::Utilities.shared_labels([of1, of3])

Parameters:

  • files (Array) (defaults to: [])

    an Array of read files

Returns:

  • (String)

    labels, one per line



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/obo_parser/utilities.rb', line 61

# Prints the labels that occur in every one of the given ontologies, sorted,
# one per line, followed by a blank line and "<count> total.".
#
# Before comparison the substrings "adult" and "embryonic/larval" are removed
# from each label and the result is stripped of surrounding whitespace.
# Synonyms are not considered.
#
# @param files [Array] OBO file contents, one String per ontology
# @return [nil] the report is written to STDOUT
def self.shared_labels(files = [])
  # normalised label => number of files it occurs in
  comparison = Hash.new(0)

  files.each do |f|
    o = parse_obo_file(f)

    # TODO: make this optional
    normalised = o.term_hash.keys.map do |k|
      tmp = k.gsub(/adult/, "").strip
      # Chain on tmp, not k: otherwise the "adult" removal above is discarded.
      tmp.gsub(/embryonic\/larval/, "").strip
    end

    # Count each label once per file, so that e.g. 'adult head' and 'head' in
    # the same ontology cannot stand in for a file that lacks the label.
    normalised.uniq.each { |label| comparison[label] += 1 }
  end

  match = comparison.keys.select { |k| comparison[k] == files.size }

  puts match.sort.join("\n")
  puts "\n#{match.length} total."
end

.term_stanza_from_file(id, file) ⇒ String

Given a Term id and a String representing an OBO file returns that stanza.

Parameters:

  • id (String)

    a Term id like ‘FOO:123’

  • file (String)

    a Obo file as a String like File.read(‘my.obo’)

Returns:

  • (String)

    the stanza requested



508
509
510
511
512
513
# File 'lib/obo_parser/utilities.rb', line 508

# Extracts the [Term] stanza with the given id from the text of an OBO file.
#
# The stanza runs from its "[Term]" header up to (not including) the next
# "[Term]" or "[Typedef]" header, or to the end of the file for the last
# stanza. Matching is case-insensitive. Windows line endings (CRLF) in the
# returned text are normalised to "\n".
#
# @param id [String] a Term id like 'FOO:123'
# @param file [String] the contents of an OBO file, e.g. File.read('my.obo')
# @return [String] the stanza, or '' when the id is blank or not found
def self.term_stanza_from_file(id, file)
  # A blank id would otherwise match the first stanza in the file.
  return '' if id.to_s.empty?

  # Regexp.escape keeps metacharacters in the id literal; (?!\S) requires the
  # id to end there, so 'FOO:12' does not match the stanza of 'FOO:123'.
  # \z lets the final stanza of the file match as well.
  pattern = /(^\[Term\]\s*?id:\s*?#{Regexp.escape(id.to_s)}(?!\S).*?)(?:^\[Term\]|^\[Typedef\]|\z)/im

  found = pattern.match(file.to_s)
  found ? found[1].gsub(/\r\n/, "\n") : ''
end

Instance Method Details

#parents(options = {}) ⇒ Hash

Takes a two column input file, references it to two ontologies, and returns a report that identifies data pairs that have parents who are also a data pair given a provided property/relation type.

Example use

file = File.read('HAO_TGMA_list.txt')
col1_obo = File.read('hao.obo')
col2_obo = File.read('tgma.obo')

foo = OboParser::Utilities.parents(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')

puts "-- NO (#{foo[:no].size})\n"
puts foo[:no].join("\n")
puts "-- YES (#{foo[:yes].size})\n"
puts foo[:yes].join("\n")

Parameters:

  • options (Hash) (defaults to: {})

    options.

  • data (Symbol)

    the two column data file.

  • col1_obo (Symbol)

    the OBO file referenced in the first column

  • col2_obo (Symbol)

    the OBO file referenced in the second column

  • property (Symbol)

    the OBO relationship/property to check against (e.g. ‘is_a’, ‘part_of’)

Returns:

  • (Hash)

    a hash of {:yes => [], :no => [], :unplaced => []}, each holding "id1 -> id2" strings



240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# File 'lib/obo_parser/utilities.rb', line 240

# Checks, for every pair in a two column correspondence list, whether the
# parents of the two terms (via the given relationship) are themselves a pair
# in the same list.
#
# Each pair "a -> b" is placed in exactly one bucket:
#   :yes      - at least one (parent of a, parent of b) combination is a listed pair
#   :no       - both terms have parents, but no combination is a listed pair
#   :unplaced - at least one of the two terms has no parent via :property
#
# Pairs whose ids cannot be found in the corresponding ontology are reported
# on STDOUT ("can't find ...") and left out of the result.
#
# @param options [Hash]
#   :data     - the two column data as a String
#   :col1_obo - OBO file contents referenced by the first column
#   :col2_obo - OBO file contents referenced by the second column
#   :property - the relationship to follow, e.g. 'is_a' or 'part_of'
# @return [Hash, false] {:yes => [...], :no => [...], :unplaced => [...]},
#   or false when :property is nil
def parents(options = {})
  opt = {
    :data => nil,
    :col1_obo => nil,
    :col2_obo => nil,
    :property => nil
  }.merge!(options)

  return false if opt[:property].nil?
  c1obo = parse_obo_file(opt[:col1_obo])
  c2obo = parse_obo_file(opt[:col2_obo])

  result = {:yes => [], :no => [], :unplaced => []}
  array = OboParser::Utilities.arrayify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])

  # Index the pairs by [id1, id2] for constant time membership tests.
  index = array.each_with_object({}) { |pair, seen| seen[[pair[0], pair[1]]] = true }

  obo1_hash = c1obo.id_index
  obo2_hash = c2obo.id_index

  # Ids a term points at through the requested relationship.
  parent_ids = lambda do |term|
    ids = []
    term.relationships.each { |rel, id| ids.push(id) if rel == opt[:property] }
    ids
  end

  array.each do |k|
    a = k[0]
    b = k[1]

    if !obo1_hash[a] || !obo2_hash[b]
      puts "can't find #{k}\n"
      next
    end

    ids_1 = parent_ids.call(obo1_hash[a])
    ids_2 = parent_ids.call(obo2_hash[b])
    t = "#{a} -> #{b}"

    # Classify once per pair. Previously the pair was pushed once per parent
    # combination (so it could appear in both :yes and :no) and pairs without
    # parents were never recorded in :unplaced.
    if ids_1.empty? || ids_2.empty?
      result[:unplaced].push(t)
    elsif ids_1.any? { |c| ids_2.any? { |d| index[[c, d]] } }
      result[:yes].push(t)
    else
      result[:no].push(t)
    end
  end

  result
end