Module: Misc

Defined in:
lib/rbbt/util/tar.rb,
lib/rbbt/util/misc.rb,
lib/rbbt/util/misc/lock.rb,
lib/rbbt/util/misc/math.rb,
lib/rbbt/util/misc/omics.rb,
lib/rbbt/util/misc/pipes.rb,
lib/rbbt/util/misc/format.rb,
lib/rbbt/util/misc/system.rb,
lib/rbbt/util/misc/inspect.rb,
lib/rbbt/util/misc/objects.rb,
lib/rbbt/util/misc/options.rb,
lib/rbbt/util/misc/development.rb,
lib/rbbt/util/misc/manipulation.rb

Overview

Source: gist.github.com/sinisterchipmunk/1335041, adapted for Rbbt.

Constant Summary collapse

# Process-wide mutex. NOTE(review): presumably guards lock bookkeeping in
# misc/lock.rb -- its users are not visible here; confirm.
LOCK_MUTEX =
Mutex.new
# Serializer module for the lock repository (Marshal). NOTE(review): usage is
# not visible here; confirm before changing the format.
LOCK_REPO_SERIALIZER =
Marshal
# Multiply a natural logarithm by this factor to obtain a base-2 logarithm.
Log2Multiplier =
1.0 / Math.log(2.0)
# Multiply a natural logarithm by this factor to obtain a base-10 logarithm.
Log10Multiplier =
1.0 / Math.log(10.0)
# Expansion of each IUPAC nucleotide code into the list of concrete bases it
# stands for. Plain bases map to themselves; "N" expands to all four DNA bases.
IUPAC2BASE =
{
  "A" => %w(A),
  "C" => %w(C),
  "G" => %w(G),
  "T" => %w(T),
  "U" => %w(U),
  "R" => %w(A G),
  "Y" => %w(C T),
  "S" => %w(G C),
  "W" => %w(A T),
  "K" => %w(G T),
  "M" => %w(A C),
  "B" => %w(C G T),
  "D" => %w(A G T),
  "H" => %w(A C T),
  "V" => %w(A C G),
  "N" => %w(A C T G),
}
# Complementary base for each nucleotide; "U" is complemented to "A".
BASE2COMPLEMENT =
{ "A" => "T", "C" => "G", "G" => "C", "T" => "A", "U" => "A" }
# Maps lowercase three-letter amino acid abbreviations to their one-letter
# codes. Keys are lowercase, so callers must downcase before looking up.
THREE_TO_ONE_AA_CODE =
{
  "ala" =>   "A",
  "arg" =>   "R",
  "asn" =>   "N",
  "asp" =>   "D",
  "cys" =>   "C",
  "glu" =>   "E",
  "gln" =>   "Q",
  "gly" =>   "G",
  "his" =>   "H",
  "ile" =>   "I",
  "leu" =>   "L",
  "lys" =>   "K",
  "met" =>   "M",
  "phe" =>   "F",
  "pro" =>   "P",
  "ser" =>   "S",
  "thr" =>   "T",
  "trp" =>   "W",
  "tyr" =>   "Y",
  "val" =>   "V"
}
# Maps each of the 64 DNA codons (written with T, not U) to a one-letter amino
# acid code. The three stop codons (TAA, TAG, TGA) map to "*".
CODON_TABLE =
{
  "ATT" => "I",
  "ATC" => "I",
  "ATA" => "I",
  "CTT" => "L",
  "CTC" => "L",
  "CTA" => "L",
  "CTG" => "L",
  "TTA" => "L",
  "TTG" => "L",
  "GTT" => "V",
  "GTC" => "V",
  "GTA" => "V",
  "GTG" => "V",
  "TTT" => "F",
  "TTC" => "F",
  "ATG" => "M",
  "TGT" => "C",
  "TGC" => "C",
  "GCT" => "A",
  "GCC" => "A",
  "GCA" => "A",
  "GCG" => "A",
  "GGT" => "G",
  "GGC" => "G",
  "GGA" => "G",
  "GGG" => "G",
  "CCT" => "P",
  "CCC" => "P",
  "CCA" => "P",
  "CCG" => "P",
  "ACT" => "T",
  "ACC" => "T",
  "ACA" => "T",
  "ACG" => "T",
  "TCT" => "S",
  "TCC" => "S",
  "TCA" => "S",
  "TCG" => "S",
  "AGT" => "S",
  "AGC" => "S",
  "TAT" => "Y",
  "TAC" => "Y",
  "TGG" => "W",
  "CAA" => "Q",
  "CAG" => "Q",
  "AAT" => "N",
  "AAC" => "N",
  "CAT" => "H",
  "CAC" => "H",
  "GAA" => "E",
  "GAG" => "E",
  "GAT" => "D",
  "GAC" => "D",
  "AAA" => "K",
  "AAG" => "K",
  "CGT" => "R",
  "CGC" => "R",
  "CGA" => "R",
  "CGG" => "R",
  "AGA" => "R",
  "AGG" => "R",
  "TAA" => "*",
  "TAG" => "*",
  "TGA" => "*",
}
# Chunk size in bytes (8 KiB) passed to readpartial when consuming streams.
BLOCK_SIZE =
1024 * 8
# NOTE(review): presumably serializes pipe creation; users not visible here.
PIPE_MUTEX =
Mutex.new
# NOTE(review): presumably tracks the write ends of pipes opened by this
# module; users not visible here.
OPEN_PIPE_IN =
[]
# Palette of hex colors handed out, in order, by colors_for.
COLOR_LIST =
%w(#BC80BD #CCEBC5 #FFED6F #8DD3C7 #FFFFB3 #BEBADA #FB8072 #80B1D3 #FDB462 #B3DE69 #FCCDE5 #D9D9D9)
# Size limits below. NOTE(review): presumably used when summarising or hashing
# large objects; their consumers are not visible here -- confirm.
ARRAY_MAX_LENGTH =
1000
# 100 times ARRAY_MAX_LENGTH (100_000).
STRING_MAX_LENGTH =
ARRAY_MAX_LENGTH * 100
TSV_MAX_FIELDS =
100
TSV_MAX_ROWS =
100
HASH2MD5_MAX_STRING_LENGTH =
1000
HASH2MD5_MAX_ARRAY_LENGTH =
100
# NOTE(review): presumably backs a "run exclusively across threads" helper;
# users not visible here.
MUTEX_FOR_THREAD_EXCLUSIVE =
Mutex.new
# Pushbullet API key, resolved once at load time.
#
# Resolution order:
#   1. the PUSHBULLET_KEY environment variable;
#   2. the stripped contents of $HOME/config/apps/pushbullet/apikey, if present;
#   3. nil when neither is available.
#
# HOME is checked before building the path: File.join(nil, ...) raises
# TypeError, which would abort loading the whole module in environments where
# HOME is unset.
PUSHBULLET_KEY =
begin
  if ENV["PUSHBULLET_KEY"]
    ENV["PUSHBULLET_KEY"]
  elsif ENV['HOME']
    config_api = File.join(ENV['HOME'], 'config/apps/pushbullet/apikey')
    if File.exist? config_api
      File.read(config_api).strip
    else
      nil
    end
  else
    nil
  end
end

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.sensiblewrite_dirObject

Returns the value of attribute sensiblewrite_dir.



13
14
15
# File 'lib/rbbt/util/misc/pipes.rb', line 13

# Reader for the sensiblewrite_dir attribute.
#
# @return [Object] the current value of @sensiblewrite_dir (nil if never set)
def sensiblewrite_dir
  @sensiblewrite_dir
end

.sensiblewrite_lock_dirObject

Returns the value of attribute sensiblewrite_lock_dir.



5
6
7
# File 'lib/rbbt/util/misc/pipes.rb', line 5

# Reader for the sensiblewrite_lock_dir attribute.
#
# @return [Object] the current value of @sensiblewrite_lock_dir (nil if never set)
def sensiblewrite_lock_dir
  @sensiblewrite_lock_dir
end

Class Method Details

._convert_match_condition(condition) ⇒ Object



28
29
30
31
32
33
34
35
36
37
# File 'lib/rbbt/util/misc.rb', line 28

# Parses the textual form of a match condition into the value used to test it.
#
# Rules are tried in this order:
#   'true' / 'false'          -> the corresponding boolean
#   starts with "/"           -> condition.to_regexp
#   starts with <, >, <=, >=  -> [:cmp, operator, number as Float]
#   starts with "!"           -> [:invert, parsed remainder]
#   anything else             -> the condition itself, unchanged
#
# @param condition [String] textual condition
# @return [Boolean, Regexp, Array, String]
def self._convert_match_condition(condition)
  if condition == 'true'
    true
  elsif condition == 'false'
    false
  elsif condition[0] == "/"
    condition.to_regexp
  elsif (comparison = /^([<>]=?)(.*)/.match(condition))
    [:cmp, comparison[1], comparison[2].to_f]
  elsif condition[0] == "!"
    negated = condition[1..-1].strip
    [:invert, _convert_match_condition(negated)]
  else
    condition
  end
end

._paste_streams(streams, output, lines = nil, sep = "\t", header = nil, &block) ⇒ Object



540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
# File 'lib/rbbt/util/misc/pipes.rb', line 540

# Pastes several delimited streams side by side into +output+, aligning their
# lines on the first field (the key).
#
# Each round picks the smallest pending key, writes one line holding that key
# followed by the fields of every stream currently positioned on it (streams
# on other keys contribute empty padding), and advances the streams that
# matched.
#
# NOTE(review): this only aligns correctly if every stream is already sorted by
# key in the order implied by +block+ (or the default sort) -- confirm with
# callers.
#
# streams - Array of IO-like objects responding to gets; Step objects are
#           replaced by their stream or by their result file
# output  - object responding to puts that receives the pasted lines
# lines   - optional Array with the first line already read from each stream
# sep     - field separator
# header  - optional header line written first
# block   - optional comparison block handed to sort when choosing the next key
#
# Any StandardError is logged, streams responding to abort are aborted, and the
# error is re-raised.
def self._paste_streams(streams, output, lines = nil, sep = "\t", header = nil, &block)
  output.puts header if header
  # Step objects are swapped for their live stream, or for their result file
  # once the step has been joined
  streams = streams.collect do |stream|
    if defined? Step and Step === stream
      stream.get_stream || stream.join.path.open
    else
      stream
    end
  end

  begin
    done_streams = []
    lines ||= streams.collect{|s| s.gets }
    keys = []
    parts = []
    # Split the first line of every stream into its key and remaining fields
    lines.each_with_index do |line,i|
      if line.nil?
        keys[i] = nil
        parts[i] = []
      else
        key, *p = line.chomp.split(sep, -1) 
        keys[i] = key
        parts[i] = p
      end
    end
    # Field counts are taken from the first line only and reused as the padding
    # width for every later line
    sizes = parts.collect{|p| p.nil? ? 0 : p.length }
    last_min = nil
    while lines.compact.any?
      # Next key to emit: the smallest among the streams that still have data
      if block_given?
        min = keys.compact.sort(&block).first
      else
        min = keys.compact.sort.first
      end
      str = []
      keys.each_with_index do |key,i|
        case key
        when min
          # This stream is on the current key: emit its fields and advance it
          str << [parts[i] * sep]
          line = lines[i] = streams[i].gets
          if line.nil?
            keys[i] = nil
            parts[i] = nil
          else
            k, *p = line.chomp.split(sep, -1)
            keys[i] = k
            parts[i] = p
          end
        else
          # Not on the current key: pad with empty fields
          str << [sep * (sizes[i]-1)] if sizes[i] > 0
        end
      end

      output.puts [min, str*sep] * sep
    end
    streams.each do |stream|
      stream.join if stream.respond_to? :join
    end
  rescue 
    Log.exception $!
    streams.each do |stream|
      stream.abort if stream.respond_to? :abort
    end
    raise $!
  end
end

._untar_cmd(io, destination) ⇒ Object



86
87
88
89
90
# File 'lib/rbbt/util/tar.rb', line 86

# Unpacks a tar archive read from +io+ into +destination+ using the system
# tar command, creating the directory first if needed.
#
# io          - stream holding the tar archive, fed to tar's standard input
# destination - directory to extract into
#
# Always returns nil.
def self._untar_cmd(io, destination)
  FileUtils.mkdir_p(destination) unless File.exist?(destination)
  untar_command = "tar xvf - -C '#{destination}'"
  CMD.cmd_log(untar_command, :in => io)
  nil
end

._zip_fields(array, max = nil) ⇒ Object



66
67
68
69
70
71
72
73
74
75
# File 'lib/rbbt/util/misc/objects.rb', line 66

# Transposes a list of columns into a list of rows, as Array#zip does, after
# repeating single-value columns so that they pair with every row.
#
# array - Array of Arrays (columns)
# max   - number of rows; defaults to the length of the longest column
#
# Returns an Array of rows, or [] when array is nil, empty, or its first
# column is nil.
def self._zip_fields(array, max = nil)
  return [] if array.nil? or array.empty? or (first = array.first).nil?
  max = array.collect{|l| l.length}.max if max.nil?
  rest = array[1..-1].collect{|v|
    # Must be logical &&: the bitwise & binds tighter than == and >, which made
    # this test always false and left single-value columns unexpanded
    (v.length == 1 && max > 1) ? v * max : v
  }
  first = first * max if first.length == 1 and max > 1

  first.zip(*rest)
end

.add_defaults(options, defaults = {}) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/rbbt/util/misc/options.rb', line 120

# Returns a copy of +options+ in which every key of +defaults+ that is missing
# has been filled in. The caller's options object is never modified.
#
# options  - Hash, String (parsed with string2hash) or nil (treated as {})
# defaults - Hash of fallback values
#
# Raises RuntimeError when options is neither a Hash, a String nor nil.
def self.add_defaults(options, defaults = {})
  options ||= {}
  new_options = case options
                when Hash
                  options.dup
                when String
                  string2hash options
                else
                  raise "Format of '#{options.inspect}' not understood. It should be a hash"
                end

  defaults.each do |key, value|
    # Check the parsed hash, not the raw argument: for String options,
    # String#include? would do a substring test (or raise on Symbol keys)
    next if new_options.include? key

    new_options[key] = value
  end

  new_options
end

.add_GET_param(url, param, value) ⇒ Object



78
79
80
81
82
83
84
# File 'lib/rbbt/util/misc/options.rb', line 78

# Returns +url+ with the GET parameter +param+ set to +value+, replacing any
# value it already has in the query string.
#
# url   - URL, optionally carrying a "?query" part
# param - parameter name
# value - parameter value
def self.add_GET_param(url, param, value)
  base, query = url.split("?")
  params = if query.nil?
             {}
           else
             self.GET_params2hash(query)
           end
  IndiferentHash.setup params
  params[param] = value
  base << "?" << hash2GET_params(params)
end

.add_libdir(dir = nil) ⇒ Object



3
4
5
6
# File 'lib/rbbt/util/misc/development.rb', line 3

# Puts +dir+ at the front of $LOAD_PATH unless it is already listed.
#
# dir - directory to add; when omitted, the 'lib' directory under the path
#       that Path.caller_lib_dir reports for the calling file
#
# Returns $LOAD_PATH when the directory was added, nil otherwise.
def self.add_libdir(dir=nil)
  libdir = dir || File.join(Path.caller_lib_dir(caller.first), 'lib')
  return if $LOAD_PATH.include?(libdir)
  $LOAD_PATH.unshift(libdir)
end

.add_stream_filename(io, filename) ⇒ Object



688
689
690
691
692
693
694
695
# File 'lib/rbbt/util/misc/pipes.rb', line 688

# Gives +io+ a filename accessor and sets it to +filename+. Objects that
# already respond to filename are left untouched.
#
# Returns filename when it was attached, nil otherwise.
def self.add_stream_filename(io, filename)
  return if io.respond_to? :filename

  io.singleton_class.send(:attr_accessor, :filename)
  io.filename = filename
end

.append_zipped(current, new) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/rbbt/util/misc/objects.rb', line 50

# Appends a new entry to each row of a zipped structure, in place.
#
# current - Array of rows (each an Array); modified and returned
# new     - Array with one entry per row. An Array entry has all its elements
#           appended to the row; any other entry is appended as one value.
#
# When current is empty it is filled with one single-element row per entry of
# new. Otherwise entries are consumed from new with shift, so new is modified.
def self.append_zipped(current, new)
  if current.empty?
    current.replace new.collect{|e| [e]}
  else
    current.each do |v|
      n = new.shift
      if Array === n
        # Append the elements of this row's own entry; concatenating `new`
        # here appended the remaining entries of every other row instead
        v.concat n
      else
        v << n
      end
    end
  end
  current
end

.array2hash(array, default = nil) ⇒ Object



24
25
26
27
28
29
30
31
# File 'lib/rbbt/util/misc/options.rb', line 24

# Turns a list of [key, value] pairs into a Hash.
#
# array   - list of pairs
# default - when given, a fresh dup of it replaces every nil value
#
# Returns the new Hash.
def self.array2hash(array, default = nil)
  array.each_with_object({}) do |(key, value), result|
    value = default.dup if value.nil? && !default.nil?
    result[key] = value
  end
end

.benchmark(repeats = 1, message = nil) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/rbbt/util/misc/development.rb', line 32

# Runs the given block +repeats+ times and prints the Benchmark measurement to
# standard output.
#
# repeats - number of times to run the block
# message - optional label for the report line
#
# Returns the value of the last run of the block. If anything is raised,
# "Benchmark aborted" is printed and the exception is re-raised.
def self.benchmark(repeats = 1, message = nil)
  require 'benchmark'
  result = nil
  begin
    timing = Benchmark.measure { repeats.times { result = yield } }
    title = message ? "#{message}: #{repeats} repeats" : "Benchmark for #{repeats} repeats"
    puts title
    puts timing
  rescue Exception
    puts "Benchmark aborted"
    raise
  end
  result
end

.binary_include?(array, elem) ⇒ Boolean

Returns:

  • (Boolean)


111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/rbbt/util/misc/manipulation.rb', line 111

# Binary search for +elem+ in +array+.
#
# array - Array sorted in ascending order according to <=>
# elem  - value to look for
#
# Returns true when found, false otherwise. An empty array now yields false:
# it used to return -1, which is truthy in Ruby and so read as "found".
#
# Raises RuntimeError if elem cannot be compared with an inspected value.
def self.binary_include?(array, elem)
  upper = array.size - 1
  lower = 0

  while upper >= lower
    # Midpoint written as an offset from lower
    idx = lower + (upper - lower) / 2
    value = array[idx]

    case elem <=> value
    when 0
      return true
    when -1
      upper = idx - 1
    when 1
      lower = idx + 1
    else
      raise "Cannot compare #{[elem.inspect, value.inspect] * " with "}"
    end
  end

  false
end

.bootstrap(elems, num = :current, options = {}, &block) ⇒ Object



295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
# File 'lib/rbbt/util/misc/development.rb', line 295

# Runs the given block once per element of +elems+ through TSV.traverse, with
# a :cpus option derived from +num+.
#
# elems   - indexable collection responding to length and []
# num     - :current or nil (10, capped at half the number of elements), a
#           String (converted with to_i), or an Integer (used as is below 100,
#           otherwise replaced by 32000 / num)
# options - options handed to TSV.traverse; :respawn, :cpus and :bar are
#           filled in when missing
#
# Raises RuntimeError for any other kind of num.
def self.bootstrap(elems, num = :current, options = {}, &block)
  IndiferentHash.setup options
  num = :current if num.nil?
  cpus = case num
         when :current
          n = 10
          n = elems.length / 2 if n > elems.length/2
          n
         when String
           num.to_i
         when Integer
           if num < 100
             num
           else
             32000 / num
           end
         else
           raise "Parameter 'num' not understood: #{Misc.fingerprint num}"
         end


  options = Misc.add_defaults options, :respawn => true, :cpus => cpus
  options = Misc.add_defaults options, :bar => "Bootstrap in #{ options[:cpus] } cpus: #{ Misc.fingerprint Annotated.purge(elems) }"
  # `=` binds tighter than `and`, so respawn is simply options[:respawn]; the
  # rest of the expression is evaluated and discarded.
  # NOTE(review): the check below compares against :always, so this may be
  # what is wanted -- confirm before "fixing" the precedence.
  respawn = options[:respawn] and options[:cpus] and options[:cpus].to_i > 1

  # Traverse string positions rather than the elements themselves; each
  # element is looked up again inside the block
  index = (0..elems.length-1).to_a.collect{|v| v.to_s }
  TSV.traverse index, options do |pos|
    elem = elems[pos.to_i]
    elems.annotate elem if elems.respond_to? :annotate
    res = begin
            yield elem
          rescue Interrupt
            Log.warn "Process #{Process.pid} was aborted"
            raise $!
          end
    # The block's result is only kept when the caller asked for an :into target
    res = nil unless options[:into]
    # Uses the locally computed cpus, not options[:cpus]
    raise RbbtProcessQueue::RbbtProcessQueueWorker::Respawn, res if respawn == :always and cpus > 1
    res
  end
end

.bootstrap_in_threads(elems, num = :current, options = {}, &block) ⇒ Object



336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
# File 'lib/rbbt/util/misc/development.rb', line 336

# Runs the given block once per element of +elems+ through TSV.traverse, with
# a :threads option derived from +num+.
#
# elems   - indexable collection responding to length and []
# num     - :current or nil (10), a String (converted with to_i), or an
#           Integer (used as is below 100, otherwise replaced by 32000 / num)
# options - options handed to TSV.traverse; :respawn, :threads and :bar are
#           filled in when missing
#
# Raises RuntimeError for any other kind of num.
def self.bootstrap_in_threads(elems, num = :current, options = {}, &block)
  IndiferentHash.setup options
  num = :current if num.nil?
  threads = case num
         when :current
          10
         when String
           num.to_i
         when Integer
           if num < 100
             num
           else
             32000 / num
           end
         else
           raise "Parameter 'num' not understood: #{Misc.fingerprint num}"
         end


  options = Misc.add_defaults options, :respawn => true, :threads => threads
  options = Misc.add_defaults options, :bar => "Bootstrap in #{ options[:threads] } threads: #{ Misc.fingerprint Annotated.purge(elems) }"

  # Traverse string positions rather than the elements themselves; each
  # element is looked up again inside the block
  index = (0..elems.length-1).to_a.collect{|v| v.to_s }
  TSV.traverse index, options do |pos|
    elem = elems[pos.to_i]
    elems.annotate elem if elems.respond_to? :annotate
    begin
      res = yield elem
    rescue Interrupt
      Log.warn "Process #{Process.pid} was aborted"
      raise $!
    end
    # The block's result is only kept when the caller asked for an :into target
    res = nil unless options[:into]
    res
  end
end

.break_lines(text, char_size = 80) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/rbbt/util/misc/inspect.rb', line 7

# Re-wraps +text+ so that lines hold at most +char_size+ characters where
# possible, breaking only at runs of whitespace or hyphens.
#
# Existing newlines are turned into spaces first; every resulting line is
# stripped. A single chunk longer than char_size is kept whole on its own line.
#
# Returns the wrapped text joined with "\n".
def self.break_lines(text, char_size=80)
  chunks = text.tr("\n", " ").split(/([\s\-]+)/)
  rows = []
  buffer = []
  buffer_size = 0

  chunks.each do |chunk|
    if buffer_size + chunk.length > char_size
      rows << buffer.join
      buffer = []
      buffer_size = 0
    end
    buffer << chunk
    buffer_size += chunk.length
  end
  rows << buffer.join

  rows.map { |row| row.strip }.join("\n")
end

.camel_case(string) ⇒ Object



84
85
86
87
88
89
# File 'lib/rbbt/util/misc/format.rb', line 84

# Converts a snake_case string into CamelCase.
#
# A string with no underscore that already contains an uppercase letter is
# returned unchanged. Otherwise it is split on underscores and digit runs
# (digit runs are kept), each piece is capitalized -- pieces of two or more
# uppercase letters are left as they are -- and the pieces are joined.
def self.camel_case(string)
  already_camel = string !~ /_/ && string =~ /[A-Z]+.*/
  return string if already_camel

  pieces = string.split(/_|(\d+)/)
  converted = pieces.map do |piece|
    piece =~ /^[A-Z]{2,}$/ ? piece : piece.capitalize
  end
  converted.join
end

.camel_case_lower(string) ⇒ Object



91
92
93
94
95
# File 'lib/rbbt/util/misc/format.rb', line 91

# Converts a snake_case string into lowerCamelCase: the first word is
# downcased, later words are capitalized, except words made of two or more
# uppercase letters, which are kept as they are.
def self.camel_case_lower(string)
  words = string.split('_')
  converted = words.each_with_index.map do |word, position|
    if position.zero?
      word.downcase
    elsif word =~ /^[A-Z]{2,}$/
      word
    else
      word.capitalize
    end
  end
  converted.join
end

.choose(array, select) ⇒ Object



96
97
98
# File 'lib/rbbt/util/misc/objects.rb', line 96

# Returns the elements of +array+ whose matching entry in +select+ (same
# position) is truthy.
def self.choose(array, select)
  array.zip(select).each_with_object([]) do |(elem, keep), chosen|
    chosen << elem if keep
  end
end

.chr_cmp_contigs(chr1, chr2, contigs) ⇒ Object



350
351
352
# File 'lib/rbbt/util/misc/omics.rb', line 350

# Compares two chromosome names by their position in the +contigs+ list.
#
# Returns -1, 0 or 1, or nil when the positions cannot be compared (for
# instance when only one of the names is missing from contigs).
def self.chr_cmp_contigs(chr1, chr2, contigs)
  position1 = contigs.index(chr1)
  position2 = contigs.index(chr2)
  position1 <=> position2
end

.chr_cmp_strict(chr1, chr2) ⇒ Object



336
337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/rbbt/util/misc/omics.rb', line 336

# Compares two chromosome names, ignoring a leading "chr".
#
# Names ending in digits are compared by that number and sort before names
# that do not end in digits; the remaining names are compared as strings.
#
# Returns -1, 0 or 1.
def self.chr_cmp_strict(chr1, chr2)
  name1 = chr1.sub(/^chr/, '')
  name2 = chr2.sub(/^chr/, '')
  number1 = name1[/(\d+)$/, 1]
  number2 = name2[/(\d+)$/, 1]

  return number1.to_i <=> number2.to_i if number1 && number2
  return -1 if number1
  return 1 if number2

  name1 <=> name2
end

.collapse_ranges(ranges) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/rbbt/util/misc/manipulation.rb', line 2

# Merges overlapping ranges.
#
# Ranges are processed in order of their begin value. A range starting at or
# before the furthest end seen so far is folded into the span it touches.
# Ranges that are merely adjacent (1..2 and 3..4) are not merged.
#
# Returns an Array of inclusive Ranges ordered by begin.
def self.collapse_ranges(ranges)
  open_spans = []
  closed_spans = []
  furthest_end = nil

  ranges.sort_by { |range| range.begin }.each do |range|
    start, stop = range.begin, range.end

    if furthest_end.nil? || start > furthest_end
      # Starts past everything seen so far: opens a new span
      open_spans << [start, stop]
      furthest_end = stop
      next
    end

    still_open = []
    open_spans.each do |span_start, span_stop|
      if span_stop < start
        # No later range can reach this span any more
        closed_spans << [span_start, span_stop]
      else
        still_open << [span_start, [stop, span_stop].max]
        break
      end
    end
    open_spans = still_open
    furthest_end = stop if stop > furthest_end
  end

  (closed_spans + open_spans).map { |start, stop| (start..stop) }
end

.collapse_stream(s, line = nil, sep = "\t", header = nil, &block) ⇒ Object



487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
# File 'lib/rbbt/util/misc/pipes.rb', line 487

# Collapses consecutive lines of a delimited stream that share the same key
# (first field) into one line whose fields are the per-line values joined
# with "|".
#
# s      - input stream, handed to process_stream
# line   - optional first line, already read from the stream
# sep    - field separator (a nil value falls back to tab)
# header - optional header line written first
# block  - optional block that receives the collapsed fields of a key; its
#          result is written after the key instead of the raw fields
#
# Returns whatever Misc.open_pipe returns for the block that writes the
# collapsed lines (NOTE(review): presumably the readable end of the pipe --
# confirm).
#
# Only consecutive lines are merged, so equal keys must be adjacent in the
# input.
def self.collapse_stream(s, line = nil, sep = "\t", header = nil, &block)
  sep ||= "\t"
  Misc.open_pipe do |sin|
    sin.puts header if header
    process_stream(s) do |s|
      line ||= s.gets

      current_parts = []
      while line 
        key, *parts = line.chomp.split(sep, -1)
        case
        when key.nil?
          # Empty line: nothing to accumulate
        when current_parts.nil?
          current_parts = parts
          current_key = key
        when current_key == key
          # Same key as the previous line: append each field after a "|"
          parts.each_with_index do |part,i|
            if current_parts[i].nil?
              current_parts[i] = "|" << part
            else
              current_parts[i] = current_parts[i] << "|" << part
            end
          end

          # Fields missing from this shorter line still get an empty slot
          (parts.length..current_parts.length-1).to_a.each do |pos|
            current_parts[pos] = current_parts[pos] << "|" << ""
          end
        when current_key.nil?
          # First keyed line seen
          current_key = key
          current_parts = parts
        when current_key != key
          # Key changed: flush the accumulated line and start over
          if block_given?
            res = block.call(current_parts)
            sin.puts [current_key, res] * sep
          else
            sin.puts [current_key, current_parts].flatten * sep
          end 
          current_key = key
          current_parts = parts
        end
        line = s.gets
      end

      # Flush the last accumulated key, if any
      if block_given?
        res = block.call(current_parts)
        sin.puts [current_key, res] * sep
      else
        sin.puts [current_key, current_parts].flatten * sep
      end unless current_key.nil?
    end
  end
end

.colors_for(list) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/rbbt/util/misc/format.rb', line 4

# Gives every distinct element of +list+ a color from COLOR_LIST, in order of
# first appearance. Once the palette is used up, further new elements get nil.
#
# Returns [colors, used]: colors lines up with list, used maps each distinct
# element to its color.
def self.colors_for(list)
  palette = COLOR_LIST.dup
  assigned = {}

  colors = list.map do |elem|
    assigned[elem] = palette.shift unless assigned.include?(elem)
    assigned[elem]
  end

  [colors, assigned]
end

.common_path(dir, file) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/rbbt/util/misc/system.rb', line 75

# Tells whether +file+ is +dir+ itself or lies somewhere beneath it. Both
# paths are expanded first; the check walks up from file towards the root.
#
# Returns true or false.
def self.common_path(dir, file)
  current = File.expand_path file
  target = File.expand_path dir

  loop do
    return true if current == target
    parent = File.dirname(current)
    # dirname of the root is the root itself: nothing left to climb
    return false if parent == current
    current = parent
  end
end

.compare_lines(stream1, stream2, args, sort = false) ⇒ Object



651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
# File 'lib/rbbt/util/misc/pipes.rb', line 651

# Compares the lines of two inputs with the system comm command.
#
# stream1, stream2 - a Path, the name of an existing file, or a stream; streams
#                    are first written to temporary files
# args             - argument string placed after "comm"
# sort             - when true, both inputs go through Misc.sort_stream first
#
# Returns the result of CMD.cmd run with :pipe => true. Temporary files are
# removed by the command's :post hook.
def self.compare_lines(stream1, stream2, args, sort = false)
  if sort
    sorted1 = Misc.sort_stream stream1
    sorted2 = Misc.sort_stream stream2
    return compare_lines(sorted1, sorted2, args, false)
  end

  temporary = []
  file1, file2 = [stream1, stream2].map do |source|
    if Path === source or (String === source and File.exist? source)
      source
    else
      tmp = TmpFile.tmp_file
      temporary << tmp
      Misc.consume_stream(TSV.get_stream(source), false, tmp)
      tmp
    end
  end

  cleanup = Proc.new{ temporary.each{|f| FileUtils.rm f } }
  CMD.cmd("env LC_ALL=C comm #{args} '#{file1}' '#{file2}'", :pipe => true, :post => cleanup)
end

.consolidate(list) ⇒ Object



39
40
41
42
43
44
45
46
47
48
# File 'lib/rbbt/util/misc/objects.rb', line 39

# Concatenates all the arrays in +list+ into the first one.
#
# The first element is reused as the accumulator, so it is modified in place.
#
# Returns that accumulator, or nil for an empty list.
def self.consolidate(list)
  merged = nil
  list.each do |chunk|
    if merged.nil?
      merged = chunk
    else
      merged.concat chunk
    end
  end
  merged
end

.consume_stream(io, in_thread = false, into = nil, into_close = true, &block) ⇒ Object



283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
# File 'lib/rbbt/util/misc/pipes.rb', line 283

# Reads +io+ to the end, optionally copying everything into +into+.
#
# io         - stream to drain. Path objects and objects without read are
#              ignored; an already closed stream is only joined.
# in_thread  - when true the work is done in a new Thread (which is returned)
#              and any exception is raised in the calling thread.
#              NOTE(review): the block is not forwarded to the threaded call,
#              so it never runs in that mode -- confirm this is intended.
# into       - optional destination: a Path or file name (parent directory is
#              created, file opened for writing) or any object accepting <<
# into_close - whether to close (and join) into when done
# block      - called with no arguments after the stream has been consumed
#
# On failure the stream is aborted if it supports abort and a partially
# written destination file is removed. Aborted is swallowed after cleanup;
# every other exception is re-raised.
def self.consume_stream(io, in_thread = false, into = nil, into_close = true, &block)
  return if Path === io
  return unless io.respond_to? :read 

  if io.respond_to? :closed? and io.closed?
    io.join if io.respond_to? :join
    return
  end

  if in_thread
    Thread.new(Thread.current) do |parent|
      begin
        consume_stream(io, false, into, into_close)
      rescue Exception
        parent.raise $!
      end
    end
  else
    if into
      Log.medium "Consuming stream #{Misc.fingerprint io} -> #{Misc.fingerprint into}"
    else
      Log.medium "Consuming stream #{Misc.fingerprint io}"
    end

    begin
      into = into.find if Path === into
      if String === into 
        dir = File.dirname(into)
        Open.mkdir dir unless Open.exists?(dir)
        # Keep the path so that a partial file can be removed on failure
        into_path, into = into, Open.open(into, :mode => 'w') 
      end
      into.sync = true if IO === into
      into_close = false unless into.respond_to? :close
      io.sync = true

      begin
        # readpartial signals the end of the stream by raising EOFError
        while c = io.readpartial(BLOCK_SIZE)
          into << c if into
        end
      rescue EOFError
      end

      io.join if io.respond_to? :join
      io.close unless io.closed?
      into.close if into and into_close and not into.closed?
      into.join if into and into_close and into.respond_to?(:joined?) and not into.joined?
      block.call if block_given?

    rescue Aborted
      Log.medium "Consume stream aborted #{Misc.fingerprint io}"
      io.abort if io.respond_to? :abort
      # File.exist? rather than File.exists?: the latter was removed in Ruby
      # 3.2 and would raise NoMethodError here, hiding the original failure
      FileUtils.rm into_path if into_path and File.exist? into_path
    rescue Exception
      Log.medium "Exception consuming stream: #{Misc.fingerprint io}: #{$!.message}"
      io.abort $! if io.respond_to? :abort
      FileUtils.rm into_path if into_path and File.exist? into_path
      raise $!
    end
  end
end

.correct_icgc_mutation(pos, ref, mut_str) ⇒ Object



119
120
121
122
123
124
# File 'lib/rbbt/util/misc/omics.rb', line 119

# Converts an ICGC-style mutation into this module's mutation notation.
#
# A mutation of the form "-" followed by bases becomes a run of "-" one
# character shorter than the original string; when the reference is "-", the
# mutation is prefixed with "+".
#
# Returns [pos, [mutation]].
def self.correct_icgc_mutation(pos, ref, mut_str)
  mutation = if mut_str =~ /^-[ACGT]/
               '-' * (mut_str.length - 1)
             else
               mut_str
             end
  mutation = "+" + mutation if ref == '-'
  [pos, [mutation]]
end

.correct_mutation(pos, ref, mut_str) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/rbbt/util/misc/omics.rb', line 126

# Normalizes a reference/alternative pair into this module's mutation
# notation: "+BASES" for insertions, a run of "-" for deletions, a single base
# for single-base substitutions.
#
# pos     - position of the reference allele
# ref     - reference allele ("-" means empty)
# mut_str - comma-separated alternative alleles ("<DEL>" is read as "-"); nil
#           yields no mutations
#
# Returns [pos, muts], where pos has been moved past any leading bases shared
# by the reference and all alternatives.
def self.correct_mutation(pos, ref, mut_str)
  muts = mut_str.nil? ? [] : mut_str.split(',')
  muts.collect!{|m| m == '<DEL>' ? '-' : m }

  ref = '' if ref == '-'
  # Drop leading bases shared by the reference and every alternative, moving
  # the position forward one base at a time
  while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
    ref = ref[1..-1]
    raise "REF nil" if ref.nil?
    pos = pos + 1
    muts = muts.collect{|m| m[1..-1]}
  end

  muts = muts.collect do |m|
    m = '' if m == '-'
    case
    when ref.empty?
      # Nothing left of the reference: insertion
      "+" << m
    when (m.length < ref.length and (m.empty? or ref.index(m)))
      # Alternative is empty or contained in the reference: deletion of the
      # difference in length
      "-" * (ref.length - m.length)
    when (ref.length == 1 and m.length == 1)
      # Single-base substitution
      m
    else
      # Anything else: written as deletion of the reference followed by the
      # alternative bases
      if ref == '-'
        res = '+' + m
      else
        res = '-' * ref.length
        res << m unless m == '-'
      end
      Log.debug{"Non-standard annotation: #{[ref, m]} (#{ muts }) => #{ res }"}

      res
    end
  end

  [pos, muts]
end

.correct_vcf_mutation(pos, ref, mut_str) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/rbbt/util/misc/omics.rb', line 163

# Normalizes a VCF REF/ALT pair into this module's mutation notation:
# "+BASES" for insertions, a run of "-" for deletions, a single base for
# single-base substitutions.
#
# pos     - VCF position of the reference allele
# ref     - reference allele
# mut_str - comma-separated alternative alleles ("<DEL>" is read as "-"); nil
#           yields no mutations
#
# Returns [pos, muts]. Alternatives identical to what remains of the reference
# are dropped from muts.
def self.correct_vcf_mutation(pos, ref, mut_str)
  muts = mut_str.nil? ? [] : mut_str.split(',')
  muts.collect!{|m| m == '<DEL>' ? '-' : m }

  # Drop leading bases shared by the reference and every alternative. The
  # position is not advanced for the step that empties the reference.
  while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
    ref = ref[1..-1]
    raise "REF nil" if ref.nil?
    pos = pos + 1 unless ref.empty?
    muts = muts.collect{|m| m[1..-1]}
  end

  muts = muts.collect do |m|
    case
    when (ref.empty? or ref == '-')
      # Nothing left of the reference: insertion
      "+" << m
    when (m.length < ref.length and (m.empty? or ref.index(m)))
      # Alternative is empty or contained in the reference: deletion of the
      # difference in length
      "-" * (ref.length - m.length)
    when (ref.length == 1 and m.length == 1)
      # Single-base substitution
      m
    when (ref == m)
      # Same as the reference: removed by the compact below
      nil
    else
      if ref == '-'
        res = '+' + m
      else
        if ref[0] == m[0]
          # Shares the first base with the reference: insertion of the rest
          res = '+' << m[1..-1]
        else
          # Written as deletion of the reference followed by the alternative
          res = '-' * ref.length
          res << m unless m == '-'
        end
      end
      Log.debug{"Non-standard annotation: #{[ref, m]} (#{ muts }) => #{ res }"}

      res
    end
  end.compact

  [pos, muts]
end

.counts(array) ⇒ Object



78
79
80
81
82
83
84
85
86
# File 'lib/rbbt/util/misc/math.rb', line 78

# Counts how many times each element appears in +array+.
#
# Returns a plain Hash from element to count, keyed in order of first
# appearance. Elements that never appear are simply absent.
def self.counts(array)
  tally = {}
  array.each do |item|
    tally[item] = tally.fetch(item, 0) + 1
  end
  tally
end

.digest(text) ⇒ Object



166
167
168
# File 'lib/rbbt/util/misc/inspect.rb', line 166

# Returns the MD5 digest of +text+ as a hexadecimal string.
def self.digest(text)
  hasher = Digest::MD5.new
  hasher << text
  hasher.hexdigest
end

.divide(array, num) ⇒ Object

Divides the array into num chunks of near-equal size (sizes differ by at most one element) by dealing the elements into the chunks one at a time, round-robin.



193
194
195
196
197
198
199
200
201
202
# File 'lib/rbbt/util/misc/development.rb', line 193

# Splits +array+ into +num+ chunks by dealing the elements out round-robin,
# so chunk sizes differ by at most one.
#
# array - elements to distribute
# num   - number of chunks. It is converted with to_i once, so numeric strings
#         and fractional values work; 0 is treated as 1.
#
# Returns an Array of num Arrays; some are empty when array has fewer than
# num elements.
def self.divide(array, num)
  # The raw value used to be kept for the modulo below, which crashed for
  # String and fractional arguments
  num = num.to_i
  num = 1 if num == 0
  chunks = []
  num.times do chunks << [] end
  array.each_with_index{|e, i|
    chunks[i % num] << e
  }
  chunks
end

.do_once(&block) ⇒ Object



126
127
128
129
130
131
# File 'lib/rbbt/util/misc/development.rb', line 126

# Runs the given block only the first time do_once is called in the process.
#
# The "already done" flag is the single global $__did_once, so it is shared by
# every caller: once any block has run, no later block will.
#
# Always returns nil.
def self.do_once(&block)
  unless $__did_once
    $__did_once = true
    yield
  end
  nil
end

.dup_stream(stream) ⇒ Object



262
263
264
# File 'lib/rbbt/util/misc/pipes.rb', line 262

# Returns a single duplicate of +stream+: the first (and only) copy produced
# by dup_stream_multiple.
def self.dup_stream(stream)
  copies = dup_stream_multiple(stream, 1)
  copies.first
end

.dup_stream_multiple(stream, num = 1) ⇒ Object



250
251
252
253
254
255
256
257
258
259
260
# File 'lib/rbbt/util/misc/pipes.rb', line 250

# Produces +num+ extra readers of +stream+ while keeping stream usable.
#
# A dup of the stream is tee'd into num + 1 branches with Misc.tee_stream; the
# first branch is reopened into the original stream object and the remaining
# ones are returned.
#
# Returns an Array with the num additional streams.
def self.dup_stream_multiple(stream, num = 1)
  source = stream.dup
  if stream.respond_to? :annotate
    stream.annotate source
    stream.clear
  end

  primary, *copies = Misc.tee_stream source, num + 1
  stream.reopen(primary)
  primary.annotate(stream)
  copies
end

.ensembl_server(organism) ⇒ Object



306
307
308
309
310
311
312
313
# File 'lib/rbbt/util/misc/omics.rb', line 306

# Returns the Ensembl host for an organism code.
#
# organism - code such as "Hsa" or "Hsa/feb2014"; the part after the first
#            slash selects the archive host for that build
#
# Without a build part the main site is returned.
def self.ensembl_server(organism)
  release = organism.split("/")[1]
  release.nil? ? "www.ensembl.org" : "#{ release }.archive.ensembl.org"
end

.env_add(var, value, sep = ":", prepend = true) ⇒ Object



38
39
40
41
42
43
44
45
46
# File 'lib/rbbt/util/misc/system.rb', line 38

# Adds +value+ to a separator-delimited environment variable such as PATH,
# unless it is already one of its entries.
#
# var     - name of the environment variable (set to "" if it did not exist)
# value   - entry to add
# sep     - entry separator
# prepend - true to put the entry first, false to put it last
#
# Returns the new content of the variable, or nil if value was already there.
def self.env_add(var, value, sep = ":", prepend = true)
  ENV[var] ||= ""
  current = ENV[var]
  # The separator is escaped too, so separators such as "|" are matched
  # literally
  quoted_sep = Regexp.quote sep
  return if current =~ /(#{quoted_sep}|^)#{Regexp.quote value}(#{quoted_sep}|$)/

  ENV[var] = if current.empty?
               # No dangling separator: in PATH-like variables an empty entry
               # stands for the current directory
               value
             elsif prepend
               value + sep + current
             else
               # Append the new value; the old code appended the variable's
               # own content again
               current + sep + value
             end
end

.field_position(fields, field, quiet = false) ⇒ Object

Raises:

  • (FieldNotFoundError) — when fields is nil or the field is not found, unless quiet is set



100
101
102
103
104
105
106
107
# File 'lib/rbbt/util/misc/objects.rb', line 100

# Finds the position of +field+ within +fields+.
#
# fields - list of field names
# field  - a name to look up; an Integer or Range is returned as is
# quiet  - when true, failures return nil instead of raising
#
# An exact match is tried first, then a case-insensitive match against the
# whole name. The name is interpolated into that pattern unescaped, so regexp
# metacharacters in it are interpreted as such.
#
# Raises FieldNotFoundError (unless quiet) when fields is nil or the field is
# not found.
def self.field_position(fields, field, quiet = false)
  return field if Integer === field or Range === field

  if fields.nil?
    raise FieldNotFoundError, "Field information missing" unless quiet
    # quiet used to fall through and call each_with_index on nil
    return nil
  end

  fields.each_with_index{|f,i| return i if f == field}
  field_re = Regexp.new(/^#{field}$/i)
  fields.each_with_index{|f,i| return i if f =~ field_re}
  raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
end

.file2md5(file) ⇒ Object



377
378
379
380
381
382
383
384
385
386
387
388
# File 'lib/rbbt/util/misc/inspect.rb', line 377

# Returns the MD5 checksum of +file+, cached in a sidecar "<file>.md5".
#
# If the sidecar exists its content is returned as is. Otherwise the checksum
# is computed with the system md5sum command and the sidecar is written on a
# best-effort basis: write failures (e.g. a read-only directory) are ignored.
def self.file2md5(file)
  # File.exist? rather than File.exists?, which was removed in Ruby 3.2
  if File.exist?(file + '.md5')
    Open.read(file + '.md5')
  else
    md5 = CMD.cmd("md5sum '#{file}'").read.strip.split(" ").first
    begin
      Open.write(file + '.md5', md5)
    rescue
      # Caching is optional; the checksum is still returned
    end
    md5
  end
end

.fingerprint(obj) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/rbbt/util/misc/inspect.rb', line 45

# Builds a short, human-readable summary of +obj+ for log messages.
#
# Long strings, arrays of more than 10 elements and hashes of more than 10
# entries are abbreviated; floats are printed with a precision that depends on
# their magnitude. Step, AnnotatedArray and TSV objects are only recognised
# when those constants are defined. Anything else falls back to to_s.
#
# Returns a String.
def self.fingerprint(obj)
  case obj
  when nil
    "nil"
  when (defined? Step and Step)
    "<Step:"  << (obj.short_path || Misc.fingerprint([obj.task.name, obj.inputs])) << ">"
  when TrueClass
    "true"
  when FalseClass
    "false"
  when Symbol
    ":" << obj.to_s
  when String
    if obj.length > 100
      # Keep the first 30 and last 10 characters around the total length
      "'" << obj.slice(0,30) << "<...#{obj.length}...>" << obj.slice(-10,30)<< "'"
    else 
      "'" << obj << "'"
    end
  when (defined? AnnotatedArray and AnnotatedArray)
    "<A: #{fingerprint Annotated.purge(obj)} #{fingerprint obj.info}>"
  when (defined? TSV and TSV::Parser)
    filename = obj.filename
    # Standard input has no stable name: make each fingerprint unique
    filename = "STDIN(#{rand})" if filename == '-'
    "<TSVStream:" + (filename || "NOFILENAME") + "--" << Misc.fingerprint(obj.options) << ">"
  when File
    # Must come before IO: File is a subclass of IO, so this branch was
    # unreachable when listed after it
    "<File:" + obj.path + ">"
  when IO
    (obj.respond_to?(:filename) and obj.filename ) ? "<IO:" + (obj.filename || obj.inspect + rand(100000).to_s) + ">" : obj.inspect
  when NamedArray
    "[<NamedArray: fields=#{fingerprint obj.fields} -- values=#{fingerprint obj[0..-1]}]"
  when Array
    if (length = obj.length) > 10
      # Sample the first two, the middle and the last two elements
      "[#{length}--" <<  (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
    else
      "[" << (obj.collect{|e| fingerprint(e) } * ", ") << "]"
    end
  when (defined? TSV and TSV)
    obj.with_unnamed do
      "TSV:{"<< fingerprint(obj.all_fields|| []).inspect << ";" << fingerprint(obj.keys).inspect << "}"
    end
  when Hash
    if obj.length > 10
      "H:{"<< fingerprint(obj.keys) << ";" << fingerprint(obj.values) << "}"
    else
      new = "{"
      obj.each do |k,v|
        new << fingerprint(k) << '=>' << fingerprint(v) << ' '
      end
      if new.length > 1
         # Replace the trailing space with the closing brace
         new[-1] =  "}"
      else
        new << '}'
      end
      new
    end
  when Float
    if obj.abs > 10
      "%.1f" % obj
    elsif obj.abs > 1
      "%.3f" % obj
    else
      "%.6f" % obj
    end
  else
    obj.to_s
  end
end

.fixascii(string) ⇒ Object



151
152
153
154
155
156
157
# File 'lib/rbbt/util/misc/format.rb', line 151

# Passes +string+ through fixutf8 and transcodes the result to ASCII-8BIT.
# Objects that do not respond to encode are returned untouched.
def self.fixascii(string)
  return string unless string.respond_to?(:encode)

  self.fixutf8(string).encode("ASCII-8BIT")
end

.fixutf8(string) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/rbbt/util/misc/format.rb', line 163

# Returns +string+ cleaned so that it is valid UTF-8.
#
# nil stays nil and strings whose encoding is already valid are returned as
# they are. Otherwise the bytes are re-read as binary and transcoded to UTF-8,
# dropping whatever cannot be represented. Objects without encode go through
# the legacy Iconv path.
def self.fixutf8(string)
  return nil if string.nil?

  already_valid = (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
                  (string.respond_to?(:valid_encoding) && string.valid_encoding)
  return string if already_valid

  unless string.respond_to?(:encode)
    require 'iconv'
    @@ic ||= Iconv.new('UTF-8//IGNORE', 'UTF-8')
    return @@ic.iconv(string)
  end

  string.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
end

.format_definition_list(defs, size = 80, indent = 20, color = :yellow, sep = "\n\n") ⇒ Object



75
76
77
78
79
80
81
82
# File 'lib/rbbt/util/misc/format.rb', line 75

# Formats a list of term/definition pairs.
#
# defs - pairs of [term, definition] (a Hash or an Array of pairs)
# size, indent, color - handed to format_definition_list_item for each pair
# sep  - text placed between consecutive items
#
# Returns the formatted items joined with sep.
def self.format_definition_list(defs, size = 80, indent = 20, color = :yellow, sep = "\n\n")
  items = defs.map do |dt, dd|
    format_definition_list_item(dt, dd, size, indent, color)
  end
  items * sep
end

.format_definition_list_item(dt, dd, size = 80, indent = 20, color = :yellow) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/rbbt/util/misc/format.rb', line 57

def self.format_definition_list_item(dt, dd, size = 80, indent = 20, color = :yellow)
  dd = "" if dd.nil?
  dt = Log.color color, dt if color
  dt = dt.to_s  unless dd.empty?
  len = Log.uncolor(dt).length

  if indent < 0
    text = format_paragraph(dd, size, indent.abs-1, 0)
    text = dt << "\n" << text
  else
    offset = len - indent
    offset = 0 if offset < 0
    text = format_paragraph(dd, size, indent.abs+1, offset)
    text[0..len-1] = dt
  end
  text
end

.format_paragraph(text, size = 80, indent = 0, offset = 0) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/rbbt/util/misc/format.rb', line 28

# Word-wraps +text+ to lines of at most +size+ columns, prefixing every
# wrapped line with +indent+ spaces.
#
# +offset+ adds extra leading spaces to the very first line only; it is reset
# to 0 once that line has been produced. Lengths are measured on
# Log.uncolor'ed text. Returns the re-assembled string.
def self.format_paragraph(text, size = 80, indent = 0, offset = 0)
  i = 0
  # Separators: a blank line, or a line break followed by a line that starts
  # with "*". The capture group makes split keep the separators.
  re = /((?:\n\s*\n\s*)|(?:\n\s*(?=\*)))/
    text.split(re).collect do |paragraph|
    i += 1
    # Odd pieces are paragraph text and get wrapped; even pieces are the
    # captured separators and are emitted untouched.
    str = if i % 2 == 1
            # Collapse whitespace runs ("\s" is a single space here)
            words = paragraph.gsub(/\s+/, "\s").split(" ")
            lines = []
            line = " "*offset
            word = words.shift
            while word
              # Truncate words that could never fit on a line
              word = word[0..size-indent-offset-4] + '...' if word.length >= size - indent - offset
              # Fill the current line while the next word still fits
              while word and Log.uncolor(line).length + Log.uncolor(word).length <= size - indent
                line << word << " "
                word = words.shift
              end
              offset = 0
              # line[0..-2] drops the last character (the trailing space)
              lines << ((" " * indent) << line[0..-2])
              line = ""
            end
            (lines * "\n")
          else
            paragraph
          end
    offset = 0
    str
    end*""
end

.format_seconds(time, extended = false) ⇒ Object



21
22
23
24
25
26
# File 'lib/rbbt/util/misc/format.rb', line 21

# Formats a duration in seconds as "HH:MM:SS". When +extended+ is true the
# hundredths of a second are appended as ".CC".
def self.format_seconds(time, extended = false)
  whole = time.to_i
  minutes, secs = whole.divmod(60)
  hours, mins = minutes.divmod(60)

  formatted = "%02i:%02i:%02i" % [hours, mins, secs]
  formatted << ".%02i" % ((time - whole) * 100) if extended
  formatted
end

.genomic_location_cmp(gpos1, gpos2, sep = ":") ⇒ Object



354
355
356
357
358
359
360
361
362
363
364
# File 'lib/rbbt/util/misc/omics.rb', line 354

# Compares two "chromosome<sep>position" strings: first by chromosome using
# plain <=>, then by position as integers. Returns the <=> result.
def self.genomic_location_cmp(gpos1, gpos2, sep = ":")
  chr1, _, pos1 = gpos1.partition(sep)
  chr2, _, pos2 = gpos2.partition(sep)

  by_chromosome = chr1 <=> chr2
  return by_chromosome unless 0 === by_chromosome

  pos1.to_i <=> pos2.to_i
end

.genomic_location_cmp_contigs(gpos1, gpos2, contigs, sep = ":") ⇒ Object



378
379
380
381
382
383
384
385
386
387
388
# File 'lib/rbbt/util/misc/omics.rb', line 378

# Compares two "chromosome<sep>position" strings: chromosomes are ordered
# with chr_cmp_contigs and +contigs+; ties are broken by integer position.
def self.genomic_location_cmp_contigs(gpos1, gpos2, contigs, sep = ":")
  chr1, _, pos1 = gpos1.partition(sep)
  chr2, _, pos2 = gpos2.partition(sep)

  by_chromosome = chr_cmp_contigs(chr1, chr2, contigs)
  return by_chromosome unless 0 === by_chromosome

  pos1.to_i <=> pos2.to_i
end

.genomic_location_cmp_strict(gpos1, gpos2, sep = ":") ⇒ Object



366
367
368
369
370
371
372
373
374
375
376
# File 'lib/rbbt/util/misc/omics.rb', line 366

# Compares two "chromosome<sep>position" strings: chromosomes are ordered
# with chr_cmp_strict; ties are broken by integer position.
def self.genomic_location_cmp_strict(gpos1, gpos2, sep = ":")
  chr1, _, pos1 = gpos1.partition(sep)
  chr2, _, pos2 = gpos2.partition(sep)

  by_chromosome = chr_cmp_strict(chr1, chr2)
  return by_chromosome unless 0 === by_chromosome

  pos1.to_i <=> pos2.to_i
end

.get_filename(obj) ⇒ Object



390
391
392
393
394
395
396
397
398
399
400
# File 'lib/rbbt/util/misc/inspect.rb', line 390

# Returns a file name for +obj+, or nil when none can be derived.
#
# Tried in order: obj.filename, obj.path, and +obj+ itself when it is a Path
# or a String that Misc.is_filename? accepts.
def self.get_filename(obj)
  return obj.filename if obj.respond_to? :filename
  return obj.path if obj.respond_to? :path
  return obj if Path === obj
  return obj if String === obj && Misc.is_filename?(obj)

  nil
end

.GET_params2hash(string) ⇒ Object



54
55
56
57
58
59
60
61
# File 'lib/rbbt/util/misc/options.rb', line 54

# Parses a query string ("a=1&b=2") into a Hash of String keys and values.
#
# Values are CGI-unescaped; keys are left as written. A parameter without
# "=" (or with an empty value) maps to "". Only the first "=" of each
# parameter separates key from value, so values that themselves contain "="
# (e.g. base64 padding) are preserved.
def self.GET_params2hash(string)
  hash = {}
  string.split('&').each do |item|
    key, value = item.split("=", 2)
    hash[key] = value.nil? ? "" : CGI.unescape(value)
  end
  hash
end

.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/rbbt/util/misc/math.rb', line 106

# Builds a Google Charts URL for a three-set Venn diagram.
#
# The chart data are the sizes of the three lists, their pairwise
# intersections and the triple intersection, scaled by +total+ (a number, or
# an Array whose length is used) or by the largest size when no total is
# given, truncated to two decimals. The legend reports the raw counts.
def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
  name1 ||= "list 1"
  name2 ||= "list 2"
  name3 ||= "list 3"

  regions = [list1, list2, list3, list1 & list2, list1 & list3, list2 & list3, list1 & list2 & list3]
  counts = regions.collect{|region| region.length }
  n1, n2, n3, n12, n13, n23, n123 = counts

  total = total.length if Array === total

  summary = " INTERSECTION: #{n123}"
  summary << " TOTAL: #{total}" if total

  label = [
    "#{name1}: #{n1} (#{name2}: #{n12}, #{name3}: #{n13})",
    "#{name2}: #{n2} (#{name1}: #{n12}, #{name3}: #{n23})",
    "#{name3}: #{n3} (#{name1}: #{n13}, #{name2}: #{n23})",
    summary
  ] * "|"

  scale = total || counts.max
  # Truncate (not round) each fraction to two decimals
  fractions = counts.collect{|count| (count.to_f/scale * 100).to_i.to_f / 100 }

  "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{fractions * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
end

.gzip(tarfile) ⇒ Object

gzips the underlying string in the given StringIO, returning a new StringIO representing the compressed file.



66
67
68
69
70
71
72
73
74
75
# File 'lib/rbbt/util/tar.rb', line 66

# Gzips the string held by the StringIO +tarfile+ and returns a new StringIO
# over the compressed bytes.
def self.gzip(tarfile)
  compressed = StringIO.new("")

  # The writer must be closed so the gzip footer gets written; wrap does
  # that when the block returns.
  Zlib::GzipWriter.wrap(compressed) do |writer|
    writer.write tarfile.string
  end

  # Closing the writer also closed the underlying StringIO, so hand back a
  # fresh one over the same data.
  StringIO.new compressed.string
end

.hash2GET_params(hash) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/rbbt/util/misc/options.rb', line 63

# Serializes +hash+ as a query string ("a=1&b=2") with entries sorted by key.
#
# Entries whose value class is not in the list below are dropped. Symbols
# are written as is, Arrays are comma-joined, and every other value is
# CGI-escaped with "/" left readable.
def self.hash2GET_params(hash)
  allowed = %w(Symbol String Float Fixnum Integer Numeric TrueClass FalseClass Module Class Object Array)

  pairs = hash.sort_by{|k,v| k.to_s }.select{|k,v| allowed.include? v.class.to_s }

  pairs.collect do |k, v|
    encoded = case v
              when Symbol
                v.to_s
              when Array
                v * ","
              else
                CGI.escape(v.to_s).gsub('%2F','/')
              end
    name = Symbol === k ? k.to_s : k
    [name, encoded] * "="
  end * "&"
end

.hash2md5(hash) ⇒ Object



184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'lib/rbbt/util/misc/inspect.rb', line 184

# Computes a digest for +hash+ from a textual rendering of its entries.
#
# Keys are visited sorted by their to_s; each entry contributes
# "key=>value" text whose form depends on the value's class. The collected
# text is passed to +digest+. Returns "" for nil or empty hashes, or when no
# text was collected.
def self.hash2md5(hash)
  return "" if hash.nil? or hash.empty?

  str = ""
  keys = hash.keys
  keys = keys.clean_annotations if keys.respond_to? :clean_annotations
  keys = keys.sort_by{|k| k.to_s}

  # Temporarily set +unnamed+ on hashes that support it; the previous value
  # is put back after the loop (not inside an ensure).
  if hash.respond_to? :unnamed
    unnamed = hash.unnamed
    hash.unnamed = true 
  end


  keys.each do |k|
    # These keys never take part in the digest
    next if k == :monitor or k == "monitor" or k == :in_situ_persistence or k == "in_situ_persistence"
    _v = hash[k]
    _k = k
    # TSV values are kept as they are; everything else goes through Annotated.purge
    v = TSV === _v ? _v : Annotated.purge(_v)
    k = Annotated.purge(k)

    case
    when TrueClass === v
      str << k.to_s << "=>true" 
    when FalseClass === v
      str << k.to_s << "=>false" 
    when TSV === v
      str << k.to_s << "=>" << obj2md5(v)
    when Hash === v
      # Nested hashes contribute their own digest
      str << k.to_s << "=>" << hash2md5(v)
    when Symbol === v
      str << k.to_s << "=>" << v.to_s
    when (String === v and v.length > HASH2MD5_MAX_STRING_LENGTH)
      # Long strings: only a prefix plus the total length is used
      #str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH] << v[v.length-3..v.length+3] << v[-3..-1] << "; #{ v.length }"
      str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH] << "; #{ v.length }"
    when String === v
      str << k.to_s << "=>" << v
    when (Array === v and v.length > HASH2MD5_MAX_ARRAY_LENGTH)
      # Long arrays: only the leading elements plus the total length are used
      #str << k.to_s << "=>[" << (v[0..HASH2MD5_MAX_ARRAY_LENGTH] + v[v.length-3..v.length+3] + v[-3..-1]) * "," << "; #{ v.length }]"
      str << k.to_s << "=>[" << v[0..HASH2MD5_MAX_ARRAY_LENGTH] * "," << "; #{ v.length }]"
    when TSV::Parser === v
      # NOTE(review): unlike the other branches, the key is not written here
      str << remove_long_items(v)
    when Array === v
      str << k.to_s << "=>[" << v * "," << "]"
    when File === v
      str << k.to_s << "=>[File:" << v.path << "]"
    else
      # Anything else is rendered through inspect, falling back to the
      # object id when inspect itself raises
      begin
        v_ins = v.inspect
      rescue
        v_ins = "#Object:" << v.object_id.to_s
      end

      case
      when v_ins =~ /:0x0/
        # Remove the first ":0x<hex>@" address fragment from the inspect text
        str << k.to_s << "=>" << v_ins.sub(/:0x[a-f0-9]+@/,'')
      else
        str << k.to_s << "=>" << v_ins
      end
    end

    # Annotated values (other than AssociationItem) also contribute the
    # digest of their purged info hash
    if _v and defined? Annotated and Annotated === _v and not (defined? AssociationItem and AssociationItem === _v)
      info = _v.info
      info = Annotated.purge(info)
      str << "_" << hash2md5(info) 
    end
  end
  hash.unnamed = unnamed if hash.respond_to? :unnamed

  if str.empty?
    ""
  else
    digest(str)
  end
end

.hash2string(hash) ⇒ Object



46
47
48
49
50
51
52
# File 'lib/rbbt/util/misc/options.rb', line 46

# Serializes +hash+ as "key=value" pairs joined by "#", sorted by key.
#
# Symbol keys and values are prefixed with ":". Entries whose value class is
# not in the list below are dropped.
def self.hash2string(hash)
  printable = %w(Symbol String Float Fixnum Integer Numeric TrueClass FalseClass Module Class Object)
  encode = lambda{|item| Symbol === item ? ":#{item}" : item }

  pairs = hash.sort_by{|k,v| k.to_s }.select{|k,v| printable.include? v.class.to_s }
  pairs.collect{|k,v| [encode.call(k), encode.call(v)] * "=" } * "#"
end

.hash_to_html_tag_attributes(hash) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/rbbt/util/misc/options.rb', line 86

# Renders +hash+ as HTML attributes: "key='value' key2='value2'".
#
# Arrays are joined with spaces. Strings, Symbols, true and numbers are
# written as text. Entries with a nil key, a nil or empty-String value, or a
# value of any other class (including false) are skipped. Values are not
# escaped. Returns "" for a nil or empty hash.
def self.hash_to_html_tag_attributes(hash)
  return "" if hash.nil? or hash.empty?

  attributes = hash.collect do |k, v|
    next if k.nil? || v.nil? || (String === v && v.empty?)

    text = case v
           when Array
             v * " "
           when String
             v
           when Symbol, TrueClass, Numeric
             v.to_s
           end
    next if text.nil?

    [k, "'" << text << "'"] * "="
  end

  attributes.compact * " "
end

.hostname ⇒ Object



5
6
7
# File 'lib/rbbt/util/misc/system.rb', line 5

# Returns the output of the `hostname` command with surrounding whitespace
# removed. The value is computed once and memoized.
def self.hostname
  @hostname ||= `hostname`.strip
end

.html_tag(tag, content = nil, params = {}) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
# File 'lib/rbbt/util/misc/options.rb', line 108

# Builds an HTML element string for +tag+ with attributes taken from
# +params+ (rendered by hash_to_html_tag_attributes).
#
# A nil +content+ produces a self-closing tag; otherwise +content+ is placed
# between the opening and closing tags. Nothing is escaped.
def self.html_tag(tag, content = nil, params = {})
  attr_str = hash_to_html_tag_attributes(params)
  attr_str = " #{attr_str}" if String === attr_str && attr_str != ""

  return "<#{ tag }#{attr_str}/>" if content.nil?

  "<#{ tag }#{attr_str}>#{ content }</#{ tag }>"
end

.humanize(value, options = {}) ⇒ Object

source: gist.github.com/ekdevdes/2450285 author: Ethan Kramer (github.com/ekdevdes)



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/rbbt/util/misc/format.rb', line 109

# Turns an underscore-separated identifier into human-readable text.
#
# options[:format] selects the style (default :sentence when +options+ is
# empty):
#   :sentence - "Hello world"   (first word capitalized)
#   :allcaps  - "Hello World"   (every word capitalized)
#   :class    - "HelloWorld"    (every word capitalized, no separator)
#   :nocaps   - "hello world"
# Words that look like acronyms (a letter followed by an upper-case letter)
# are not lower-cased, and are not capitalized in :sentence format.
# Returns nil for any other format. The caller's +options+ hash and +value+
# are not modified.
def self.humanize(value, options = {})
  format = options.empty? ? :sentence : options[:format]
  acronym = /[a-zA-Z][A-Z]/

  # Lower-case every word, except for acronyms (Miguel Vazquez edit)
  words = value.to_s.split('_').collect do |word|
    word.match(acronym) ? word : word.downcase
  end

  case format
  when :allcaps
    words.collect{|word| word.capitalize } * " "
  when :class
    words.collect{|word| word.capitalize } * ""
  when :sentence
    # An empty value has no first word to capitalize
    words[0] = words[0].capitalize unless words.empty? or words[0].match(acronym)
    words * " "
  when :nocaps
    words * " "
  end
end

.humanize_list(list) ⇒ Object



178
179
180
181
182
183
184
185
# File 'lib/rbbt/util/misc/format.rb', line 178

# Joins the elements of +list+ as English prose: "a, b and c".
#
# Returns "" for an empty list. A single-element list returns that element
# itself, unconverted.
def self.humanize_list(list)
  return "" if list.empty?
  return list.first if list.length == 1

  leading = list[0..-2].collect{|e| e.to_s } * ", "
  leading << " and " << list[-1].to_s
end

.in_delta?(a, b, delta = 0.0001) ⇒ Boolean

Returns:

  • (Boolean)


129
130
131
# File 'lib/rbbt/util/misc/math.rb', line 129

# True when +a+ and +b+, converted to floats, differ by strictly less than
# +delta+.
def self.in_delta?(a, b, delta = 0.0001)
  difference = a.to_f - b.to_f
  difference.abs < delta
end

.in_dir(dir) ⇒ Object

WARN: probably not thread safe…



89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/rbbt/util/misc/system.rb', line 89

# Runs the block with +dir+ as working directory (creating it if missing)
# and returns the block's value. The previous working directory is restored
# afterwards, even when the block raises.
#
# WARN: probably not thread safe (the working directory is process-wide).
def self.in_dir(dir)
  old_pwd = FileUtils.pwd
  begin
    FileUtils.mkdir_p dir unless File.exist?(dir)
    FileUtils.cd dir
    yield
  ensure
    FileUtils.cd old_pwd
  end
end

.index_BED(source, destination, sorted = false) ⇒ Object



451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
# File 'lib/rbbt/util/misc/omics.rb', line 451

# Opens, or builds, a sharded index for the BED-like file +source+ at
# +destination+.
#
# Keys are "chr:start:end" strings and values are the fourth column (id).
# Entries are sharded by the chromosome part of the key. When +destination+
# already exists the existing index is opened and returned; otherwise the
# index is created from +source+. The +sorted+ parameter is not used.
def self.index_BED(source, destination, sorted = false)

  # Extracts [start, end] as integers from a "chr:start:end" key
  pos_function = Proc.new do |k|
    k.split(":").values_at(1, 2).collect{|i| i.to_i}
  end
  if Open.exists? destination
    Persist::Sharder.new destination, false, "fwt", :pos_function => pos_function  do |key|
      key.split(":")[0]
    end
  else
    # NOTE(review): +io+ on the right-hand side is this same, still nil,
    # local, so the source is always opened with Open.open; `IO === source`
    # may have been intended.
    io = IO === io ? io : Open.open(source) 

    max_size = 0
    # First pass: keep the first four columns, strip the first "chr" from the
    # chromosome name and record the longest id
    nio = Misc.open_pipe do |sin|
      while line = io.gets
        chr, start, eend, id, *rest = line.chomp.split("\t")
        l = id.length
        max_size = l if max_size < l
        chr = chr.sub('chr','')
        sin << [chr, start, eend, id] * "\t" << "\n"
      end
    end
    
    TmpFile.with_file do |tmpfile|
      # The stream is consumed into a temporary file before max_size is read
      Misc.consume_stream(nio, false, tmpfile)

      value_size = max_size
      destination = destination.find if Path === destination
      sharder = Persist::Sharder.new destination, true, "fwt", :value_size => value_size, :range => true, :pos_function => pos_function  do |key|
        key.split(":")[0]
      end

      # Second pass: store every id under its "chr:start:end" key
      TSV.traverse tmpfile, :type => :array, :bar => "Creating BED index for #{Misc.fingerprint source}" do |line|
        next if line.empty?
        chr, start, eend, id, *rest = line.split("\t")
        key = [chr, start, eend] * ":"
        sharder[key] = id
      end
      sharder.read

      sharder
    end
  end
end

.insist(times = 4, sleep = nil, msg = nil) ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/rbbt/util/misc/development.rb', line 137

# Runs the block, retrying it when it raises, up to +times+ attempts.
#
# times - number of attempts, or an Array of sleep intervals (its length is
#         then the number of attempts)
# sleep - seconds to wait between attempts; when nil a default list of
#         growing intervals is used
# msg   - extra text for the warning logged on each failure; pass false to
#         silence that warning
#
# TryAgain always retries without counting as an attempt. StopInsist,
# Aborted and Interrupt are re-raised without retrying. Once the attempts
# are used up the last exception is re-raised.
def self.insist(times = 4, sleep = nil, msg = nil)
  if Array === times
    sleep_array = times
    times = sleep_array.length
    sleep = sleep_array.shift
  end
  try = 0

  if sleep.nil?
    # Default intervals: a 0 plus (times / 3) copies of the base intervals,
    # sorted ascending and cut to +times+ entries
    sleep_array = ([0] + [0.001, 0.01, 0.1, 0.5] * (times / 3)).sort[0..times-1]
    sleep = sleep_array.shift
  end

  begin
    yield
  rescue TryAgain
    # `sleep sleep` calls Kernel#sleep with the local variable +sleep+
    sleep sleep
    retry
  rescue StopInsist
    raise $!.exception
  rescue Aborted, Interrupt
    if msg
      Log.warn("Not Insisting after Aborted: #{$!.message} -- #{msg}")
    else
      Log.warn("Not Insisting after Aborted: #{$!.message}")
    end
    raise $!
  rescue Exception
    Log.exception $! if ENV["RBBT_LOG_INSIST"] == 'true'
    if msg
      Log.warn("Insisting after exception: #{$!.class} #{$!.message} -- #{msg}")
    elsif FalseClass === msg
      # msg == false: no warning
      nil
    else
      Log.warn("Insisting after exception:  #{$!.class} #{$!.message}")
    end

    # The first failure retries right away; later ones wait, moving on to
    # the next interval when a list of intervals is in use
    if sleep and try > 0
      sleep sleep
      sleep = sleep_array.shift || sleep if sleep_array
    else
      Thread.pass
    end

    try += 1
    retry if try < times
    raise $!
  end
end

.intercalate_streams(streams) ⇒ Object



634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
# File 'lib/rbbt/util/misc/pipes.rb', line 634

# Returns a pipe (from Misc.open_pipe) that interleaves the lines of
# +streams+: one line from each stream that still has data per round, until
# all are exhausted. The source streams are then joined (when they support
# it) and closed.
def self.intercalate_streams(streams)
  Misc.open_pipe do |sin|
    loop do
      round = streams.collect{|stream| stream.gets unless stream.eof? }.compact
      break if round.empty?

      round.each{|line| sin.puts line }
    end

    streams.each do |stream|
      stream.join if stream.respond_to? :join
      stream.close if stream.respond_to? :close and not stream.closed?
    end
  end
end

.intersect_sorted_arrays(a1, a2) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/rbbt/util/misc/manipulation.rb', line 60

# Returns the elements common to the two ascending-sorted arrays +a1+ and
# +a2+.
#
# Both arrays are consumed with +shift+ while scanning, so the caller's
# arrays are modified. A nil element ends the scan.
def self.intersect_sorted_arrays(a1, a2)
  common = []
  left, right = a1.shift, a2.shift

  until left.nil? or right.nil?
    order = left <=> right
    if order == 0
      common << left
      left, right = a1.shift, a2.shift
    elsif order == -1
      # Skip ahead in a1 until it catches up with the current a2 element
      left = a1.shift while not left.nil? and left < right
    elsif order == 1
      # Skip ahead in a2 until it catches up with the current a1 element
      right = a2.shift
      right = a2.shift while not right.nil? and right < left
    end
  end

  common
end

.intersect_streams(f1, f2, out, sep = ":") ⇒ Object



393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
# File 'lib/rbbt/util/misc/omics.rb', line 393

# Writes to +out+ every pair of overlapping entries from the streams +f1+
# and +f2+, as "line1<TAB>line2".
#
# Entries are parsed with intersect_streams_read using +sep+ and compared by
# chromosome (intersect_streams_cmp_chr) and by [start, end] range. The
# stream that is behind is advanced one entry at a time, so this presumably
# expects both streams sorted by chromosome and position - TODO confirm.
# +f2+ must support +pos+ and +seek+, which are used to rewind after
# scanning ahead. Returns immediately when either stream starts at EOF.
def self.intersect_streams(f1, f2, out, sep=":")
  finish = false
  return if f1.eof? or f2.eof?
  line1, chr1, start1, eend1, rest1 = intersect_streams_read(f1,sep)
  line2, chr2, start2, eend2, rest2 = intersect_streams_read(f2,sep)
  while not finish
    # Decide which stream to advance: 1 for f1, 2 for f2
    cmp = intersect_streams_cmp_chr(chr1,chr2)
    case cmp
    when -1
      move = 1
    when 1
      move = 2
    else
      if eend1 < start2
        move = 1
      elsif eend2 < start1
        move = 2
      else
        # Overlap: remember where f2 is, scan ahead in f2 reporting every
        # entry that overlaps the current f1 entry, then rewind f2 so the
        # next f1 entry is compared against the same f2 entries
        pos2 = f2.pos

        sline2, schr2, sstart2, seend2, srest2 = line2, chr2, start2, eend2, rest2
        while chr1 == chr2 and eend1 >= start2
          out.puts line1 + "\t" + line2 if start1 <= eend2
          if f2.eof?
            # Sentinel chromosome name that ends the scan-ahead loop
            chr2 = 'next2'
          else
            line2, chr2, start2, eend2, rest2 = intersect_streams_read(f2,sep)
          end
        end
        line2, chr2, start2, eend2, rest2 = sline2, schr2, sstart2, seend2, srest2
        f2.seek(pos2)
        move = 1
      end
    end

    # Advance the chosen stream; stop as soon as it is exhausted
    case move
    when 1
      if f1.eof?
        finish = true
      else
        line1, chr1, start1, eend1, rest1 = intersect_streams_read(f1,sep)
      end
    when 2
      if f2.eof?
        finish = true
      else
        line2, chr2, start2, eend2, rest2 = intersect_streams_read(f2,sep)
      end
    end
  end
end

.intersect_streams_cmp_chr(chr1, chr2) ⇒ Object



389
390
391
# File 'lib/rbbt/util/misc/omics.rb', line 389

# Chromosome ordering used by intersect_streams: the plain <=> comparison of
# the two chromosome names.
def self.intersect_streams_cmp_chr(chr1, chr2)
  chr1.<=>(chr2)
end

.intersect_streams_read(io, sep = ":") ⇒ Object



323
324
325
326
327
328
329
330
331
332
333
334
# File 'lib/rbbt/util/misc/omics.rb', line 323

# Reads the next line from +io+ and parses it as "chr<sep>start<sep>end...".
#
# Returns [line, chr, start, end, rest] with start and end as integers. When
# the third field is missing or does not begin with a number, end is set to
# start. +rest+ holds the fields after the third one.
def self.intersect_streams_read(io, sep=":")
  line = io.gets.chomp
  chr, start, eend, *rest = line.split(sep, -1)

  start = start.to_i
  eend = (eend =~ /^\d+(\t.*)?$/) ? eend.to_i : start.to_i

  [line, chr, start, eend, rest]
end

.is_filename?(string) ⇒ Boolean Also known as: filename?

Returns:

  • (Boolean)


102
103
104
105
106
107
# File 'lib/rbbt/util/misc/system.rb', line 102

# True when +string+ can be treated as a file name: it is a Path, it
# responds to +exists+, or it is a String shorter than 265 characters that
# names an existing file.
def self.is_filename?(string)
  # The original tested `defined? PATH`, a constant that is not the Path
  # type, so this shortcut could never apply.
  return true if defined?(Path) and Path === string
  return true if string.respond_to? :exists
  return true if String === string and string.length < 265 and File.exist?(string)
  return false
end

.IUPAC_to_base(iupac) ⇒ Object



270
271
272
# File 'lib/rbbt/util/misc/omics.rb', line 270

# Returns the list of bases that the IUPAC nucleotide code +iupac+ stands
# for, looked up in IUPAC2BASE; nil for unknown codes.
def self.IUPAC_to_base(iupac)
  IUPAC2BASE.fetch(iupac, nil)
end

.line_monitor_stream(stream, &block) ⇒ Object



731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
# File 'lib/rbbt/util/misc/pipes.rb', line 731

# Tees +stream+ and calls +block+ with every line of one copy from a
# background thread, returning the other copy for the caller to read.
#
# The returned stream is set up as a ConcurrentStream that tracks the
# monitoring thread, and receives the annotations of +stream+ when it
# supports +annotate+.
def self.line_monitor_stream(stream, &block)
  monitor, out = tee_stream stream
  monitor_thread = Thread.new do
    begin
      while line = monitor.gets
        block.call line
      end
    rescue
      # A failure while monitoring is logged and raised on both copies
      Log.exception $!
      monitor.raise $!
      monitor.close unless monitor.closed?
      monitor.join if monitor.respond_to?(:join) && ! monitor.aborted?
      out.raise $! if out.respond_to?(:raise)
    ensure
      # Always close and join the monitored copy
      monitor.close unless monitor.closed?
      monitor.join if monitor.respond_to?(:join) && ! monitor.aborted?
    end
  end

  stream.annotate out if stream.respond_to? :annotate
  ConcurrentStream.setup out, :threads => monitor_thread
end

.lock(file, unlock = true, options = {}) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/rbbt/util/misc/lock.rb', line 21

# Runs the block while holding a lock associated with +file+ and returns the
# block's value. The lock object (or nil) is yielded to the block.
#
# file    - path being protected; the lock defaults to "<file>.lock". With a
#           nil file the block simply runs unlocked, unless a Lockfile is
#           given in options[:lock].
# unlock  - whether to release the lock afterwards (default true). A Hash in
#           this position is taken as +options+.
# options - passed on to Lockfile.new. options[:lock] may be a Lockfile to
#           reuse, false to skip locking, or a Path/String with the lock
#           file location.
#
# Raises LockInterrupted when acquiring the lock is aborted or interrupted.
# A KeepLocked raised by the block leaves the lock held and returns its
# payload.
def self.lock(file, unlock = true, options = {})
  unlock, options = true, unlock if Hash === unlock
  return yield if file.nil? and not Lockfile === options[:lock]

  # A nil file is allowed together with an explicit Lockfile; there is no
  # directory to prepare in that case.
  unless file.nil?
    file = file.find if Path === file
    lock_dir = File.dirname(File.expand_path(file))
    FileUtils.mkdir_p lock_dir unless File.exist? lock_dir
  end

  begin
    case options[:lock]
    when Lockfile
      lockfile = options[:lock]
      lockfile.lock unless lockfile.locked?
    when FalseClass
      lockfile = nil
      unlock = false
    when Path, String
      lock_path = options[:lock]
      # Only resolve objects that know how to; a plain String is used as is
      lock_path = lock_path.find if lock_path.respond_to? :find
      lockfile = Lockfile.new(lock_path, options)
      lockfile.lock
    else
      lock_path = File.expand_path(file + '.lock')
      lockfile = Lockfile.new(lock_path, options)
      lockfile.lock
    end
  rescue Aborted, Interrupt
    raise LockInterrupted
  end

  res = nil

  begin
    res = yield lockfile
  rescue KeepLocked
    unlock = false
    res = $!.payload
  ensure
    if unlock
      # Releasing is best effort: problems are logged, not raised
      begin
        lockfile.unlock if lockfile.locked?
      rescue Exception
        Log.warn "Exception unlocking: #{lockfile.path}"
        Log.exception $!
      end
    end
  end

  res
end

.lock_in_repo(repo, key, *args) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/rbbt/util/misc/lock.rb', line 76

# Runs the block while holding the entry "lock-<key>" in +repo+ (a
# key-value store) and returns the block's value.
#
# The block receives the lock key followed by +args+. Without a repo or a
# key the block runs unlocked and receives nil in place of the lock key.
# A lock left by a process of this host that no longer exists is removed;
# otherwise the call waits, polling once per second, until the entry is
# gone. The lock entry is removed when the block finishes, also when it
# raises.
def self.lock_in_repo(repo, key, *args)
  # The original yielded an undefined local here and raised NameError
  return yield(nil, *args) if repo.nil? or key.nil?

  lock_key = "lock-" << key

  begin
    if repo[lock_key]
      info = LOCK_REPO_SERIALIZER.load(repo[lock_key])
      # Entries are written below with the keys :hostname and :pid; the
      # string keys are also accepted
      host = info[:hostname] || info["host"]
      pid = info[:pid] || info["pid"]

      if Misc.hostname == host and pid and not Misc.pid_exists?(pid)
        Log.info("Removing lockfile: #{lock_key}. This pid #{Process.pid}. Content: #{info.inspect}")
        repo.out lock_key
      end
    end
  rescue
    Log.warn("Error checking lockfile #{lock_key}: #{$!.message}. Removing. Content: #{begin repo[lock_key] rescue "Could not open file" end}")
    repo.out lock_key if repo.include? lock_key
  end

  while repo[lock_key]
    sleep 1
  end

  repo[lock_key] = LOCK_REPO_SERIALIZER.dump({:hostname => Misc.hostname, :pid => Process.pid})

  begin
    yield lock_key, *args
  ensure
    # Never leave the lock behind, even if the block failed
    repo.delete lock_key
  end
end

.log10(x) ⇒ Object



10
11
12
# File 'lib/rbbt/util/misc/math.rb', line 10

# Base-10 logarithm of +x+: the natural logarithm scaled by the precomputed
# Log10Multiplier.
def self.log10(x)
  natural = Math.log(x)
  natural * Log10Multiplier
end

.log2(x) ⇒ Object



6
7
8
# File 'lib/rbbt/util/misc/math.rb', line 6

# Base-2 logarithm of +x+: the natural logarithm scaled by the precomputed
# Log2Multiplier.
def self.log2(x)
  natural = Math.log(x)
  natural * Log2Multiplier
end

.match_fields(field1, field2) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
# File 'lib/rbbt/util/misc/objects.rb', line 3

# True when two field names are equal, either literally or after reducing
# each one to the text found inside parentheses (when it has any).
def self.match_fields(field1, field2)
  return true if field1 == field2

  inner = lambda do |field|
    m = field.match(/\((.*)\)/)
    m ? m[1] : field
  end

  inner.call(field1) == inner.call(field2)
end

.match_value(value, condition) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/rbbt/util/misc.rb', line 39

# Tests +value+ against +condition+.
#
# A String condition is first stripped and converted with
# _convert_match_condition. Then, depending on the condition:
#   Regexp      - value matches the expression
#   nil / true  - value is true or the string "true" (any case)
#   false       - value is false or the string "false" (any case)
#   String      - equality; numeric when value is a Numeric
#   Numeric     - numeric equality on floats
#   [:cmp, op, x]  - value.to_f.send(op, x)
#   [:invert, c]   - negation of matching c
#   other Array    - true when any of its conditions matches
# Raises a RuntimeError for any other kind of condition.
def self.match_value(value, condition)
  condition = _convert_match_condition(condition.strip) if String === condition

  case condition
  when Regexp
    !! value.match(condition)
  when NilClass, TrueClass
    # The class must be the receiver of ===; `value === TrueClass` is never
    # true for a real boolean
    TrueClass === value or (String === value and value.downcase == 'true')
  when FalseClass
    FalseClass === value or (String === value and value.downcase == 'false')
  when String
    Numeric === value ? value.to_f == condition.to_f : value == condition
  when Numeric
    value.to_f == condition.to_f
  when Array
    case condition.first
    when :cmp
      value.to_f.send(condition[1], condition[2])
    when :invert
      ! match_value(value, condition[1] )
    else
      condition.any?{|e| match_value(value, e) }
    end
  else
    raise "Condition not understood: #{Misc.fingerprint condition}"
  end
end

.max(list) ⇒ Object



14
15
16
17
18
19
20
21
# File 'lib/rbbt/util/misc/math.rb', line 14

# Largest element of +list+, ignoring nils. Returns nil when there is
# nothing to compare. Elements are compared with >.
def self.max(list)
  list.inject(nil) do |best, v|
    if v.nil?
      best
    elsif best.nil? or v > best
      v
    else
      best
    end
  end
end

.mean(list) ⇒ Object



45
46
47
# File 'lib/rbbt/util/misc/math.rb', line 45

# Arithmetic mean of +list+: the result of +sum+ as a float divided by the
# number of non-nil elements.
def self.mean(list)
  present = list.compact.length
  sum(list).to_f / present
end

.median(array) ⇒ Object



49
50
51
52
53
# File 'lib/rbbt/util/misc/math.rb', line 49

# Median of +array+ as a float: the middle element of the sorted values, or
# the average of the two middle ones for an even count. Returns nil for an
# empty array (the original raised NoMethodError on nil).
def self.median(array)
  sorted = array.sort
  len = sorted.length
  return nil if len == 0

  (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0
end

.memory_use(pid = nil) ⇒ Object



372
373
374
# File 'lib/rbbt/util/misc/development.rb', line 372

# Value of the `rss` column reported by `ps` for process +pid+ (the current
# process when nil), as an Integer.
def self.memory_use(pid=nil)
  target = pid || $$
  report = `ps -o rss -p #{target}`
  # The last whitespace-separated token is the value; the header precedes it
  report.strip.split.last.to_i
end

.memprof ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/rbbt/util/misc/development.rb', line 110

# Runs the block under Memprof and returns the block's value.
#
# Profiling is always stopped and the stats printed, also when the block
# raises; in that case "Profiling aborted" is printed and the exception is
# re-raised. The memprof library is loaded on first use.
def self.memprof
  require 'memprof'
  Memprof.start

  begin
    yield
  rescue Exception
    puts "Profiling aborted"
    raise $!
  ensure
    Memprof.stop
    print Memprof.stats
  end
end

.merge_sorted_arrays(a1, a2) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/rbbt/util/misc/manipulation.rb', line 79

# Merges the two ascending-sorted arrays +a1+ and +a2+ into a new sorted
# array. An element present in both arrays at the same point of the scan is
# emitted once.
#
# Both arrays are consumed with +shift+ while scanning, so the caller's
# arrays are modified.
def self.merge_sorted_arrays(a1, a2)
  merged = []
  left, right = a1.shift, a2.shift

  # Interleave while both arrays still have a current element
  while left and right
    case left <=> right
    when 0
      merged << left
      left, right = a1.shift, a2.shift
    when -1
      merged << left
      left = a1.shift
    when 1
      merged << right
      right = a2.shift
    end
  end

  # Append whatever is left of the array that was not exhausted
  if right
    merged << right
    merged.concat a2
  elsif left
    merged << left
    merged.concat a1
  end

  merged
end

.min(list) ⇒ Object



23
24
25
26
27
28
29
30
# File 'lib/rbbt/util/misc/math.rb', line 23

# Smallest element of +list+, ignoring nils. Returns nil when there is
# nothing to compare. Elements are compared with <.
def self.min(list)
  list.inject(nil) do |best, v|
    if v.nil?
      best
    elsif best.nil? or v < best
      v
    else
      best
    end
  end
end

.mtime_str(path) ⇒ Object



264
265
266
267
268
269
270
271
# File 'lib/rbbt/util/misc/inspect.rb', line 264

# Describes the modification time of +path+: "mtime: <time>" when the file
# exists and "mtime: not present" otherwise. Path objects are resolved with
# +find+ first.
def self.mtime_str(path)
  path = path.find if Path === path
  # File.exists? was deprecated and then removed in Ruby 3.2
  if File.exist? path
    "mtime: " << File.mtime(path).to_s
  else
    "mtime: not present"
  end
end

.name2basename(file) ⇒ Object



24
25
26
# File 'lib/rbbt/util/misc/inspect.rb', line 24

# Turns a name into text usable as a file basename: "/" becomes ">" and "~"
# becomes "-", and the result is passed through sanitize_filename.
def self.name2basename(file)
  flattened = file.tr("/~", ">-")
  sanitize_filename(flattened)
end

.notify(description, event = 'notification', key = nil) ⇒ Object



389
390
391
392
393
394
395
396
397
398
399
400
401
# File 'lib/rbbt/util/misc/development.rb', line 389

# Sends a Pushbullet note titled +event+ with body +description+, using
# +key+ or PUSHBULLET_KEY as access token.
#
# The request is made with curl from a background thread, which is
# returned. When no key is available a warning is logged and nil returned.
def self.notify(description, event='notification', key = nil)
  if PUSHBULLET_KEY.nil? and key.nil?
    Log.warn "Could not notify, no PUSHBULLET_KEY"
    return
  end

  require 'json'

  Thread.new do
    event ||= 'notification'
    key ||= PUSHBULLET_KEY

    # Build the JSON with a serializer and run curl without a shell, so
    # quotes or shell metacharacters in the texts cannot break the payload
    # or the command line
    payload = JSON.generate("type" => "note", "title" => event.to_s, "body" => description.to_s)
    command = ["curl", "-s",
               "--header", "Authorization: Bearer #{key}",
               "-X", "POST", "https://api.pushbullet.com/v2/pushes",
               "--header", "Content-Type: application/json",
               "--data-binary", payload]
    IO.popen(command, &:read)
  end
end

.obj2digest(obj) ⇒ Object



363
364
365
366
367
368
369
370
371
# File 'lib/rbbt/util/misc/inspect.rb', line 363

# Digest of the obj2str rendering of +obj+; "" when that rendering is empty.
def self.obj2digest(obj)
  text = obj2str(obj)
  text.empty? ? "" : digest(text)
end

.obj2md5(obj) ⇒ Object



373
374
375
# File 'lib/rbbt/util/misc/inspect.rb', line 373

# Alternative name for obj2digest; returns whatever obj2digest returns for
# +obj+.
def self.obj2md5(obj)
  self.obj2digest(obj)
end

.obj2str(obj) ⇒ Object



274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
# File 'lib/rbbt/util/misc/inspect.rb', line 274

# Renders +obj+ as a string, recursing into Hashes and Arrays. The result is
# what obj2digest feeds to +digest+.
#
# Files and paths are described by name and modification time rather than
# content; long strings and arrays are sampled. For Annotated objects (other
# than AssociationItem) the rendering of their purged info is appended.
def self.obj2str(obj)
  _obj = obj
  obj = Annotated.purge(obj) if Annotated === obj

  str = case obj
        when nil
          'nil'
        when TrueClass
          'true'
        when FalseClass
          'false'
        when Hash
          "{"<< obj.collect{|k,v| obj2str(k) + '=>' << obj2str(v)}*"," << "}"
        when Symbol 
          obj.to_s
        # Path objects (only checked when the Path constant is defined)
        when (defined?(Path) and Path)
          if obj.exists?
            if obj.directory?
              files = obj.glob("**/*")
              "directory: #{files}"
            else
              "file: " << obj << "--" << mtime_str(obj)
            end
          else
            obj + " (file missing)"
          end
        when String
          # Strings naming existing files are rendered as Path objects
          if Misc.is_filename?(obj) and ! %w(. ..).include?(obj)
            obj2str Path.setup(obj.dup)
          else
            obj = obj.chomp if String === obj
            if obj.length > HASH2MD5_MAX_STRING_LENGTH
              # Long strings: a sample plus a digest of the text
              sample_large_obj(obj, HASH2MD5_MAX_STRING_LENGTH) << "--" << txt_digest_str(obj)
            else
              obj
            end
          end
        when Array
          if obj.length > HASH2MD5_MAX_ARRAY_LENGTH
            # Long arrays: only a sample of the elements is rendered
            "[" << sample_large_obj(obj, HASH2MD5_MAX_ARRAY_LENGTH).collect{|v| obj2str(v)} * "," << "]"
          else
            "[" << obj.collect{|v| obj2str(v)} * "," << "]"
          end
        when TSV::Parser
          remove_long_items(obj)
        when File 
          if obj.respond_to? :filename and obj.filename
            "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
          else
            "<IO:" << obj.path << "--" << mtime_str(obj.path) << ">"
          end
        # Step objects (only checked when the Step constant is defined)
        when (defined? Step and Step)
          "<IO:" << obj.short_path << ">"
        when IO
          if obj.respond_to? :filename and obj.filename
            "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
          else

            # Anonymous IO: a random-suffixed string is generated once and
            # stored on the object, so later calls return the same text
            if obj.respond_to? :obj2str
              obj.obj2str
            else
              class << obj
                attr_accessor :obj2str
              end
              obj.obj2str = obj.inspect + rand(1000000).to_s
            end
          end
        else
          if obj.respond_to? :filename and obj.filename
            "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
          else
            # Fall back to inspect, with ":0x<hex>" address fragments removed
            obj_ins = obj.inspect
            obj_str = if obj_ins =~ /:0x0/
              obj_ins.gsub(/:0x[a-f0-9]+/,'')
            else
              obj_ins
            end
          end
        end

  if defined? Annotated and Annotated === _obj and not (defined? AssociationItem and AssociationItem === _obj)
    info = Annotated.purge(_obj.info)
    str << "_" << obj2str(info) 
  end

  str
end

.object_delta(*args) ⇒ Object



261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# File 'lib/rbbt/util/misc/development.rb', line 261

# Runs the block and logs (Log.info) the objects found by
# ObjectSpace.each_object(*args) after it that were not found before it.
# Returns the block's value.
#
# The garbage collector is run before each scan, and the whole operation is
# serialized through MUTEX_FOR_THREAD_EXCLUSIVE.
def self.object_delta(*args)
  result, created = nil, nil

  MUTEX_FOR_THREAD_EXCLUSIVE.synchronize do
    # Both sets are allocated before the first scan
    before = Set.new
    created = Set.new

    GC.start
    ObjectSpace.each_object(*args){|o| before.add o }

    result = yield

    GC.start
    ObjectSpace.each_object(*args){|o| created.add o unless before.include? o }
  end

  Log.info "Delta: #{created.inspect}"
  result
end

.open_pipe(do_fork = false, close = true) ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/rbbt/util/misc/pipes.rb', line 64

# Creates a pipe, hands its write end to the given block (run in a forked
# process or in a thread) and returns the read end.
#
# do_fork - when true the block runs in a child process created with
#           Process.fork; otherwise it runs in a new Thread.
# close   - when true the write end is closed after the block finishes
#           (if it is not already closed).
#
# Returns the read end of the pipe, set up as a ConcurrentStream.
# Raises RuntimeError ("No block given") when called without a block.
def self.open_pipe(do_fork = false, close = true)
  raise "No block given" unless block_given?

  sout, sin = Misc.pipe

  if do_fork

    #parent_pid = Process.pid
    pid = Process.fork {
      # Child: close every other registered write end and our copy of the read end
      purge_pipes(sin)
      sout.close
      begin

        yield sin
        sin.close if close and not sin.closed? 

      rescue Exception
        Log.exception $!
        #Process.kill :INT, parent_pid
        # Any failure in the block makes the child exit with a non-zero status
        Kernel.exit! -1
      end
      Kernel.exit! 0
    }
    # Parent: only the child writes, so drop our copy of the write end
    sin.close

    ConcurrentStream.setup sout, :pids => [pid]
  else


    # Each end knows the other one as its pair
    ConcurrentStream.setup sin, :pair => sout
    ConcurrentStream.setup sout, :pair => sin

    thread = Thread.new do 
      begin
        
        yield sin

        sin.close if close and not sin.closed? and not sin.aborted?

      rescue Aborted
        Log.medium "Aborted open_pipe: #{$!.message}"
        raise $!
      rescue Exception
        Log.medium "Exception in open_pipe: #{$!.message}"
        Log.exception $!
        begin
          # Pass the exception on to the write end before re-raising it here
          sin.raise($!) if sin.respond_to? :raise
          sin.join if sin.respond_to? :join
        ensure
          raise $!
        end
      end
    end

    # Both ends track the producer thread
    sin.threads = [thread]
    sout.threads = [thread]
  end

  sout
end

.ordered_divide(array, num) ⇒ Object

Divides the array into consecutive chunks of at most num elements each, preserving the original order; the last chunk holds the remainder.



206
207
208
209
210
211
212
213
214
215
216
# File 'lib/rbbt/util/misc/development.rb', line 206

# Splits +array+ into consecutive slices of at most +num+ elements, keeping
# the original order. The last slice holds whatever remains.
#
# array - object responding to #length and #[start, length] (Array, String).
# num   - maximum slice size; must be at least 1.
#
# Returns an Array of slices ([] for an empty input).
# Raises ArgumentError when num is smaller than 1; the previous hand-written
# loop never advanced in that case and looped forever.
def self.ordered_divide(array, num)
  raise ArgumentError, "chunk size must be at least 1, got #{num.inspect}" unless num >= 1

  (0...array.length).step(num).map { |start| array[start, num] }
end

.parse_cmd_params(str) ⇒ Object



3
4
5
6
7
8
9
# File 'lib/rbbt/util/misc/options.rb', line 3

# Splits a command-line style string into its arguments.
#
# Text between quotes (single or double) becomes one argument, without the
# quotes; everything else is split on whitespace. An Array is returned as is.
def self.parse_cmd_params(str)
  if Array === str
    str
  else
    argument = /
           (?:["']([^"']*?)["']) |
           ([^"'\s]+)
  /x
    # Each match fills exactly one of the two groups
    str.scan(argument).map { |quoted, bare| quoted.nil? ? bare : quoted }
  end
end

.parse_sql_values(txt) ⇒ Object



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/rbbt/util/misc/format.rb', line 187

# Parses the tuples of an SQL VALUES list, e.g. "(1,'a'),(2,'b');", into an
# Array of Arrays of Strings.
#
# txt - String with one or more parenthesised, comma-separated tuples.
#       Single quotes delimit text in which commas and parentheses are
#       literal. Parsing stops at the first unquoted ";".
#
# Characters outside any tuple (keywords, whitespace between tuples) are
# ignored; the previous version raised NoMethodError on them because it
# appended to a nil field buffer.
#
# Returns an Array with one Array of field Strings per tuple. Quote
# characters are dropped; no escape sequences are interpreted.
def self.parse_sql_values(txt)
  values = []
  fields = []
  current = nil # nil while we are outside a parenthesised tuple
  quoted = false

  txt.strip.each_char do |c|
    if quoted
      if c == "'"
        quoted = false
      else
        current << c unless current.nil?
      end
      next
    end

    case c
    when "("
      current = +""
    when ")"
      fields << current
      values << fields
      fields = []
      current = nil
    when ','
      # A comma between tuples (current is nil) separates nothing
      unless current.nil?
        fields << current
        current = +""
      end
    when "'"
      quoted = true
    when ";"
      break
    else
      current << c unless current.nil?
    end
  end

  values
end

.paste_streams(streams, lines = nil, sep = "\t", header = nil, &block) ⇒ Object



606
607
608
609
610
611
612
# File 'lib/rbbt/util/misc/pipes.rb', line 606

# Opens a pipe (Misc.open_pipe) whose producer is _paste_streams and returns
# its read end.
#
# streams, lines, header and the block are forwarded unchanged to
# _paste_streams; sep falls back to a tab when nil is passed.
#
# The unused local that only computed streams.length has been removed.
def self.paste_streams(streams, lines = nil, sep = "\t", header = nil, &block)
  sep ||= "\t"
  Misc.open_pipe do |sin|
    self._paste_streams(streams, sin, lines, sep, header, &block)
  end
end

.path_relative_to(basedir, path) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/rbbt/util/misc/system.rb', line 60

# Returns +path+ expressed relative to +basedir+, or nil when +path+ is not
# located under +basedir+.
#
# Arguments that do not start with "/" are expanded with File.expand_path
# first. +basedir+ may be given with or without a trailing slash.
#
# The match is made on a directory boundary: "/foo/barbaz" is not inside
# "/foo/bar". The previous plain prefix test accepted it and returned a
# mangled remainder.
def self.path_relative_to(basedir, path)
  path = File.expand_path(path) unless path[0] == "/"
  basedir = File.expand_path(basedir) unless basedir[0] == "/"

  prefix = basedir[-1] == "/" ? basedir : basedir + "/"
  return nil unless path.start_with?(prefix)

  path[prefix.length..-1]
end

.pid_exists?(pid) ⇒ Boolean

Returns:

  • (Boolean)


9
10
11
12
13
14
15
16
17
# File 'lib/rbbt/util/misc/system.rb', line 9

# Tells whether a process with the given pid exists.
#
# pid - anything responding to #to_i, or nil.
#
# Returns false for nil or when Process.getpgid raises Errno::ESRCH, true
# otherwise.
def self.pid_exists?(pid)
  !pid.nil? && !!Process.getpgid(pid.to_i)
rescue Errno::ESRCH
  false
end

.pipeObject



24
25
26
27
28
29
30
31
32
33
34
# File 'lib/rbbt/util/misc/pipes.rb', line 24

# Creates a new IO.pipe and registers its write end in OPEN_PIPE_IN.
#
# Write ends that are already closed are pruned from the registry first.
# The pruning now happens inside PIPE_MUTEX, like every other access to
# OPEN_PIPE_IN in this module (purge_pipes iterates it under the same
# mutex); before, it ran unsynchronised.
#
# Returns [read_end, write_end].
def self.pipe
  res = PIPE_MUTEX.synchronize do
    OPEN_PIPE_IN.delete_if{|pipe| pipe.closed? }
    sout, sin = IO.pipe
    OPEN_PIPE_IN << sin

    [sout, sin]
  end
  Log.debug{"Creating pipe #{[res.last.inspect,res.first.inspect] * " => "}"}
  res
end

.positional2hash(keys, *values) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/rbbt/util/misc/options.rb', line 11

# Builds a Hash from a list of keys and the matching positional values.
#
# When the last value is a Hash it is treated as extra named values:
# positional entries that are nil or an empty String are dropped, the extra
# Hash is applied through Misc.add_defaults, and finally every entry whose
# key is not in +keys+ (as given, or in its String/Symbol counterpart) is
# removed.
def self.positional2hash(keys, *values)
  return Misc.zip2hash(keys, values) unless Hash === values.last

  named = values.pop
  result = Misc.zip2hash(keys, values)
  result.delete_if { |_key, value| value.nil? or (String === value and value.empty?) }
  result = Misc.add_defaults result, named
  result.delete_if do |key, _value|
    !keys.include?(key) && !keys.include?(Symbol === key ? key.to_s : key.to_sym)
  end
  result
end

.pre_forkObject



8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/rbbt/util/misc/development.rb', line 8

# Housekeeping meant to run before forking: closes every persistence
# connection that is open for writing, clears the registered progress bars
# and unlocks every locked Mutex found in ObjectSpace.
def self.pre_fork
  Persist::CONNECTIONS.values.each { |connection| connection.close if connection.write? }

  Log::ProgressBar::BARS.clear

  ObjectSpace.each_object(Mutex) do |mutex|
    next unless mutex.locked?
    begin
      mutex.unlock
    rescue ThreadError
      # unlocking failed for this mutex; leave it alone
    end
  end
end

.prepare_entity(entity, field, options = {}) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/rbbt/util/misc/objects.rb', line 16

# Sets up +entity+ (a String or an Array) as an Entity for the given field.
#
# entity  - returned untouched unless it is a String or an Array, or when
#           the Entity constant is not defined.
# field   - either an Entity module, or a value looked up with
#           Entity.formats.find / Entity.formats[].
# options - passed (as a copy) to the entity module's setup. The :dup_array
#           key is removed from the Hash that was passed in.
#
# Returns the entity, after setup when a matching entity module was found.
def self.prepare_entity(entity, field, options = {})
  return entity unless defined? Entity
  return entity unless String === entity or Array === entity
  options ||= {}

  # NOTE: this mutates the caller's options Hash
  dup_array = options.delete :dup_array

  if Entity === field or (Entity.respond_to?(:formats) and (_format = Entity.formats.find(field)))
    params = options.dup

    # Accept the format under a String key as well
    params[:format] ||= params.delete "format"
    # Use the format found above unless the caller supplied a non-empty one
    params.merge!(:format => _format) unless _format.nil? or (params.include?(:format) and not ((f = params[:format]).nil? or (String === f and f.empty?)))

    mod = Entity === field ? field : Entity.formats[field]
    # Frozen entities are duplicated before setup; Arrays get their elements
    # duplicated only when :dup_array was requested
    entity = mod.setup(
      ((entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity) ),
      params
    ) 
  end

  entity
end

.process_options(hash, *keys) ⇒ Object



140
141
142
143
144
145
146
# File 'lib/rbbt/util/misc/options.rb', line 140

# Removes the given option keys from +hash+ and returns their values.
#
# Each key is looked up as a Symbol first and as a String otherwise; the
# entry found is deleted from +hash+.
#
# Returns the single value when exactly one key is given, otherwise an Array
# with one value (or nil) per key, in order.
def self.process_options(hash, *keys)
  extracted = keys.collect do |key|
    symbol = key.to_sym
    hash.delete(hash.include?(symbol) ? symbol : key.to_s)
  end

  keys.length == 1 ? extracted.first : extracted
end

.process_stream(s) ⇒ Object



442
443
444
445
446
447
448
449
450
# File 'lib/rbbt/util/misc/pipes.rb', line 442

# Yields the stream to the block and joins it afterwards (when it responds
# to #join). If a StandardError is raised the stream is aborted (when it
# responds to #abort) and the error is raised again.
def self.process_stream(s)
  yield s
  s.join if s.respond_to? :join
rescue
  s.abort if s.respond_to? :abort
  raise
end

.process_to_hash(list) ⇒ Object



41
42
43
44
# File 'lib/rbbt/util/misc/options.rb', line 41

# Yields the whole list to the block and pairs the list with the block's
# result through zip2hash.
def self.process_to_hash(list)
  zip2hash(list, yield(list))
end

.profile(options = {}) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/rbbt/util/misc/development.rb', line 93

# Profiles the block with ruby-prof and prints a flat report to STDOUT.
#
# options - passed to the printer's #print.
#
# The report is printed even when the block raises; in that case
# "Profiling aborted" is printed first and the exception is re-raised.
# Returns the value of the block.
def self.profile(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    report = RubyProf.stop
    RubyProf::FlatPrinter.new(report).print(STDOUT, options)
  end
end

.profile_graph(options = {}) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/rbbt/util/misc/development.rb', line 75

# Profiles the block with ruby-prof and prints a graph report to STDOUT.
#
# options - passed to the printer's #print.
#
# The report is printed even when the block raises; in that case
# "Profiling aborted" is printed first and the exception is re-raised.
# Returns the value of the block.
def self.profile_graph(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    report = RubyProf.stop
    #report.eliminate_methods!([/annotated_array_clean_/])
    RubyProf::GraphPrinter.new(report).print(STDOUT, options)
  end
end

.profile_html(options = {}) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/rbbt/util/misc/development.rb', line 54

# Profiles the block with ruby-prof, writes the MultiPrinter report into a
# temporary directory and opens that directory with firefox.
#
# options - accepted for symmetry with the other profile helpers; not used.
#
# The report is produced even when the block raises; in that case
# "Profiling aborted" is printed first and the exception is re-raised.
# Returns the value of the block.
def self.profile_html(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    report = RubyProf.stop
    multi_printer = RubyProf::MultiPrinter.new(report)
    TmpFile.with_file do |dir|
      FileUtils.mkdir_p dir unless File.exist? dir
      multi_printer.print(:path => dir, :profile => 'profile')
      CMD.cmd("firefox  -no-remote  '#{ dir }'")
    end
  end
end

.proportions(array) ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/rbbt/util/misc/math.rb', line 88

# Computes the fraction of +array+ taken by each distinct element.
#
# Returns a Hash (default value 0) mapping element => proportion. The Hash
# gets its own #to_s that lists the entries sorted by proportion and then by
# element, one "value<TAB>element" line each.
# NOTE(review): the line format is "%3d", which prints proportions as
# integers (so anything below 1 shows as 0) -- confirm this is intended.
def self.proportions(array)
  share = 1.0 / array.length
  result = Hash.new 0

  array.each { |element| result[element] += share }

  result.define_singleton_method(:to_s) do
    sort{|a,b| a[1] == b[1] ? a[0] <=> b[0] : a[1] <=> b[1]}.collect{|k,c| "%3d\t%s" % [c, k]} * "\n"
  end

  result
end

.pull_keys(hash, prefix) ⇒ Object



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/rbbt/util/misc/options.rb', line 148

# Moves the entries of +hash+ that belong to +prefix+ into a new Hash.
#
# An entry belongs to the prefix when its key is "<prefix>_<name>" (it is
# stored under <name>, keeping the String/Symbol type of the original key)
# or when the key is the prefix itself (stored under the unchanged key).
# Matching entries are deleted from +hash+.
#
# The prefix must now match at the START of the key and is taken literally.
# The previous unanchored, unescaped pattern also pulled keys such as
# :unlock_x for prefix :lock.
#
# Returns the new Hash.
def self.pull_keys(hash, prefix)
  pulled = {}
  prefix_name = prefix.to_s
  pattern = /\A#{Regexp.escape(prefix_name)}_(.*)/

  hash.keys.each do |key|
    name = key.to_s
    if (match = pattern.match(name))
      case key
      when String
        pulled[match[1]] = hash.delete key
      when Symbol
        pulled[match[1].to_sym] = hash.delete key
      end
    elsif name == prefix_name
      pulled[key] = hash.delete key
    end
  end

  pulled
end

.purge_pipes(*save) ⇒ Object



55
56
57
58
59
60
61
62
# File 'lib/rbbt/util/misc/pipes.rb', line 55

# Closes every registered pipe write end (OPEN_PIPE_IN) that is still open,
# except the ones passed in +save+. Runs under PIPE_MUTEX.
def self.purge_pipes(*save)
  PIPE_MUTEX.synchronize do
    OPEN_PIPE_IN.each do |registered|
      registered.close unless save.include?(registered) || registered.closed?
    end
  end
end

.random_sample_in_range(total, size) ⇒ Object



218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/rbbt/util/misc/development.rb', line 218

# Draws +size+ distinct random integers from 0...total.
#
# For large samples (size > total / 10) positions are drawn from a shrinking
# pool: the picked slot is filled with the pool's last element. For small
# samples positions are drawn at random and redrawn on repetition.
#
# Returns a Set with the selected positions.
def self.random_sample_in_range(total, size)
  chosen = Set.new

  if size > total / 10
    pool = (0..total - 1).to_a
    size.times do |round|
      slot = (rand * (total - round)).floor
      if slot == pool.length - 1
        chosen << pool.pop
      else
        pick = pool[slot]
        filler = pool.pop
        pool[slot] = filler
        chosen << pick
      end
    end
  else
    size.times do
      candidate = (rand * total).floor
      candidate = (rand * total).floor while chosen.include?(candidate)
      chosen << candidate
    end
  end

  chosen
end

.read_full_stream(io) ⇒ Object



270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/rbbt/util/misc/pipes.rb', line 270

# Reads +io+ to the end in BLOCK_SIZE chunks and returns everything read.
#
# The stream is joined at the end when it responds to #join. If a
# StandardError is raised the stream is aborted (when it responds to
# #abort), the error is swallowed, and whatever was read so far is returned.
def self.read_full_stream(io)
  contents = ""
  begin
    while (chunk = io.read(BLOCK_SIZE))
      contents << chunk
    end
    io.join if io.respond_to? :join
  rescue
    io.abort if io.respond_to? :abort
  end
  contents
end

.read_stream(stream, size) ⇒ Object



346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
# File 'lib/rbbt/util/misc/pipes.rb', line 346

# Reads exactly +size+ bytes from +stream+, waiting for data as needed.
#
# Returns the String read.
# Raises ClosedStream when the stream reaches EOF before +size+ bytes could
# be read.
def self.read_stream(stream, size)
  str = nil
  # Wait (in 1 second slices) until the stream becomes readable
  Thread.pass while IO.select([stream],nil,nil,1).nil?
  # First chunk: keep trying until read returns something
  while not str = stream.read(size)
    IO.select([stream],nil,nil,1) 
    Thread.pass
    raise ClosedStream if stream.eof?
  end

  # Top up until the requested amount has been collected
  while str.length < size
    raise ClosedStream if stream.eof?
    IO.select([stream],nil,nil,1)
    if new = stream.read(size-str.length)
      str << new
    end
  end
  str
end

.release_pipes(*pipes) ⇒ Object



47
48
49
50
51
52
53
# File 'lib/rbbt/util/misc/pipes.rb', line 47

# Closes each of the given pipes that is still open. Nested Arrays are
# flattened. Runs under PIPE_MUTEX.
def self.release_pipes(*pipes)
  PIPE_MUTEX.synchronize do
    pipes.flatten.each { |io| io.closed? || io.close }
  end
end

.remove_lines(stream1, stream2, sort) ⇒ Object



679
680
681
# File 'lib/rbbt/util/misc/pipes.rb', line 679

# Thin wrapper over compare_lines with the arguments '-2 -3'.
# NOTE(review): these look like comm(1) column flags, which would keep only
# the lines unique to stream1 -- confirm against compare_lines.
def self.remove_lines(stream1, stream2, sort)
  self.compare_lines(stream1, stream2, '-2 -3', sort)
end

.remove_long_items(obj) ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/rbbt/util/misc/inspect.rb', line 114

# Returns a shortened version of +obj+: long strings, arrays, hashes and TSVs
# are truncated (recursively) and streams are replaced by a short
# description.
#
# Limits come from STRING_MAX_LENGTH, ARRAY_MAX_LENGTH, TSV_MAX_ROWS and
# TSV_MAX_FIELDS. Objects of any other kind are returned unchanged.
#
# Fix: when a TSV has too many fields it is now sliced to TSV_MAX_FIELDS
# fields and labelled with that limit. The previous code tested
# TSV_MAX_FIELDS but sliced and labelled with TSV_MAX_ROWS.
def self.remove_long_items(obj)
  case
  when IO === obj
    remove_long_items("IO: " + (obj.respond_to?(:filename) ? (obj.filename || obj.inspect) : obj.inspect ))
  when obj.respond_to?(:path)
    remove_long_items("File: " + obj.path)
  when TSV::Parser === obj
    filename = obj.filename
    # Streams read from STDIN get a random tag
    filename = "STDIN(rand-#{rand(10000000)})" if filename == '-'
    remove_long_items("TSV Stream: " + filename + " -- " << Misc.fingerprint(obj.options))
  when TSV === obj
    tsv = obj
    fields = tsv.fields

    if obj.size > TSV_MAX_ROWS
      tsv = obj.head(TSV_MAX_ROWS)
      tsv["Truncated rows at #{TSV_MAX_ROWS} (#{obj.size})"] = nil
    end

    if fields && fields.length > TSV_MAX_FIELDS
      tsv = obj.slice(fields[0..TSV_MAX_FIELDS-1])
      tsv.add_field "Truncated at #{TSV_MAX_FIELDS} (#{fields.length})" do
        nil
      end
    elsif fields.nil?
      # No fields: shorten each value instead
      new = tsv.annotate({})
      tsv.each do |k,v|
        new[k] = Misc.remove_long_items(v)
      end
      tsv = new
    end

    tsv
  when (Array === obj and obj.length > ARRAY_MAX_LENGTH)
    remove_long_items(obj[0..ARRAY_MAX_LENGTH-2] << "TRUNCATED at #{ ARRAY_MAX_LENGTH }/#{obj.length}")
  when (Hash === obj and obj.length > ARRAY_MAX_LENGTH)
    remove_long_items(obj.collect.compact[0..ARRAY_MAX_LENGTH-2] << ["TRUNCATED", "at #{ ARRAY_MAX_LENGTH }/#{obj.length}"])
  when (String === obj and obj.length > STRING_MAX_LENGTH)
    obj[0..STRING_MAX_LENGTH-1] << " TRUNCATED at #{STRING_MAX_LENGTH}/#{obj.length}"
  when Hash === obj
    new = {}
    obj.each do |k,v|
      new[k] = remove_long_items(v)
    end
    new
  when Array === obj
    obj.collect do |e| remove_long_items(e) end
  else
    obj
  end
end

.remove_quoted_new_line(stream, quote = '"') ⇒ Object



727
728
729
# File 'lib/rbbt/util/misc/pipes.rb', line 727

# Replaces newlines found inside quoted sections of +stream+ with a space,
# by delegating to swap_quoted_character.
#
# quote - the quote character (a double quote by default).
def self.remove_quoted_new_line(stream, quote = '"')
  swap_quoted_character(stream, "\n", " ", quote)
end

.reset_do_onceObject



133
134
135
# File 'lib/rbbt/util/misc/development.rb', line 133

# Resets the global flag $__did_once to false.
# NOTE(review): presumably re-arms a companion "do once" helper defined
# elsewhere -- confirm.
def self.reset_do_once
  $__did_once = false
end

.sample(ary, size, replacement = false) ⇒ Object



249
250
251
252
253
254
255
256
257
# File 'lib/rbbt/util/misc/development.rb', line 249

# Returns +size+ random elements of +ary+.
#
# Uses the collection's own #sample when available; otherwise picks random
# positions with random_sample_in_range and fetches them with #values_at.
# The +replacement+ argument is accepted but not used.
def self.sample(ary, size, replacement = false)
  return ary.sample(size) if ary.respond_to? :sample

  positions = random_sample_in_range(ary.length, size)
  ary.values_at(*positions)
end

.sample_large_obj(obj, max = 100) ⇒ Object



170
171
172
173
174
175
176
177
178
179
180
# File 'lib/rbbt/util/misc/inspect.rb', line 170

# Builds a short sample of a large Array or String: its beginning, nine small
# windows taken at regular intervals, its end and its length.
#
# obj - Array or String, expected to be longer than +max+.
# max - the head takes max/2 + 1 elements and the tail max/2.
#
# Returns an Array (for Array input) ending in "LENGTH: n", or a String with
# the parts joined by "..." and the length in parentheses.
def self.sample_large_obj(obj, max = 100)
  size = obj.length
  leading = obj[0..max/2]
  trailing = obj[-max/2..-1]
  stride = size / 10
  # Three elements around each of nine evenly spaced positions
  probes = (1..9).to_a.collect do |step|
    center = stride * step + step
    obj[center-1..center+1]
  end.flatten

  return leading + probes + trailing + ["LENGTH: #{obj.length}"] if Array === obj

  leading << "..." << probes*"," << "..." << trailing << "(#{obj.length})"
end

.sanitize_filename(filename, length = 254) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/rbbt/util/misc/inspect.rb', line 28

def self.sanitize_filename(filename, length = 254)
  if filename.length > length
    if filename =~ /(\..{2,9})$/
      extension = $1
    else
      extension = ''
    end

    post_fix = "--#{filename.length}@#{length}_#{Misc.digest(filename)[0..4]}" + extension

    filename = filename[0..(length - post_fix.length - 1)] << post_fix
  else
    filename
  end
  filename
end

.save_stream(file, stream) ⇒ Object



614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
# File 'lib/rbbt/util/misc/pipes.rb', line 614

# Saves +stream+ into +file+ in a background thread while handing back a
# second copy of the stream for the caller to consume.
#
# The stream is split with Misc.tee_stream; one copy is written with
# Misc.sensiblewrite, the other is returned. Both copies get +file+ as their
# filename.
def self.save_stream(file, stream)
  save, out = Misc.tee_stream stream
  out.filename = file
  save.filename = file

  # The block parameter (the calling thread) is not used below
  Thread.new(Thread.current) do |parent|
    begin
      Misc.sensiblewrite(file, save)
    rescue Exception
      # Writing failed: abort both the saved copy and the source stream
      save.abort if save.respond_to? :abort
      stream.abort if stream.respond_to? :abort
      stream.join
      Log.medium "Exception in save_stream: #{$!.message}"
      raise $!
    end
  end

  out
end

.sd(list) ⇒ Object



72
73
74
75
76
# File 'lib/rbbt/util/misc/math.rb', line 72

# Standard deviation of +list+: the square root of its variance.
# Returns nil for lists with fewer than three elements.
def self.sd(list)
  list.length < 3 ? nil : Math.sqrt(self.variance(list))
end

.select_lines(stream1, stream2, sort) ⇒ Object



683
684
685
# File 'lib/rbbt/util/misc/pipes.rb', line 683

# Thin wrapper over compare_lines with the arguments '-1 -2'.
# NOTE(review): these look like comm(1) column flags, which would keep only
# the lines present in both streams -- confirm against compare_lines.
def self.select_lines(stream1, stream2, sort)
  self.compare_lines(stream1, stream2, '-1 -2', sort)
end

.select_ranges(stream1, stream2, sep = "\t") ⇒ Object



445
446
447
448
449
# File 'lib/rbbt/util/misc/omics.rb', line 445

# Returns a pipe (Misc.open_pipe) fed by intersect_streams, which receives
# both streams, the pipe's write end and the separator.
def self.select_ranges(stream1, stream2, sep = "\t")
  Misc.open_pipe do |sin|
    intersect_streams(stream1, stream2,sin, sep)
  end
end

.send_email(from, to, subject, message, options = {}) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/rbbt/util/misc/system.rb', line 19

# Sends a plain-text email through an SMTP server.
#
# from, to - addresses used both in the headers and as envelope addresses.
# subject  - subject line.
# message  - body text.
# options  - :server (default 'localhost'), :port (25), :user, :pass,
#            :auth (:login), :from_alias and :to_alias (display names).
#            The Hash is set up as an IndiferentHash.
#
# NOTE(review): from, to, subject and the aliases are interpolated into the
# headers unescaped -- confirm callers never pass untrusted values.
def self.send_email(from, to, subject, message, options = {})
  IndiferentHash.setup(options)
  options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login

  server, port, user, pass, from_alias, to_alias, auth = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth

  # Headers, a blank line and the body; the heredoc text is sent verbatim
  msg = <<-END_OF_MESSAGE
From: #{from_alias} <#{from}>
To: #{to_alias} <#{to}>
Subject: #{subject}

#{message}
END_OF_MESSAGE

# The server name is passed a second time as the third positional argument
Net::SMTP.start(server, port, server, user, pass, auth) do |smtp|
smtp.send_message msg, from, to
end
end

.sensiblewrite(path, content = nil, options = {}, &block) ⇒ Object



365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
# File 'lib/rbbt/util/misc/pipes.rb', line 365

# Writes +content+ to +path+ through a temporary file that is moved into
# place once complete, under a lock.
#
# path    - destination file.
# content - a String, an IO/StringIO/File to be read in BLOCK_SIZE chunks,
#           or nil. Any other value produces an empty file.
# options - :force rewrites an existing file; options named lock or
#           starting with lock_ configure the lock, and :lock => false
#           disables the lock file.
# block   - when given it receives the temporary file (opened 'wb') and is
#           used instead of +content+.
#
# If +path+ already exists and :force is not set, nothing is written and the
# content stream is consumed.
# On failure the content stream is aborted, +path+ is removed and the
# exception is raised again (except for Aborted, which is only logged).
def self.sensiblewrite(path, content = nil, options = {}, &block)
  force = Misc.process_options options, :force

  # Fast path: nothing to do, but drain the stream
  if Open.exists? path and not force
    Misc.consume_stream content 
    return
  end

  lock_options = Misc.pull_keys options.dup, :lock
  lock_options = lock_options[:lock] if Hash === lock_options[:lock]
  tmp_path = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_dir})
  tmp_path_lock = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_lock_dir})

  # :lock => false turns the lock file off
  tmp_path_lock = nil if FalseClass === options[:lock]

  Misc.lock tmp_path_lock, lock_options do

    # Check again now that we are inside the lock
    if Open.exists? path and not force
      Log.warn "Path exists in sensiblewrite, not forcing update: #{ path }"
      Misc.consume_stream content 
    else
      FileUtils.mkdir_p File.dirname(tmp_path) unless File.directory? File.dirname(tmp_path)
      FileUtils.rm_f tmp_path if File.exist? tmp_path
      begin

        # Fill the temporary file according to the kind of content
        case
        when block_given?
          File.open(tmp_path, 'wb', &block)
        when String === content
          File.open(tmp_path, 'wb') do |f| f.write content end
        when (IO === content or StringIO === content or File === content)

          Open.write(tmp_path) do |f|
            f.sync = true
            while block = content.read(BLOCK_SIZE)
              f.write block
            end 
          end
        else
          File.open(tmp_path, 'wb') do |f|  end
        end

        # Move into place; a failed move is tolerated if the destination
        # exists anyway
        begin
          Misc.insist do
            Open.mv tmp_path, path, lock_options
          end
        rescue Exception
          raise $! unless Open.exists? path
        end

        Open.touch path if Open.exists? path
        content.join if content.respond_to? :join and not (content.respond_to?(:joined?) and content.joined?)

        Open.notify_write(path) 
      rescue Aborted
        Log.medium "Aborted sensiblewrite -- #{ Log.reset << Log.color(:blue, path) }"
        content.abort if content.respond_to? :abort
        Open.rm path if File.exist? path
      rescue Exception
        # Prefer the exception recorded in an aborted stream over the local one
        exception = (AbortedStream === content and content.exception) ? content.exception : $!
        Log.medium "Exception in sensiblewrite: [#{Process.pid}] #{exception.message} -- #{ Log.color :blue, path }"
        content.abort if content.respond_to? :abort
        Open.rm path if File.exist? path
        raise exception
      rescue
        # NOTE(review): never reached -- the `rescue Exception` clause above
        # already handles every StandardError
        Log.exception $!
        raise $!
      ensure
        FileUtils.rm_f tmp_path if File.exist? tmp_path
        # Release a Lockfile handed in through the lock options
        if Lockfile === lock_options[:lock] and lock_options[:lock].locked?
          lock_options[:lock].unlock
        end
      end
    end
  end
end

.snake_case(string) ⇒ Object



97
98
99
100
101
102
103
104
105
# File 'lib/rbbt/util/misc/format.rb', line 97

# Converts a CamelCase or spaced name into snake_case.
#
# Runs of two or more capitals (acronyms) keep their case; every other word
# is downcased. Whitespace becomes "_" and characters that are neither word
# characters nor "_" are removed. Symbols are accepted; nil returns nil.
def self.snake_case(string)
  return nil if string.nil?

  text = Symbol === string ? string.to_s : string

  # Break between an acronym and a following capitalised word, then between
  # a lowercase letter and a capital
  separated = text.gsub(/([A-Z]{2,})([A-Z][a-z])/,'\1_\2')
  separated = separated.gsub(/([a-z])([A-Z])/,'\1_\2')
  cleaned = separated.gsub(/\s/,'_').gsub(/[^\w_]/, '')

  words = cleaned.split("_").collect do |word|
    word.match(/[A-Z]{2,}/) ? word : word.downcase
  end
  words * "_"
end

.sort_genomic_locations(stream) ⇒ Object



319
320
321
# File 'lib/rbbt/util/misc/omics.rb', line 319

# Sorts a stream of genomic locations with sort_stream: by the first field
# and then numerically by the second. Lines starting with '#' are passed to
# sort_stream as header lines.
#
# sep - field separator handed to sort(1) with -t (":" by default, like
#       sort_genomic_locations_strict).
#
# Fix: the body interpolated +sep+ but the method did not declare it, so
# every call raised NameError. It is now an optional parameter.
def self.sort_genomic_locations(stream, sep = ":")
  sort_stream(stream, '#', "-k1,1 -k2,2n -t#{sep}")
end

.sort_genomic_locations_strict(stream, sep = ":") ⇒ Object



315
316
317
# File 'lib/rbbt/util/misc/omics.rb', line 315

# Sorts a stream of genomic locations with sort_stream, using the sort(1)
# keys "-k1,1V -k2,2n" and +sep+ as the field separator (-t).
# '#' is passed to sort_stream as the header prefix.
def self.sort_genomic_locations_strict(stream, sep = ":")
  sort_stream(stream, '#', "-k1,1V -k2,2n -t#{sep}")
end

.sort_mutation_stream(stream, sep = ":") ⇒ Object



701
702
703
# File 'lib/rbbt/util/misc/pipes.rb', line 701

# Pipes +stream+ through a shell pipeline that keeps the lines containing
# +sep+, removes duplicates, rewrites a leading "M:" to "MT:" and sorts by
# field 1 and then numerically by fields 2 and 3 (C locale).
#
# Returns the result of CMD.cmd called with :pipe => true.
# NOTE(review): +sep+ is interpolated into the shell command unescaped --
# confirm it never comes from untrusted input.
def self.sort_mutation_stream(stream, sep=":")
  CMD.cmd("grep '#{sep}' | sort -u | sed 's/^M:/MT:/' | env LC_ALL=C sort -k1,1 -k2,2n -k3,3n -t'#{sep}'", :in => stream, :pipe => true, :no_fail => true)
end

.sort_mutation_stream_strict(stream, sep = ":") ⇒ Object



697
698
699
# File 'lib/rbbt/util/misc/pipes.rb', line 697

# Same pipeline as sort_mutation_stream, with -V added to the final sort:
# keeps the lines containing +sep+, removes duplicates, rewrites a leading
# "M:" to "MT:" and sorts on fields 1, 2 (numeric) and 3 (numeric).
#
# Returns the result of CMD.cmd called with :pipe => true.
# NOTE(review): +sep+ is interpolated into the shell command unescaped --
# confirm it never comes from untrusted input.
def self.sort_mutation_stream_strict(stream, sep=":")
  CMD.cmd("grep '#{sep}' | sort -u | sed 's/^M:/MT:/' | env LC_ALL=C sort -V -k1,1 -k2,2n -k3,3n -t'#{sep}'", :in => stream, :pipe => true, :no_fail => true)
end

.sort_mutations_strict(mutations) ⇒ Object Also known as: sort_mutations



275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'lib/rbbt/util/misc/omics.rb', line 275

# Sorts mutations written as "chr:pos:change" by chromosome, position and
# change.
#
# A leading "chr" (any case) is ignored for ordering. Chromosomes are
# compared numerically, with X, Y and MT (or M) placed after the numbered
# ones, in that order. Positions are compared as integers.
#
# Fix: Y used to be mapped to 22 (and X to 23), so Y was mixed in with
# chromosome 22 and sorted before X. X, Y and MT now map to 23, 24 and 25.
#
# Returns a new Array with the original Strings in sorted order.
def self.sort_mutations_strict(mutations)
  named_chromosomes = { "X" => 23, "Y" => 24, "MT" => 25, "M" => 25 }

  mutations.sort_by do |mutation|
    chr, pos, mut = mutation.split ":"
    chr = chr.sub(/^chr/i, '')
    [named_chromosomes.fetch(chr) { chr.to_i }, pos.to_i, mut]
  end
end

.sort_stream(stream, header_hash = "#", cmd_args = "-u") ⇒ Object



452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
# File 'lib/rbbt/util/misc/pipes.rb', line 452

# Sorts the lines of +stream+ with sort(1), leaving the header in place.
#
# stream      - anything TSV.get_stream can turn into a stream.
# header_hash - leading lines matching /^<header_hash>/ are copied to the
#               output unsorted.
# cmd_args    - extra arguments for sort ("-u" by default).
#
# Returns the read end of a pipe (Misc.open_pipe) with the header lines
# followed by the sorted lines.
def self.sort_stream(stream, header_hash = "#", cmd_args = "-u")
  Misc.open_pipe do |sin|
    stream = TSV.get_stream stream

    # Copy the header straight through
    line = stream.gets
    while line =~ /^#{header_hash}/ do
      sin.puts line
      line = stream.gets
    end

    # Feed the first non-header line and the rest of the stream to sort
    # through a second pipe
    line_stream = Misc.open_pipe do |line_stream_in|
      line_stream_in.puts line
      begin
        Misc.consume_stream(stream, false, line_stream_in)
      rescue
        raise $!
      end
    end

    sorted = CMD.cmd("env LC_ALL=C sort #{cmd_args || ""}", :in => line_stream, :pipe => true)

    begin
      Misc.consume_stream(sorted, false, sin)
    rescue
      Log.exception $!
      begin
        # Pass the failure on to both streams before re-raising
        sorted.raise($!) if sorted.respond_to? :raise
        stream.raise($!) if stream.respond_to? :raise
      ensure
        raise $!
      end
    end
  end
end

.sorted_array_hits(a1, a2) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/rbbt/util/misc/manipulation.rb', line 36

# Finds which positions of the sorted array +a1+ hold values that also
# appear in the sorted array +a2+.
#
# Both arrays are consumed with #shift while they are walked, so they are
# modified in place.
#
# Returns an Array with the (0-based) positions in the original +a1+.
def self.sorted_array_hits(a1, a2)
  left, right = a1.shift, a2.shift
  position = 0
  hits = []

  until left.nil? or right.nil?
    case left <=> right
    when 0
      hits << position
      left, right = a1.shift, a2.shift
      position += 1
    when -1
      # Advance a1 until it catches up with the current a2 value
      while not left.nil? and left < right
        left = a1.shift
        position += 1
      end
    when 1
      # Advance a2 until it catches up with the current a1 value
      right = a2.shift
      right = a2.shift while not right.nil? and right < left
    end
  end

  hits
end

.std_num_vector(v, min, max) ⇒ Object



32
33
34
35
36
37
38
39
# File 'lib/rbbt/util/misc/math.rb', line 32

# Rescales the values of +v+ linearly so that its smallest value maps to
# +min+ and its largest to +max+.
#
# Returns a new Array with the rescaled values.
def self.std_num_vector(v, min, max)
  lowest = Misc.min(v)
  spread = Misc.max(v) - lowest
  target = max.to_f - min.to_f

  v.map { |value| min + target * (value.to_f - lowest) / spread }
end

.string2const(string) ⇒ Object



21
22
23
24
25
26
27
28
29
30
# File 'lib/rbbt/util/misc/development.rb', line 21

# Resolves a constant from its name, e.g. "Foo::Bar".
#
# The name is split on "::" and each part is looked up with const_get,
# starting from Kernel. Returns nil for nil.
def self.string2const(string)
  return nil if string.nil?

  string.to_s.split('::').inject(Kernel) { |scope, name| scope.const_get name }
end

.string2hash(string) ⇒ Object

Parses a '#'-separated string of key=value pairs (for example "a=1#:b=:sym#flag") into an options Hash.



214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
# File 'lib/rbbt/util/misc/options.rb', line 214

# Parses a compact option string such as "a=1#:b=:sym#flag" into a Hash.
#
# Entries are separated by "#"; each one is "key" or "key=value".
#   * a key starting with ":" becomes a Symbol, otherwise it stays a String
#   * no value (or an empty one)     => true
#   * ":name"                        => Symbol
#   * "/regexp/"                     => Regexp
#   * 'text' or "text"               => the text without the quotes
#   * digits                         => Integer
#   * digits with a decimal point    => Float
#   * "true" / "false"               => true / false
#   * anything else                  => the String itself
#
# Fixes:
#   * "false" now yields false. The previous `options[key] = false and next`
#     never skipped (the assignment evaluates to false), so execution fell
#     through and stored the String "false".
#   * The eval-based fallback inside the loop and the alternative
#     implementation after `return options` could never run and have been
#     removed.
def self.string2hash(string)
  options = {}

  string.split('#').each do |str|
    key, _separator, value = str.partition "="

    key = key[1..-1].to_sym if key[0] == ":"

    options[key] =
      if value.empty?
        true
      elsif value[0] == ":"
        value[1..-1].to_sym
      elsif value[0] == "/" and value[-1] == "/"
        Regexp.new(/#{value[1..-2]}/)
      elsif value =~ /^['"].*['"]$/
        value[1..-2]
      elsif value =~ /^\d+$/
        value.to_i
      elsif value =~ /^\d*\.\d+$/
        value.to_f
      elsif value == "true"
        true
      elsif value == "false"
        false
      else
        value
      end
  end

  options
end

.sum(list) ⇒ Object



41
42
43
# File 'lib/rbbt/util/misc/math.rb', line 41

# Adds up the non-nil elements of +list+, starting from 0.0 (so the result
# is a Float for numeric input).
def self.sum(list)
  list.compact.inject(0.0) { |total, value| total + value }
end

.swap_quoted_character(stream, charout = "\n", charin = " ", quote = '"') ⇒ Object



705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
# File 'lib/rbbt/util/misc/pipes.rb', line 705

# Returns a pipe (Misc.open_pipe) with the contents of +stream+ in which
# every +charout+ found inside a quoted section is replaced by +charin+.
#
# stream  - read one character at a time with #getc.
# charout - character to replace (newline by default).
# charin  - replacement (a space by default).
# quote   - quote character; one preceded by a backslash does not toggle
#           the quoted state.
def self.swap_quoted_character(stream, charout="\n", charin=" ", quote='"')
  io = Misc.open_pipe do |sin|
    begin
      quoted = false
      prev = nil
      while c = stream.getc
        if c == quote and not prev == "\\"
          quoted = ! quoted
        end
        c = charin if c == charout and quoted
        sin << c
        # prev holds the character as written (after any replacement)
        prev = c
      end
    rescue
      stream.abort if stream.respond_to? :abort
      raise $!
    ensure
      stream.join if stream.respond_to? :join
    end
  end
end

.tar(path, tarfile = nil) ⇒ Object

Creates a tar file in memory recursively from the given path.

Returns a StringIO whose underlying String is the contents of the tar file.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/rbbt/util/tar.rb', line 14

# Creates a tar archive recursively from the contents of +path+.
#
# path    - directory to archive; entry names are relative to it.
# tarfile - IO to write to; an in-memory StringIO is used when omitted.
#
# Entries come from Dir[path/**/*]. Directories are added with their mode,
# files with their mode and binary contents.
#
# Returns the (rewound) IO holding the archive.
def self.tar(path, tarfile = nil)
  tarfile ||= StringIO.new("")
  leading_path = /^#{Regexp::escape path}\/?/

  Gem::Package::TarWriter.new(tarfile) do |writer|
    Dir[File.join(path, "**/*")].each do |entry|
      mode = File.stat(entry).mode
      name = entry.sub(leading_path, '')

      if File.directory?(entry)
        writer.mkdir name, mode
      else
        writer.add_file(name, mode) do |target|
          File.open(entry, "rb") { |source| target.write source.read }
        end
      end
    end
  end

  tarfile.rewind

  tarfile
end

.tarize(path) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/rbbt/util/tar.rb', line 37

# Creates a gzip-compressed tar archive of the directory +path+.
#
# path - directory to archive (entered with Misc.in_dir, so entry names are
#        relative to it).
# gz   - IO to write the compressed data to; an in-memory StringIO is used
#        when omitted.
#
# Fix: the gzip stream is now ended with GzipWriter#finish, which writes the
# footer but leaves +gz+ open, and +gz+ is then simply rewound. The previous
# code closed +gz+ and called gz.reopen('read'); on a StringIO that
# re-initialises the stream with the String "read", discarding the archive.
#
# Returns +gz+, rewound and ready to be read.
def self.tarize(path, gz = nil)
  gz ||= StringIO.new("".b)

  tar = Misc.in_dir(path) do self.tar('.') end
  tar.rewind

  z = Zlib::GzipWriter.new(gz)
  z.write tar.string
  z.finish

  gz.rewind

  gz
end

.tee_stream_thread(stream) ⇒ Object



246
247
248
# File 'lib/rbbt/util/misc/pipes.rb', line 246

# Splits +stream+ into two streams: shorthand for
# tee_stream_thread_multiple(stream, 2).
def self.tee_stream_thread(stream)
  tee_stream_thread_multiple(stream, 2)
end

.tee_stream_thread_multiple(stream, num = 2) ⇒ Object Also known as: tee_stream

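Duplicates stream into num pipes: a splitter thread reads the stream in blocks and writes every block to each pipe. Returns the array of read ends.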



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/rbbt/util/misc/pipes.rb', line 175

# Duplicates +stream+ into +num+ pipes.
#
# A splitter thread reads the stream in BLOCK_SIZE blocks and writes each
# block to every pipe. A pipe whose write fails with IOError is skipped from
# then on; any other write error aborts the whole operation.
#
# Returns an Array with the read ends, each set up as a ConcurrentStream
# tied to the splitter thread. The first one is the "main" pipe: it has
# autojoin enabled and a callback that joins the source stream and closes
# the remaining write ends.
def self.tee_stream_thread_multiple(stream, num = 2)
  in_pipes = []
  out_pipes = []
  num.times do 
    sout, sin = Misc.pipe
    in_pipes << sin
    out_pipes << sout
  end

  filename = stream.filename if stream.respond_to? :filename

  # The block parameter (the calling thread) is not used below
  splitter_thread = Thread.new(Thread.current) do |parent|
    begin

      # skip[i] is set once writing to pipe i has failed with IOError
      skip = [false] * num
      begin
        while block = stream.readpartial(BLOCK_SIZE)

          in_pipes.each_with_index do |sin,i|
            begin 
              sin.write block
            rescue IOError
              Log.error("Tee stream #{i} #{Misc.fingerprint stream} IOError: #{$!.message} (#{Misc.fingerprint sin})");
              skip[i] = true
            rescue
              Log.error("Tee stream #{i} #{Misc.fingerprint stream} Exception: #{$!.message} (#{Misc.fingerprint sin})");
              raise $!
            end unless skip[i] 
          end
        end
      rescue IOError
        # An IOError from readpartial ends the copy loop
      end

      stream.close unless stream.closed?
      #stream.join if stream.respond_to? :join
      # Only the first write end is closed here; the others are closed by the
      # main pipe's callback below
      in_pipes.first.close unless in_pipes.first.closed?
      #Log.medium "Tee done #{Misc.fingerprint stream}"
    rescue Aborted, Interrupt
      stream.abort if stream.respond_to? :abort
      out_pipes.each do |sout|
        sout.abort if sout.respond_to? :abort
      end
      Log.medium "Tee aborting #{Misc.fingerprint stream}"
      raise $!
    rescue Exception
      # Same clean-up as above, but the source stream receives the exception
      stream.abort($!) if stream.respond_to? :abort
      out_pipes.each do |sout|
        sout.abort if sout.respond_to? :abort
      end
      Log.medium "Tee exception #{Misc.fingerprint stream}"
      raise $!
    end
  end

  out_pipes.each do |sout|
    ConcurrentStream.setup sout, :threads => splitter_thread, :filename => filename, :_pair => stream
  end

  main_pipe = out_pipes.first
  main_pipe.autojoin = true

  main_pipe.callback = Proc.new do 
    stream.join if stream.respond_to? :join
    in_pipes[1..-1].each do |sin|
      sin.close unless sin.closed?
    end
  end

  out_pipes
end

.time_tickObject



284
285
286
287
288
289
290
291
292
293
# File 'lib/rbbt/util/misc/development.rb', line 284

# Prints a simple stopwatch reading to standard output.
#
# The first call (while the global $_last_time_tick is nil) records the
# current time and announces that ticking started. Every later call prints
# the whole seconds and the remaining milliseconds elapsed since the previous
# call, then resets the reference time.
def self.time_tick
  previous = $_last_time_tick

  if previous.nil?
    $_last_time_tick = Time.now
    puts "Tick started: #{Time.now}"
    return
  end

  ellapsed = Time.now - previous
  whole_seconds = ellapsed.to_i
  # Milliseconds left over once the whole seconds are taken out.
  remaining_ms = (ellapsed * 1000).to_i - whole_seconds * 1000
  puts "Tick ellapsed: #{whole_seconds} s. #{remaining_ms} ms"
  $_last_time_tick = Time.now
end

.timespan(str, default = "s") ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/rbbt/util/misc.rb', line 71

# Converts a human-readable time span into a number of seconds.
#
# The string is scanned for "<digits><unit>" pairs and their values are added
# up, so "1h 30m", "1h30m" and "90min" all yield 5400. Recognized units are
# s, sec, m, min, h, d, w, mo (30 days), y (365 days), plus ' for minutes and
# '' for seconds. A number without a unit uses +default+.
#
# @param str [String] the time span description
# @param default [String] unit applied to numbers that carry no unit
# @return [Integer] total number of seconds (0 when no number is found)
# @raise [TypeError] when a unit (or +default+) is not a recognized unit
def self.timespan(str, default = "s")
  tokens = {
    "s" => (1),
    "sec" => (1),
    "m" => (60),
    "min" => (60),
    "''" => (1),
    "'" => (60),
    "h" => (60 * 60),
    "d" => (60 * 60 * 24),
    "w" => (60 * 60 * 24 * 7),
    "mo" => (60 * 60 * 24 * 30),
    "y" => (60 * 60 * 24 * 365),
  }

  # A bare number is captured with an empty unit; map that to the default.
  tokens[""] = tokens[default]

  # The unit is restricted to letters and quote marks. The previous \w* also
  # swallowed digits, so "1h30m" was read as the unit "h30m", and it could
  # never match the ' and '' units listed in the table.
  str.scan(/(\d+)([a-zA-Z']*)/).inject(0) do |time, (amount, measure)|
    seconds = tokens[measure]
    if seconds.nil?
      # TypeError is what the former nil multiplication raised; the class is
      # kept for compatibility but the message now names the offending unit.
      unit = measure.empty? ? default : measure
      raise TypeError, "Unknown time unit #{unit.inspect} in timespan #{str.inspect}"
    end
    time + amount.to_i * seconds
  end
end

.to_utf8(string) ⇒ Object



159
160
161
# File 'lib/rbbt/util/misc/format.rb', line 159

# Returns a UTF-8 copy of +string+ with every invalid or unconvertible byte
# sequence replaced by "?".
#
# The text is first transcoded to UTF-16BE (where the replacements are
# applied) and then back to UTF-8.
def self.to_utf8(string)
  replacement_rules = { :invalid => :replace, :undef => :replace, :replace => "?" }
  utf16_text = string.encode("UTF-16BE", **replacement_rules)
  utf16_text.encode('UTF-8')
end

.tokenize(str) ⇒ Object



67
68
69
# File 'lib/rbbt/util/misc.rb', line 67

# Splits +str+ into tokens: double-quoted chunks, single-quoted chunks (both
# returned with their quotes) and runs of characters that are neither quotes
# nor whitespace.
#
# @return [Array<String>] the tokens in order of appearance
def self.tokenize(str)
  token_pattern = /"[^"]*"|'[^']*'|[^"'\s]+/
  str.scan(token_pattern)
end

.total_length(ranges) ⇒ Object



32
33
34
# File 'lib/rbbt/util/misc/manipulation.rb', line 32

# Adds up the sizes of the ranges returned by collapse_ranges(ranges).
#
# Each range contributes (end - begin + 1), i.e. both endpoints are counted.
# NOTE(review): collapse_ranges is defined elsewhere; presumably it merges
# overlapping ranges so that shared positions are counted once.
def self.total_length(ranges)
  collapsed = self.collapse_ranges(ranges)
  collapsed.inject(0) { |total, range| total + (range.end - range.begin + 1) }
end

.translate_dna_mutation_hgvs2rbbt(cds) ⇒ Object



205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/rbbt/util/misc/omics.rb', line 205

# Translates the change part of an HGVS-style DNA mutation string.
#
# * Substitution ("...A>T"): returns the text after the last ">".
# * Deletion ("...del3" / "...delAC"): returns one "-" per deleted base, the
#   count taken either from a number or from the length of the base string.
# * Insertion ("...ins2" / "...insAC"): returns "+" followed by the inserted
#   bases, or by that many "N" when only a number is given.
# * Anything else: logs the input and returns "?(<cds>)".
#
# Deletions or insertions whose tail is neither a number nor a base string are
# logged through Log.debug and the tail is returned unchanged.
def self.translate_dna_mutation_hgvs2rbbt(cds)
  if cds =~ />/
    cds.split(">").last
  elsif cds =~ /del/
    deleted = cds.split("del").last.chomp
    if deleted =~ /^\d+$/
      "-" * deleted.to_i
    elsif deleted =~ /^[ACTG]+$/i
      "-" * deleted.length
    else
      Log.debug "Unknown deletion: #{ deleted }"
      deleted
    end
  elsif cds =~ /ins/
    inserted = cds.split("ins").last
    if inserted =~ /^\d+$/
      "+" + "N" * inserted.to_i
    elsif inserted =~ /^[NACTG]+$/i
      "+" + inserted
    else
      Log.debug "Unknown insertion: #{inserted }"
      inserted
    end
  else
    Log.debug "Unknown change: #{cds}"
    "?(" + cds + ")"
  end
end

.translate_prot_mutation_hgvs2rbbt(mutation) ⇒ Object



238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'lib/rbbt/util/misc/omics.rb', line 238

# Translates an HGVS-style protein mutation (one-letter amino-acid codes) into
# the notation used by Rbbt.
#
# After removing the first "p." found in the text:
# * "<aa><position><aa>" substitutions are returned as they are;
# * frameshifts (text containing "fs") become "<aa><position>Frameshift";
# * insertions/deletions (text containing "ins", "del" or ">") become
#   "<aa><position>Indel";
# * anything else is logged through Log.debug and yields nil.
#
# The argument is never modified: the original String#sub! call altered the
# caller's string in place and raised on frozen strings.
#
# @param mutation [String] e.g. "p.R248fs*12"
# @return [String, nil] the translated mutation, or nil when not understood
def self.translate_prot_mutation_hgvs2rbbt(mutation)
  # Hash#values returns a fresh array, so the constant itself is not touched.
  aa_codes = THREE_TO_ONE_AA_CODE.values + %w(X B Z J * ?)
  aa_class = "[#{aa_codes * ""}]"
  substitution = Regexp.new("^#{aa_class}\\d+#{aa_class}")
  leading_aa = Regexp.new("^#{aa_class}\\d+")

  mutation = mutation.sub('p.', '')

  case mutation
  when substitution
    mutation
  when /fs/
    position = mutation[leading_aa]
    if position.nil?
      Log.debug "Unknown Frameshift: #{mutation}"
      nil
    else
      position + "Frameshift"
    end
  when /ins|del|>/
    position = mutation[leading_aa]
    if position.nil?
      Log.debug "Unknown Indel"
      nil
    else
      position + "Indel"
    end
  else
    Log.debug "Unknown change: #{mutation}"
    nil
  end
end

.try3times(&block) ⇒ Object



187
188
189
# File 'lib/rbbt/util/misc/development.rb', line 187

# Convenience wrapper that hands the block to insist with a count of 3.
# NOTE(review): insist is defined elsewhere; presumably it retries the block
# up to that many times before giving up.
def self.try3times(&block)
  attempts = 3
  insist(attempts, &block)
end

.txt_digest_str(txt) ⇒ Object



260
261
262
# File 'lib/rbbt/util/misc/inspect.rb', line 260

# Returns the text "digest: " followed by the result of digest(txt).
def self.txt_digest_str(txt)
  "digest: ".dup.concat(digest(txt))
end

.ungzip(tarfile) ⇒ Object

un-gzips the given IO, returning the decompressed version as a StringIO



79
80
81
82
83
84
# File 'lib/rbbt/util/tar.rb', line 79

# Un-gzips the given IO, returning the decompressed content as a StringIO.
#
# The gzip reader is always closed, also when decompression fails part-way
# (previously it was only closed after a successful read). Closing the reader
# closes +tarfile+ as well.
#
# @param tarfile [IO] stream positioned at the start of gzip data
# @return [StringIO] the decompressed data
# @raise [Zlib::Error] when the data is not valid gzip or is truncated
def self.ungzip(tarfile)
  # The block form closes the reader when the block exits, normally or not.
  Zlib::GzipReader.wrap(tarfile) do |gz|
    StringIO.new(gz.read)
  end
end

.untar(io, destination) ⇒ Object

untars the given IO into the specified directory



94
95
96
97
98
99
100
101
102
103
# File 'lib/rbbt/util/tar.rb', line 94

# Untars the given IO into the specified directory.
#
# A Path is resolved with #find first. When +io+ is then a String naming an
# existing file, the file is opened with Open.open and the resulting stream is
# processed recursively; anything else is handed to _untar_cmd as it is.
#
# Uses File.exist?: the File.exists? alias was removed in Ruby 3.2.
#
# @param io [IO, String, Path] tar stream or location of the tar file
# @param destination [String] directory to extract into
# @return whatever _untar_cmd returns
def self.untar(io, destination)
  io = io.find if Path === io
  return _untar_cmd(io, destination) unless String === io && File.exist?(io)

  Open.open(io) do |f|
    untar(f, destination)
  end
end

.unzip_in_dir(file, dir) ⇒ Object



403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
# File 'lib/rbbt/util/misc/development.rb', line 403

# Extracts a zip archive into +dir+ by running the `unzip` command.
#
# When Open.remote? reports the file as remote, its content is read through
# Open.open and saved to a temporary ".zip" file that is then unzipped;
# otherwise the file is unzipped directly. A Path is resolved with #find.
#
# NOTE: both paths are placed inside single quotes in the command line, so a
# path that itself contains a single quote will break the command.
#
# @param file [String, Path] location of the zip archive
# @param dir [String] target directory
# @raise [RuntimeError] when +dir+ exists but is not a directory
def self.unzip_in_dir(file, dir)
  # Report the offending target; the message used to show the zip file.
  raise "Target is not a directory: #{dir}" if File.exist?(dir) && !File.directory?(dir)

  # Remoteness is decided on the argument as given, before resolving a Path.
  remote = Open.remote? file
  file = file.find if Path === file

  if remote
    Open.open(file) do |stream|
      TmpFile.with_file(stream.read, true, :extension => 'zip') do |zip_file|
        CMD.cmd("unzip '#{zip_file}' -d '#{dir}'")
      end
    end
  else
    CMD.cmd("unzip '#{file}' -d '#{dir}'")
  end
end

.use_lock_id=(use = true) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# File 'lib/rbbt/util/misc/lock.rb', line 2

# Switches the use of lock ids in Lockfile on or off and adjusts its timing
# settings to match.
#
# With a truthy +use+: ids enabled, refresh 2, max_age 30, suspend 4.
# Otherwise: ids disabled, refresh 4, max_age 60, suspend 8.
# The change is announced through Log.medium.
def self.use_lock_id=(use = true)
  refresh, max_age, suspend = use ? [2, 30, 4] : [4, 60, 8]

  Log.medium(use ? "Activating lockfile ids" : "De-activating lockfile ids")
  Lockfile.dont_use_lock_id = !use
  Lockfile.refresh = refresh
  Lockfile.max_age = max_age
  Lockfile.suspend = suspend
end

.variance(list) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/rbbt/util/misc/math.rb', line 55

# Sample variance of the values in +list+: the sum of squared distances to the
# mean divided by (n - 1).
#
# nil entries are discarded before anything else. The minimum-size guard used
# to run on the raw list, so a list padded with nils (e.g. [1, nil, nil])
# passed it and then divided by zero, yielding NaN.
#
# NOTE(review): the mean is now computed on the nil-free values; this assumes
# the mean helper (defined elsewhere) ignores nil entries anyway. Confirm.
#
# @param list [Array<Numeric, nil>] the observations
# @return [Float, nil] the variance, or nil with fewer than 3 non-nil values
def self.variance(list)
  values = list.compact
  return nil if values.length < 3

  center = mean(values)
  total_square_distance = values.inject(0.0) do |total, value|
    distance = value.to_f - center
    total + distance * distance
  end

  total_square_distance / (values.length - 1)
end

.with_env(var, value, &block) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
# File 'lib/rbbt/util/misc/system.rb', line 48

# Runs the block with the environment variable +var+ temporarily set to
# +value+ (both converted with to_s), and restores the previous value when
# the block finishes or raises. A variable that was unset is unset again.
#
# @return the value of the block
def self.with_env(var, value, &block)
  name, setting = var.to_s, value.to_s
  previous = ENV[name]
  begin
    ENV[name] = setting
    yield
  ensure
    # Assigning nil removes the variable.
    ENV[name] = previous
  end
end

.with_fifo(path = nil, &block) ⇒ Object



36
37
38
39
40
41
42
43
44
45
# File 'lib/rbbt/util/misc/pipes.rb', line 36

# Creates a FIFO (named pipe) and yields its path to the block.
#
# Without a +path+ a temporary file name is obtained from TmpFile.tmp_file and
# the FIFO is removed once the block is done. A FIFO created at a
# caller-supplied path is left in place.
#
# @return the value of the block
def self.with_fifo(path = nil, &block)
  temporary = path.nil?
  path = TmpFile.tmp_file if temporary
  File.mkfifo path
  yield path
ensure
  FileUtils.rm path if temporary
end

.zip2hash(list1, list2) ⇒ Object



33
34
35
36
37
38
39
# File 'lib/rbbt/util/misc/options.rb', line 33

# Builds a hash whose keys are the elements of +list1+ and whose values are
# the elements found at the same index in +list2+.
#
# Keys without a counterpart get whatever list2[i] returns (nil for an Array);
# a repeated key keeps the value of its last occurrence.
def self.zip2hash(list1, list2)
  list1.each_with_index.each_with_object({}) do |(key, index), hash|
    hash[key] = list2[index]
  end
end

.zip_fields(array) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/rbbt/util/misc/objects.rb', line 77

# Zips the entries of +array+ through _zip_fields, processing large inputs in
# slices.
#
# Inputs with fewer than 10000 entries are passed to _zip_fields directly.
# Larger ones are cut into slices of 10000; each slice is zipped using the
# longest entry length of the whole input, and the lists of the later slices
# are appended to the lists at the same index of the first slice's result.
def self.zip_fields(array)
  return _zip_fields(array) if array.length < 10000

  width = array.collect { |entry| entry.length }.max
  zipped_slices = array.each_slice(10000).collect do |slice|
    _zip_fields(slice, width)
  end

  merged = zipped_slices.first
  zipped_slices.drop(1).each do |zipped|
    zipped.each_with_index { |list, i| merged[i].concat list }
  end
  merged
end

.zscore(e, list) ⇒ Object



133
134
135
136
137
# File 'lib/rbbt/util/misc/math.rb', line 133

# Standard score of +e+ with respect to +list+: the distance from e (as a
# float) to Misc.mean(list), divided by Misc.sd(list).
def self.zscore(e, list)
  center = Misc.mean(list)
  spread = Misc.sd(list)
  deviation = e.to_f - center
  deviation / spread
end