Module: Bioinform::CLI::MergeIntoCollection

Extended by:
Helpers
Defined in:
lib/bioinform/cli/merge_into_collection.rb

Class Method Summary collapse

Methods included from Helpers

basename_changed_extension, basename_wo_extension, change_folder_and_extension, name_wo_extension, set_extension, set_folder

Class Method Details

.main(argv) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/bioinform/cli/merge_into_collection.rb', line 10

# Command-line entry point: merges motif files (or whole directories of them)
# into a single collection file.
#
# Input sources come from the positional arguments or, when none are given,
# from shell-style words read from standard input. Each source is read,
# split into motifs by the selected data model class and appended to one
# collection, which is then written either as YAML (default) or as plain text.
#
# @param argv [Array<String>] command-line arguments, parsed by Docopt
# @return [Object] result of the last evaluated expression; not meaningful
# @raise [ArgumentError] if --data-model is not one of PM, PCM, PPM, PWM
# @raise [RuntimeError] if a source is neither an existing file nor a directory
def self.main(argv)
  doc = <<-DOCOPT
    Tool for merging multiple motifs into a single collection file.
    It takes motif files or (entire collections) and creates a collection consisting of them all. By default motifs are treated simply as matrices(PM), but you can (possibly should) specify data model. Output file by default are in YAML-format but it's possible to create plain text file. YAML collections are useful if you want to provide additional information for motifs in collection with another tool, plain text is more readable by humans.

    Usage:
      merge_into_collection [options] [<pm-files>...]

    Options:
      -h --help                 Show this screen.
      -n --name NAME            Specify name for a collection. Default filename is based on this parameter
      -o --output-file FILE     Output file for resulting collection
      -m --data-model MODEL     Data model: PM, PCM, PPM or PWM [default: PM]
      -p --plain-text           Output collection of motifs in plain text (motifs separated with newlines, no additional information included).
  DOCOPT

  # Strip the heredoc's common leading indentation so Docopt sees the usage
  # text flush left. Non-mutating gsub keeps this safe for frozen literals.
  doc = doc.gsub(/^#{doc[/\A +/]}/, '')
  options = Docopt::docopt(doc, argv: argv)

  plain_text = options['--plain-text']
  name = options['--name']
  output_file = options['--output-file'] ||
                set_extension(name || 'collection', plain_text ? 'txt' : 'yaml')

  # Only resolve constants for the documented data models; an unchecked
  # const_get on user input could pick up an unrelated constant and fail
  # much later with an obscure error.
  supported_models = %w[PM PCM PPM PWM]
  model_name = options['--data-model'].upcase
  unless supported_models.include?(model_name)
    raise ArgumentError,
          "Unknown data model #{options['--data-model']}; expected one of: #{supported_models.join(', ')}"
  end
  data_model = Bioinform.const_get(model_name)

  # With no positional arguments the list of sources is taken from stdin.
  sources = options['<pm-files>']
  sources = $stdin.read.shellsplit if sources.empty?

  filelist = sources.flat_map do |data_source|
    if File.directory?(data_source)
      # Keep regular files only: nested directories would make File.read
      # fail below. Sorting makes the motif order independent of the platform.
      Dir.glob(File.join(data_source, '*')).select { |path| File.file?(path) }.sort
    elsif File.file?(data_source)
      [data_source]
    else
      raise "File or directory #{data_source} can't be found"
    end
  end

  collection = Collection.new
  collection.name = name if name

  filelist.each do |filename|
    data_model.split_on_motifs(File.read(filename)).each do |pm|
      # Motifs without a name of their own are named after their file.
      pm.name ||= File.basename(filename, File.extname(filename))
      collection << pm
    end
  end

  if plain_text
    File.open(output_file, 'w') do |f|
      collection.each(model_name.downcase) do |pm|
        # One blank line after each motif.
        f.puts(pm.to_s + "\n\n")
      end
    end
  else
    File.open(output_file, 'w') { |f| YAML.dump(collection, f) }
  end
rescue Docopt::Exit => e
  # Docopt signals --help and usage errors through this exception.
  puts e.message
end