7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
# File 'lib/macroape/cli/find_threshold.rb', line 7
def self.main(argv)
doc = " Command-line format:\n \#{run_tool_cmd} <pat-file> [<list of P-values>...] [options]\n\n Options:\n [-d <discretization level>]\n [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.\n [--boundary lower|upper] Lower boundary (default) means that the obtained P-value is less than or equal to the requested P-value\n [-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25\n\n Example:\n \#{run_tool_cmd} motifs/KLF4_f2.pat\n \#{run_tool_cmd} motifs/KLF4_f2.pat 0.001 0.0001 0.0005 -d 1000 -b 0.4,0.3,0.2,0.1\n EOS\n\n if argv.empty? || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}\n $stderr.puts doc\n exit\n end\n\n background = Bioinform::Background::Wordwise\n default_pvalues = [0.0005]\n discretization = 10000\n max_hash_size = 10000000\n data_model = argv.delete('--pcm') ? :pcm : :pwm\n\n pvalue_boundary = :lower\n\n\n filename = argv.shift\n raise 'No input. You should specify input file' unless filename\n\n pvalues = []\n loop do\n begin\n Float(argv.first)\n pvalues << argv.shift.to_f\n rescue\n raise StopIteration\n end\n end\n pvalues = default_pvalues if pvalues.empty?\n\n until argv.empty?\n case argv.shift\n when '-b'\n background = Bioinform::Background.from_string(argv.shift)\n when '--max-hash-size'\n max_hash_size = argv.shift.to_i\n when '-d'\n discretization = argv.shift.to_f\n when '--boundary'\n pvalue_boundary = argv.shift.to_sym\n raise 'boundary should be either lower or upper' unless pvalue_boundary == :lower || pvalue_boundary == :upper\n end\n end\n\n raise \"Error! File \#{filename} doesn't exist\" unless File.exist?(filename)\n input = File.read(filename)\n\n parser = Bioinform::MatrixParser.new\n motif_data = parser.parse!(input)\n case data_model\n when :pcm\n pcm = Bioinform::MotifModel::PCM.new(motif_data[:matrix]).named(motif_data[:name])\n pwm = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: background).convert(pcm)\n when :pwm\n pwm = Bioinform::MotifModel::PWM.new(motif_data[:matrix]).named(motif_data[:name])\n end\n\n pwm = pwm.discreted(discretization)\n counting = PWMCounting.new(pwm, background: background, max_hash_size: max_hash_size)\n\n infos = []\n collect_infos_proc = ->(pvalue, threshold, real_pvalue) do\n infos << {expected_pvalue: pvalue,\n threshold: threshold / discretization,\n real_pvalue: real_pvalue,\n recognized_words: real_pvalue * counting.vocabulary_volume }\n end\n if pvalue_boundary == :lower\n counting.thresholds(*pvalues, &collect_infos_proc)\n else\n counting.weak_thresholds(*pvalues, &collect_infos_proc)\n end\n puts Helper.threshold_infos_string(infos,\n {discretization: discretization,\n background: background,\n pvalue_boundary: pvalue_boundary} )\nrescue => err\n $stderr.puts \"\\n\#{err}\\n\#{err.backtrace.first(5).join(\"\\n\")}\\n\\nUse --help option for help\\n\\n\#{doc}\"\nend\n".strip_doc
|