7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
# File 'lib/macroape/cli/find_pvalue.rb', line 7
def self.main(argv)
doc = <<-EOS.strip_doc
Command-line format:
#{run_tool_cmd} <pat-file> <threshold list>... [options]
Options:
[-d <discretization level>]
[--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.
[-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25
Examples:
#{run_tool_cmd} motifs/KLF4_f2.pat 7.32
#{run_tool_cmd} motifs/KLF4_f2.pat 7.32 4.31 5.42 -d 1000 -b 0.2,0.3,0.3,0.2
EOS
if argv.empty? || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}
$stderr.puts doc
exit
end
discretization = 10000
background = Bioinform::Background::Wordwise
thresholds = []
max_hash_size = 10000000
data_model = argv.delete('--pcm') ? :pcm : :pwm
filename = argv.shift
loop do
begin
Float(argv.first)
thresholds << argv.shift.to_f
rescue
raise StopIteration
end
end
raise 'No input. You should specify input file' unless filename
raise 'You should specify at least one threshold' if thresholds.empty?
until argv.empty?
case argv.shift
when '-b'
background = Bioinform::Background.from_string(argv.shift)
when '-d'
discretization = argv.shift.to_f
when '--max-hash-size'
max_hash_size = argv.shift.to_i
end
end
raise "Error! File #{filename} doesn't exist" unless File.exist?(filename)
input = File.read(filename)
parser = Bioinform::MatrixParser.new
motif_data = parser.parse!(input)
case data_model
when :pcm
pcm = Bioinform::MotifModel::PCM.new(motif_data[:matrix]).named(motif_data[:name])
pwm = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: background).convert(pcm)
when :pwm
pwm = Bioinform::MotifModel::PWM.new(motif_data[:matrix]).named(motif_data[:name])
end
pwm = pwm.discreted(discretization)
counting = PWMCounting.new(pwm, background: background, max_hash_size: max_hash_size)
counts = counting.counts_by_thresholds(* thresholds.map{|count| count * discretization})
infos = []
thresholds.each do |threshold|
count = counts[threshold * discretization]
pvalue = count.to_f / (counting.vocabulary_volume)
infos << {threshold: threshold,
number_of_recognized_words: count,
pvalue: pvalue}
end
puts Helper.find_pvalue_info_string(infos,
{discretization: discretization,
background: background} )
rescue => err
$stderr.puts "\n#{err}\n#{err.backtrace.first(5).join("\n")}\n\nUse --help option for help\n\n#{doc}"
end
|