Module: Macroape::CLI::EvalAlignment

Defined in:
lib/macroape/cli/eval_alignment.rb

Class Method Summary collapse

Class Method Details

.main(argv) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/macroape/cli/eval_alignment.rb', line 7

def self.main(argv)
  doc = "    Command-line format:\n    \#{run_tool_cmd} <1st matrix pat-file> <2nd matrix pat-file> <shift> <orientation(direct/revcomp)> [options]\n\n    Options:\n      [-p <P-value>]\n      [-d <discretization level>]\n      [--pcm] - treat the input file as Position Count Matrix. PCM-to-PWM transformation to be done internally.\n      [--boundary lower|upper] Upper boundary (default) means that the obtained P-value is greater than or equal to the requested P-value\n      [-b <background probabilities] ACGT - 4 numbers, comma-delimited(spaces not allowed), sum should be equal to 1, like 0.25,0.24,0.26,0.25\n      [--first-threshold <threshold for the first matrix>]\n      [--second-threshold <threshold for the second matrix>]\n\n    Examples:\n      \#{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat -1 direct -p 0.0005 -d 100 -b 0.3,0.2,0.2,0.3\n      \#{run_tool_cmd} motifs/KLF4_f2.pat motifs/SP1_f1.pat 3 revcomp\n  EOS\n\n  if argv.empty? || ['-h', '--h', '-help', '--help'].any?{|help_option| argv.include?(help_option)}\n    $stderr.puts doc\n    exit\n  end\n\n  pvalue = 0.0005\n  discretization = 10.0\n\n  first_background = Bioinform::Background::Wordwise\n  second_background = Bioinform::Background::Wordwise\n  max_hash_size = 10000000\n  max_pair_hash_size = 10000\n  pvalue_boundary = :upper\n\n  data_model = argv.delete('--pcm') ? :pcm : :pwm\n\n  first_file = argv.shift\n  second_file = argv.shift\n\n  shift = argv.shift\n  orientation = argv.shift\n\n  raise 'You should specify two input files'  unless first_file and second_file\n  raise 'You should specify shift' unless shift\n  raise 'You should specify orientation' unless orientation\n\n  shift = shift.to_i\n  orientation = orientation.to_sym\n\n  case orientation\n    when :direct\n      reverse = false\n    when :revcomp\n      reverse = true\n    else\n      raise 'Unknown orientation(direct/revcomp)'\n  end\n\n\n  until argv.empty?\n    case argv.shift\n      when '-p'\n        pvalue = argv.shift.to_f\n      when '-d'\n        discretization = argv.shift.to_f\n      when '--max-hash-size'\n        max_hash_size = argv.shift.to_i\n      when '--max-2d-hash-size'\n        max_pair_hash_size = argv.shift.to_i\n      when '-b'\n        second_background = first_background = Bioinform::Background.from_string(argv.shift)\n      when '-b1'\n        first_background = Bioinform::Background.from_string(argv.shift)\n      when '-b2'\n        second_background = Bioinform::Background.from_string(argv.shift)\n      when '--boundary'\n        pvalue_boundary = argv.shift.to_sym\n        raise 'boundary should be either lower or upper'  unless  pvalue_boundary == :lower || pvalue_boundary == :upper\n      when '--first-threshold'\n        predefined_threshold_first = argv.shift.to_f\n      when '--second-threshold'\n        predefined_threshold_second = argv.shift.to_f\n    end\n  end\n  raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless first_background.symmetric?\n  raise 'background should be symmetric: p(A)=p(T) and p(G) = p(C)' unless second_background.symmetric?\n\n  raise \"Error! File \#{first_file} don't exist\" unless File.exist?(first_file)\n  input_first = File.read(first_file)\n  input_first = Bioinform::MatrixParser.new.parse!(input_first)\n\n  raise \"Error! File \#{second_file} don't exist\" unless File.exist?(second_file)\n  input_second = File.read(second_file)\n  input_second = Bioinform::MatrixParser.new.parse!(input_second)\n\n  case data_model\n  when :pcm\n    pcm_first = Bioinform::MotifModel::PCM.new(input_first[:matrix]).named(input_first[:name])\n    pwm_first = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: first_background).convert(pcm_first)\n    pcm_second = Bioinform::MotifModel::PCM.new(input_second[:matrix]).named(input_second[:name])\n    pwm_second = Bioinform::ConversionAlgorithms::PCM2PWMConverter.new(pseudocount: :log, background: second_background).convert(pcm_second)\n  when :pwm\n    pwm_first = Bioinform::MotifModel::PWM.new(input_first[:matrix]).named(input_first[:name])\n    pwm_second = Bioinform::MotifModel::PWM.new(input_second[:matrix]).named(input_second[:name])\n  end\n\n  pwm_first = pwm_first.discreted(discretization)\n  pwm_second = pwm_second.discreted(discretization)\n\n  counting_first = PWMCounting.new(pwm_first, background: first_background, max_hash_size: max_hash_size)\n  counting_second = PWMCounting.new(pwm_second, background: second_background, max_hash_size: max_hash_size)\n\n  cmp = Macroape::PWMCompareAligned.new(counting_first, counting_second, shift, orientation).tap{|x| x.max_pair_hash_size = max_pair_hash_size }\n\n  if predefined_threshold_first\n    threshold_first = predefined_threshold_first * discretization\n  else\n    if pvalue_boundary == :lower\n      threshold_first = counting_first.threshold(pvalue)\n    else\n      threshold_first = counting_first.weak_threshold(pvalue)\n    end\n  end\n\n  if predefined_threshold_second\n    threshold_second = predefined_threshold_second * discretization\n  else\n    if pvalue_boundary == :lower\n      threshold_second = counting_second.threshold(pvalue)\n    else\n      threshold_second = counting_second.weak_threshold(pvalue)\n    end\n  end\n  info = cmp.alignment_infos.merge( cmp.jaccard(threshold_first, threshold_second) )\n  info.merge!(predefined_threshold_first: predefined_threshold_first,\n              predefined_threshold_second: predefined_threshold_second,\n              threshold_first: threshold_first / discretization,\n              threshold_second: threshold_second / discretization,\n              discretization: discretization,\n              first_background: first_background,\n              second_background: second_background,\n              requested_pvalue: pvalue,\n              pvalue_boundary: pvalue_boundary)\n  puts Helper.similarity_info_string(info)\n\nrescue => err\n  $stderr.puts \"\\n\#{err}\\n\#{err.backtrace.first(5).join(\"\\n\")}\\n\\nUse --help option for help\\n\\n\#{doc}\"\nend\n".strip_doc