Class: Bioroebe::Compseq
Overview
Constant Summary
collapse
- LEFT_PADDING =
8
- COLOURIZE_CpG_ISLANDS =
#
COLOURIZE_CpG_ISLANDS
If the following constant is set to true, then we will denote CpG islands as red.
#
true
- EXPECTED_FREQUENCY =
'0.0625000'
- DEFAULT_WORD_SIZE =
#
DEFAULT_WORD_SIZE
This is also known as the “word length”.
#
2
Bioroebe::CommandlineApplication::OLD_VERBOSE_VALUE
Bioroebe::ColoursForBase::ARRAY_HTML_COLOURS_IN_USE
Constants inherited
from Base
Base::NAMESPACE
Instance Method Summary
collapse
#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opne, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #set_be_verbose, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into
#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments
#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?
Methods inherited from Base
#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #file_readlines, #infer_the_namespace, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #namespace?, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into
Constructor Details
#initialize(i = nil, run_already = true) ⇒ Compseq
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 65
# === initialize
#
# Resets the instance, registers the commandline arguments and the
# input (both taken from +i+), optionally applies a behaviour flag
# yielded by the caller's block, and finally invokes run unless
# +run_already+ is false.
#
# Recognised block return values:
#
#   :disable_colours
#   :be_quiet
#   :disable_colours_and_be_quiet
#
# Any other yielded value is ignored.
def initialize(
    i = nil,
    run_already = true
  )
  reset
  set_commandline_arguments(i)
  set_input(i)
  if block_given?
    requested = yield
    # The combined symbol triggers both actions, colours first.
    disable_colours if [:disable_colours, :disable_colours_and_be_quiet].include?(requested)
    set_be_quiet if [:be_quiet, :disable_colours_and_be_quiet].include?(requested)
  end
  run if run_already
end
|
Instance Method Details
#consider_colourizing_CpG_islands(i) ⇒ Object
#
consider_colourizing_CpG_islands
The input will only be colourized if we enabled this via a constant.
#
470
471
472
473
474
475
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 470
# === consider_colourizing_CpG_islands
#
# Returns +i+ wrapped via swarn() and followed by rev, but only when
# both the COLOURIZE_CpG_ISLANDS constant and use_colours? are truthy.
# Otherwise +i+ is handed back untouched.
def consider_colourizing_CpG_islands(i)
  return i unless COLOURIZE_CpG_ISLANDS && use_colours?
  return swarn(i) + rev
end
|
#count(i) ⇒ Object
#
count
This method delegates to the matching count_XX method (count_AA, count_AC, … count_TT) for the given dinucleotide symbol.
#
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 338
# === count
#
# Dispatches to the count_XX method that belongs to the dinucleotide
# symbol +i+ (for example :CG leads to count_CG). Returns nil when +i+
# is not one of the sixteen known symbols.
def count(i)
  known_words = [
    :AA, :AC, :AG, :AT,
    :CA, :CC, :CG, :CT,
    :GA, :GC, :GG, :GT,
    :TA, :TC, :TG, :TT
  ]
  return nil unless known_words.include?(i)
  __send__(:"count_#{i}")
end
|
#count_AA ⇒ Object
315
316
317
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 315
# === count_AA
#
# Returns the value stored under the key 'AA' in the hash that
# hash? hands out.
def count_AA
  hash?['AA']
end
|
#count_AC ⇒ Object
308
309
310
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 308
# === count_AC
#
# Returns the value stored under the key 'AC' in the hash that
# hash? hands out.
def count_AC
  hash?['AC']
end
|
#count_AG ⇒ Object
301
302
303
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 301
# === count_AG
#
# Returns the value stored under the key 'AG' in the hash that
# hash? hands out.
def count_AG
  hash?['AG']
end
|
#count_AT ⇒ Object
294
295
296
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 294
# === count_AT
#
# Returns the value stored under the key 'AT' in the hash that
# hash? hands out.
def count_AT
  hash?['AT']
end
|
#count_CA ⇒ Object
287
288
289
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 287
# === count_CA
#
# Returns the value stored under the key 'CA' in the hash that
# hash? hands out.
def count_CA
  hash?['CA']
end
|
#count_CC ⇒ Object
280
281
282
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 280
# === count_CC
#
# Returns the value stored under the key 'CC' in the hash that
# hash? hands out.
def count_CC
  hash?['CC']
end
|
#count_CG ⇒ Object
273
274
275
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 273
# === count_CG
#
# Returns the value stored under the key 'CG' in the hash that
# hash? hands out.
def count_CG
  hash?['CG']
end
|
#count_CT ⇒ Object
266
267
268
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 266
# === count_CT
#
# Returns the value stored under the key 'CT' in the hash that
# hash? hands out.
def count_CT
  hash?['CT']
end
|
#count_frequencies ⇒ Object
#
count_frequencies
This will populate our main hash.
#
189
190
191
192
193
194
195
196
197
198
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 189
# === count_frequencies
#
# Walks over the input and, for every position, joins the character
# with its successor. Each resulting two-character string has its
# counter in @hash incremented by one, so overlapping pairs are all
# tallied. The final character has no successor and is skipped.
def count_frequencies
  letters = input?.chars
  letters.each_index do |position|
    successor = input?[position + 1, 1]
    next unless successor
    pair = letters[position] + successor
    # At the very end the successor is an empty String; ignore it.
    @hash[pair] += 1 if pair.size > 1
  end
end
|
#count_GA ⇒ Object
259
260
261
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 259
# === count_GA
#
# Returns the value stored under the key 'GA' in the hash that
# hash? hands out.
def count_GA
  hash?['GA']
end
|
#count_GC ⇒ Object
252
253
254
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 252
# === count_GC
#
# Returns the value stored under the key 'GC' in the hash that
# hash? hands out.
def count_GC
  hash?['GC']
end
|
#count_GG ⇒ Object
245
246
247
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 245
# === count_GG
#
# Returns the value stored under the key 'GG' in the hash that
# hash? hands out.
def count_GG
  hash?['GG']
end
|
#count_GT ⇒ Object
238
239
240
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 238
# === count_GT
#
# Returns the value stored under the key 'GT' in the hash that
# hash? hands out.
def count_GT
  hash?['GT']
end
|
#count_TA ⇒ Object
231
232
233
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 231
# === count_TA
#
# Returns the value stored under the key 'TA' in the hash that
# hash? hands out.
def count_TA
  hash?['TA']
end
|
#count_TC ⇒ Object
224
225
226
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 224
# === count_TC
#
# Returns the value stored under the key 'TC' in the hash that
# hash? hands out.
def count_TC
  hash?['TC']
end
|
#count_TG ⇒ Object
217
218
219
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 217
# === count_TG
#
# Returns the value stored under the key 'TG' in the hash that
# hash? hands out.
def count_TG
  hash?['TG']
end
|
#count_TT ⇒ Object
210
211
212
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 210
# === count_TT
#
# Returns the value stored under the key 'TT' in the hash that
# hash? hands out.
def count_TT
  hash?['TT']
end
|
#determine_result ⇒ Object
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 395
# === determine_result
#
# Runs count_frequencies and then assembles the full textual report
# into @result: a commented preamble (identifier, word size, total
# count, column header), one line per dinucleotide obtained from
# output_full_line_for, and a fixed trailing "Other" line.
#
# The assembled String is also the return value.
def determine_result
  count_frequencies
  words = [
    :AA, :AC, :AG, :AT,
    :CA, :CC, :CG, :CT,
    :GA, :GC, :GG, :GT,
    :TA, :TC, :TG, :TT
  ]
  # The literal below deliberately spans several lines; its content
  # must stay flush with the left margin.
  preamble = rev+"#
# Output from '#{steelblue('Bioroebe::Compseq')}#{rev}'
#
# The Expected frequencies are calculated on the (false) assumption that every
# word has equal frequency. #{swarn('Red')}#{rev} denotes possible CpG islands.
#
# The input sequences are:
# "+identifier?+"
Word size "+word_size?.to_s+'
Total count '+total_count?.to_s.rjust(3)+"
#
# Word Obs Count Obs Frequency Exp Frequency Obs/Exp Frequency
#
"
  table = words.map { |word| output_full_line_for(word) }.join
  @result = preamble+table+'
Other 0 0.0000000 0.0000000 10000000000.0000000'
end
|
#expected? ⇒ Float
#
expected?
Must return a Float.
#
437
438
439
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 437
# === expected?
#
# Returns the EXPECTED_FREQUENCY constant (kept as a String)
# converted to a Float.
def expected?
  frequency_as_string = EXPECTED_FREQUENCY
  return frequency_as_string.to_f
end
|
#expected_frequency(for_this_sequence = :AA) ⇒ Object
444
445
446
447
448
449
450
451
452
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 444
# === expected_frequency
#
# Returns the observed/expected ratio for the given dinucleotide as
# a right-aligned String that is at least 12 characters wide.
#
# The ratio is observed_frequency(for_this_sequence) (converted to
# a Float) divided by expected?. It is rendered with exactly seven
# decimals via format(); relying on Float#to_s plus zero-padding
# would produce malformed output for small ratios, because to_s
# switches to scientific notation below 1e-4 (e.g. "1.6e-0500").
def expected_frequency(
    for_this_sequence = :AA
  )
  ratio = observed_frequency(for_this_sequence).to_f / expected?
  format('%.7f', ratio).rjust(12)
end
|
#hash? ⇒ Boolean
203
204
205
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 203
# === hash?
#
# Reader for @hash, which holds the per-word tallies.
def hash?
  return @hash
end
|
#identifier? ⇒ Boolean
180
181
182
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 180
# === identifier?
#
# Returns @identifier as a String; an unset (nil) identifier yields
# an empty String.
def identifier?
  current_identifier = @identifier
  return current_identifier.to_s
end
|
#input? ⇒ Object
165
166
167
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 165
# === input?
#
# Reader for @input, the sequence String assigned by set_input.
def input?
  return @input
end
|
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 480
# === menu
#
# Handles commandline flags. An Array is processed element by
# element (recursively); a single entry that matches the
# "disable colours" pattern (with up to two leading hyphens, and an
# optional '-' or '_' between the two words) calls disable_colours.
# Every other entry is silently ignored.
#
# NOTE(review): the method name was missing in this listing (it read
# "def (" and the recursive call was a bare "(entry)"). The name
# "menu" has been reconstructed - please confirm it against the
# upstream file.
def menu(
    i = commandline_arguments_containing_hyphens?
  )
  if i.is_a? Array
    i.each {|entry| menu(entry) }
  else
    case i
    when /^-?-?disable(-|_)?colours$/
      disable_colours
    end
  end
end
|
#observed_frequency(i = :AA) ⇒ Object
#
observed_frequency
This method must return a String.
#
459
460
461
462
463
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 459
# === observed_frequency
#
# Returns the observed relative frequency of the word +i+, that is
# count(i) divided by total_count?, as a String with exactly seven
# decimals, left-aligned and padded with spaces to 12 characters.
#
# format() is used rather than Float#to_s plus zero-padding because
# to_s switches to scientific notation for values below 1e-4, which
# would yield corrupt output such as "1.0e-0500".
#
# When nothing has been tallied yet (total of zero or nil) the
# frequency is reported as 0.0 rather than NaN.
def observed_frequency(i = :AA)
  occurrences = count(i)
  total = total_count?.to_f
  frequency = total.zero? ? 0.0 : (occurrences / total)
  format('%.7f', frequency).ljust(12)
end
|
#output_full_line_for(this = :TT) ⇒ Object
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 362
# === output_full_line_for
#
# Builds one row of the report for the word +this+ (e. g. :TT): the
# word itself, its count (right-aligned to LEFT_PADDING columns),
# the observed frequency, the EXPECTED_FREQUENCY constant and the
# observed/expected ratio, terminated by N.
#
# The row for 'CG' is additionally passed through
# consider_colourizing_CpG_islands.
#
# The two assignments to +l+ and +method_result+ had been fused onto
# a single line in this listing, which Ruby would parse as a call to
# a (non-existent) LEFT_PADDING() method; they are separate
# statements again here.
def output_full_line_for(
    this = :TT
  )
  word  = this.to_s
  m_pad = ' '
  l = LEFT_PADDING
  method_result = self.send(('count_'+word).to_sym)
  result = word+' '+method_result.to_s.rjust(l)+m_pad+
           observed_frequency(this)+' '+EXPECTED_FREQUENCY+
           ' '+expected_frequency(this)+N
  if word == 'CG'
    result = consider_colourizing_CpG_islands(result)
  end
  return result
end
|
#reset ⇒ Object
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 104
# === reset
#
# Restores the initial state: delegates to the parent's reset,
# infers the namespace, clears the identifier, installs a fresh
# tally Hash whose missing keys read as 0, and applies the default
# word size.
def reset
  super()
  infer_the_namespace
  @identifier = nil
  @hash = Hash.new(0)
  set_word_size(DEFAULT_WORD_SIZE)
end
|
#result? ⇒ Boolean
Also known as:
result_as_string?
388
389
390
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 388
# === result?
#
# Reader for @result, the report String built by determine_result.
def result?
  return @result
end
|
#run ⇒ Object
501
502
503
504
505
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 501
# === run
#
# Entry point: first computes the report via determine_result, then
# displays it via show_result (whose value is returned).
def run
  determine_result
  return show_result
end
|
#set_identifier(i) ⇒ Object
172
173
174
175
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 172
# === set_identifier
#
# Extracts the accession that starts with "NM" out of a header of
# the form ">gi|<digits>|ref|NM...|" and stores a copy of it in
# @identifier. When the header does not match, @identifier becomes
# an empty String.
#
# The match and the assignment had been fused onto one line in this
# listing (a syntax error); they are two statements again here.
def set_identifier(i)
  i =~ />gi\|\d+\|ref\|(NM.+)\|/
  # After a failed match the capture is nil, hence the to_s.
  @identifier = Regexp.last_match(1).to_s.dup
end
|
#set_input(i = '') ⇒ Object
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 133
# === set_input
#
# Normalises the given input and stores it in @input.
#
# +i+ may be an Array (only its first element is used), the path of
# an existing file (its content is read), or the raw sequence. A
# leading FASTA header line (starting with '>') is handed to
# set_identifier and stripped. Afterwards every occurrence of the
# characters in N and R is removed and the sequence is upcased.
#
# NOTE(review): N and R are constants defined elsewhere; presumably
# they hold the newline and carriage-return characters - confirm.
#
# Two defects of the listing are repaired here:
#
#   - "i.upcase! @input = i" had been fused onto one line, which
#     passes a String as argument to upcase! (TypeError).
#   - A header without a trailing N made i.index(N) return nil and
#     the subsequent slice raise; the whole input is then treated
#     as the header.
def set_input(i = '')
  i = i.first if i.is_a? Array
  i = i.to_s.dup
  if File.exist? i
    i = File.read(i)
  end
  if i.empty?
    opnn; erev 'Please provide an input, either an existing file, or the'
    opnn; erev 'raw FASTA sequence embedded in "" quotes, for instance.'
  end
  if i.start_with? '>'
    header_length = i.index(N) || i.size
    substring = i[0, header_length]
    set_identifier(substring)
    i[0, substring.size] = ''
    i.lstrip!
  end
  i.delete!(N) if i.include? N
  i.delete!(R) if i.include? R
  i.upcase!
  @input = i
end
|
#set_word_size(i = DEFAULT_WORD_SIZE) ⇒ Object
#
set_word_size
This sets the word size, also known as “word length”.
#
124
125
126
127
128
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 124
# === set_word_size
#
# Stores +i+ (by default DEFAULT_WORD_SIZE) in @word_size. The word
# size is also known as the "word length".
def set_word_size(i = DEFAULT_WORD_SIZE)
  @word_size = i
end
|
#show_result ⇒ Object
322
323
324
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 322
# === show_result
#
# Outputs @result through e(), but only in verbose mode; otherwise
# nothing happens and nil is returned.
def show_result
  return unless be_verbose?
  e(@result)
end
|
#total_count? ⇒ Boolean
329
330
331
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 329
# === total_count?
#
# Returns the sum of all tallies stored in @hash.
#
# The fold is seeded with 0, so an empty hash yields 0 instead of
# nil (which an unseeded inject(:+) would return).
def total_count?
  @hash.values.inject(0, :+)
end
|
#word_size? ⇒ Boolean
381
382
383
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 381
# === word_size?
#
# Reader for @word_size as assigned through set_word_size.
def word_size?
  return @word_size
end
|