Class: Bioroebe::Compseq
Overview
Constant Summary
collapse
- LEFT_PADDING =
8
- COLOURIZE_CpG_ISLANDS =
#
COLOURIZE_CpG_ISLANDS
If the following constant is set to true, then we will denote CpG islands as red.
#
true
- EXPECTED_FREQUENCY =
'0.0625000'
- DEFAULT_WORD_SIZE =
#
DEFAULT_WORD_SIZE
This is also known as the “word length”.
#
2
Bioroebe::CommandlineApplication::OLD_VERBOSE_VALUE
Bioroebe::ColoursForBase::ARRAY_HTML_COLOURS_IN_USE
Constants inherited
from Base
Base::NAMESPACE
Instance Method Summary
collapse
#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into
Methods included from BaseModule
#absolute_path, #default_file_read, #file_readlines
#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments
#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?
Methods inherited from Base
#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into
#internal_hash?, #reset_the_internal_hash
#infer_the_namespace, #namespace?
Constructor Details
#initialize(i = nil, run_already = true) ⇒ Compseq
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 65
# Builds a new Compseq instance.
#
# @param i           handed to set_commandline_arguments and to set_input
# @param run_already when truthy, run is invoked at the end of construction
# @yield an optional block; its return value may be :disable_colours,
#        :be_quiet or :disable_colours_and_be_quiet to toggle those settings
def initialize(i = nil, run_already = true)
  reset
  set_commandline_arguments(i)
  set_input(i)
  if block_given?
    directive = yield
    # The combined symbol triggers both actions, colours first.
    disable_colours if %i[disable_colours disable_colours_and_be_quiet].include?(directive)
    set_be_quiet if %i[be_quiet disable_colours_and_be_quiet].include?(directive)
  end
  run if run_already
end
|
Instance Method Details
#consider_colourizing_CpG_islands(i) ⇒ Object
#
consider_colourizing_CpG_islands
The input will only be colourized if we enabled this via a constant.
#
472
473
474
475
476
477
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 472
# Wraps the given text via swarn (followed by rev) when the
# COLOURIZE_CpG_ISLANDS constant is set and colours are in use;
# otherwise the input is handed back untouched.
#
# @param i the text that may be colourized
# @return the colourized text, or i itself
def consider_colourizing_CpG_islands(i)
  return i unless COLOURIZE_CpG_ISLANDS && use_colours?
  swarn(i)+rev
end
|
#count(i) ⇒ Object
#
count
This method will delegate to the respective count_* method (count_AA, count_AC, …) for the given word.
#
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 340
# Dispatches to the matching count_* method for one of the sixteen
# two-letter words.
#
# @param i a Symbol such as :AA or :CG
# @return the result of the matching count_* method, or nil when i is not
#         one of the sixteen known Symbols
def count(i)
  known_words = %i[AA AC AG AT CA CC CG CT GA GC GG GT TA TC TG TT]
  return nil unless known_words.include?(i)
  send(:"count_#{i}")
end
|
#count_AA ⇒ Object
317
318
319
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 317
# Looks up the entry stored under the key 'AA' in the hash returned by hash?.
def count_AA
  hash?['AA']
end
|
#count_AC ⇒ Object
310
311
312
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 310
# Looks up the entry stored under the key 'AC' in the hash returned by hash?.
def count_AC
  hash?['AC']
end
|
#count_AG ⇒ Object
303
304
305
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 303
# Looks up the entry stored under the key 'AG' in the hash returned by hash?.
def count_AG
  hash?['AG']
end
|
#count_AT ⇒ Object
296
297
298
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 296
# Looks up the entry stored under the key 'AT' in the hash returned by hash?.
def count_AT
  hash?['AT']
end
|
#count_CA ⇒ Object
289
290
291
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 289
# Looks up the entry stored under the key 'CA' in the hash returned by hash?.
def count_CA
  hash?['CA']
end
|
#count_CC ⇒ Object
282
283
284
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 282
# Looks up the entry stored under the key 'CC' in the hash returned by hash?.
def count_CC
  hash?['CC']
end
|
#count_CG ⇒ Object
275
276
277
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 275
# Looks up the entry stored under the key 'CG' in the hash returned by hash?.
def count_CG
  hash?['CG']
end
|
#count_CT ⇒ Object
268
269
270
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 268
# Looks up the entry stored under the key 'CT' in the hash returned by hash?.
def count_CT
  hash?['CT']
end
|
#count_frequencies ⇒ Object
#
count_frequencies
This will populate our main hash.
#
191
192
193
194
195
196
197
198
199
200
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 191
# Walks over the input sequence and increments, for every pair of
# neighbouring characters, the matching entry in @hash. Pairs overlap,
# so "AACG" contributes "AA", "AC" and "CG".
#
# @return [Array<String>] the individual characters of the input
def count_frequencies
  characters = input?.chars
  characters.each_cons(2) { |left, right| @hash[left+right] += 1 }
  characters
end
|
#count_GA ⇒ Object
261
262
263
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 261
# Looks up the entry stored under the key 'GA' in the hash returned by hash?.
def count_GA
  hash?['GA']
end
|
#count_GC ⇒ Object
254
255
256
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 254
# Looks up the entry stored under the key 'GC' in the hash returned by hash?.
def count_GC
  hash?['GC']
end
|
#count_GG ⇒ Object
247
248
249
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 247
# Looks up the entry stored under the key 'GG' in the hash returned by hash?.
def count_GG
  hash?['GG']
end
|
#count_GT ⇒ Object
240
241
242
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 240
# Looks up the entry stored under the key 'GT' in the hash returned by hash?.
def count_GT
  hash?['GT']
end
|
#count_TA ⇒ Object
233
234
235
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 233
# Looks up the entry stored under the key 'TA' in the hash returned by hash?.
def count_TA
  hash?['TA']
end
|
#count_TC ⇒ Object
226
227
228
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 226
# Looks up the entry stored under the key 'TC' in the hash returned by hash?.
def count_TC
  hash?['TC']
end
|
#count_TG ⇒ Object
219
220
221
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 219
# Looks up the entry stored under the key 'TG' in the hash returned by hash?.
def count_TG
  hash?['TG']
end
|
#count_TT ⇒ Object
212
213
214
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 212
# Looks up the entry stored under the key 'TT' in the hash returned by hash?.
def count_TT
  hash?['TT']
end
|
#determine_result ⇒ Object
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 397
# Builds the full textual report and stores it in @result.
#
# The frequency hash is populated first via count_frequencies. The report
# then consists of a commented header (identifier, word size, total count,
# column titles), one line per two-letter word produced by
# output_full_line_for, and a closing "Other" row.
#
# Note that the "Other" row is a fixed literal; it is not computed from the
# input. The method returns the assembled String (the value assigned to
# @result).
def determine_result
# Must run before any count or total is read below.
count_frequencies
@result = rev+"#
# Output from '#{steelblue('Bioroebe::Compseq')}#{rev}'
#
# The Expected frequencies are calculated on the (false) assumption that every
# word has equal frequency. #{swarn('Red')}#{rev} denotes possible CpG islands.
#
# The input sequences are:
# "+identifier?+"
Word size "+word_size?.to_s+'
Total count '+total_count?.to_s.rjust(3)+"
#
# Word Obs Count Obs Frequency Exp Frequency Obs/Exp Frequency
#
"+
output_full_line_for(:AA)+
output_full_line_for(:AC)+
output_full_line_for(:AG)+
output_full_line_for(:AT)+
output_full_line_for(:CA)+
output_full_line_for(:CC)+
output_full_line_for(:CG)+
output_full_line_for(:CT)+
output_full_line_for(:GA)+
output_full_line_for(:GC)+
output_full_line_for(:GG)+
output_full_line_for(:GT)+
output_full_line_for(:TA)+
output_full_line_for(:TC)+
output_full_line_for(:TG)+
output_full_line_for(:TT)+'
Other 0 0.0000000 0.0000000 10000000000.0000000'
end
|
#expected? ⇒ Boolean
#
expected?
Must return a Float.
#
439
440
441
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 439
# The expected per-word frequency as a Float, converted from the
# EXPECTED_FREQUENCY String constant.
#
# @return [Float]
def expected?
  frequency_as_string = EXPECTED_FREQUENCY
  frequency_as_string.to_f
end
|
#expected_frequency(for_this_sequence = :AA) ⇒ Object
446
447
448
449
450
451
452
453
454
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 446
# Ratio of the observed frequency to the expected frequency for the given
# word, formatted for the report (despite the method name, this is the
# "Obs/Exp" value).
#
# The ratio is rendered with exactly seven decimals via fixed-point
# formatting. Relying on Float#to_s plus zero-padding breaks for small
# ratios, because Float#to_s switches to scientific notation below 1e-4
# ("1.6e-06" would be padded to "1.6e-0600"), and yields a varying number
# of decimals for ratios of 10 and above.
#
# @param for_this_sequence the word to look up, e.g. :AA
# @return [String] the ratio, right-justified to a width of 12
def expected_frequency(
    for_this_sequence = :AA
  )
  ratio = observed_frequency(for_this_sequence).to_f / expected?
  format('%.7f', ratio).rjust(12)
end
|
#hash? ⇒ Boolean
205
206
207
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 205
# Reader for @hash, the Hash in which count_frequencies records how often
# each two-letter word was seen.
def hash?
  return @hash
end
|
#identifier? ⇒ Boolean
#
identifier?
This method must guarantee to return a String.
#
182
183
184
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 182
# Returns the stored identifier as a String; a nil entry becomes an
# empty String.
#
# @return [String]
def identifier?
  stored_identifier = @internal_hash[:identifier]
  stored_identifier.to_s
end
|
165
166
167
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 165
# Reader for @input, the sequence stored by set_input.
def input?
  return @input
end
|
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 482
# Handles option-like arguments: an Array is walked entry by entry, any
# other value is matched against the "disable colours" pattern below.
# By default the argument comes from commandline_arguments_containing_hyphens?.
#
# NOTE(review): the method name is missing after `def`, and the call inside
# the Array branch has lost its name as well, so this listing does not parse
# as shown. Presumably both were dropped when the listing was extracted
# (likely `menu`, called recursively per entry) - confirm against
# lib/bioroebe/utility_scripts/compseq/compseq.rb.
def (
i = commandline_arguments_containing_hyphens?
)
if i.is_a? Array
i.each {|entry| (entry) }
else
case i
# Accepts zero to two leading hyphens and an optional "-" or "_" separator,
# e.g. --disable-colours, -disable_colours or disablecolours.
when /^-?-?disable(-|_)?colours$/
disable_colours
end
end
end
|
#observed_frequency(i = :AA) ⇒ Object
#
observed_frequency
This method must return a String.
#
461
462
463
464
465
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 461
# Observed frequency of the given word: its count divided by the total
# count, rounded to seven decimals.
#
# The value is rendered with fixed-point formatting. Float#to_s switches to
# scientific notation below 1e-4, so zero-padding its output would turn
# 0.00005 into "5.0e-0500". When nothing has been counted (total of nil
# or 0) the frequency is reported as zero instead of "NaN".
#
# @param i the word to look up, e.g. :AA
# @return [String] the frequency with seven decimals, left-justified to a
#         width of 12
def observed_frequency(i = :AA)
  total = total_count?.to_f
  frequency = total.zero? ? 0.0 : (count(i) / total).round(7)
  format('%.7f', frequency).ljust(12)
end
|
#output_full_line_for(this = :TT) ⇒ Object
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 364
# Assembles one report row for the given two-letter word: the word itself,
# its observed count (right-justified to LEFT_PADDING), the observed
# frequency, the EXPECTED_FREQUENCY constant and the observed/expected
# ratio, terminated by N.
#
# The padding assignment and the count lookup are separate statements here;
# fused onto a single line, LEFT_PADDING would be parsed as a method call.
#
# The row for 'CG' is additionally passed through
# consider_colourizing_CpG_islands.
#
# @param this the word to report, e.g. :TT
# @return [String] the finished row
def output_full_line_for(
    this = :TT
  )
  word = this.to_s
  observed_count = send("count_#{word}")
  line = word+' '+observed_count.to_s.rjust(LEFT_PADDING)+' '+
         observed_frequency(this)+' '+EXPECTED_FREQUENCY+
         ' '+expected_frequency(this)+N
  line = consider_colourizing_CpG_islands(line) if word == 'CG'
  line
end
|
#reset ⇒ Object
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 104
# Resets the instance: runs the parent reset, infers the namespace, creates
# a fresh frequency hash whose missing keys read as 0, clears the stored
# identifier and restores the default word size.
def reset
  super()
  infer_the_namespace
  @hash = Hash.new(0)
  @internal_hash[:identifier] = nil
  set_word_size(DEFAULT_WORD_SIZE)
end
|
#result? ⇒ Boolean
Also known as:
result_as_string?
390
391
392
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 390
# Reader for @result, the report String assembled by determine_result.
def result?
  return @result
end
|
#run ⇒ Object
503
504
505
506
507
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 503
# Entry point: first computes the report, then shows it. The return value
# is whatever show_result returns.
def run
  determine_result
  return show_result
end
|
#set_identifier(i) ⇒ Object
172
173
174
175
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 172
# Extracts the "NM..." accession from a FASTA header of the form
# ">gi|<digits>|ref|NM...|" and stores it under :identifier in
# @internal_hash. When the header does not match, an empty String is stored.
#
# The match and the assignment are two separate statements; written on one
# line they do not parse.
#
# @param i the FASTA header line (converted via to_s before matching)
# @return [String] the stored, unfrozen identifier
def set_identifier(i)
  matched = />gi\|\d+\|ref\|(NM.+)\|/.match(i.to_s)
  @internal_hash[:identifier] = (matched ? matched[1] : '').dup
end
|
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 133
# Normalizes the given input and stores it in @input.
#
# Steps, in order:
# - an Array is reduced to its first element;
# - if the value names an existing file, the file content is used instead;
# - an empty input triggers a hint via opnerev (processing continues);
# - a leading FASTA header line (starting with '>') is passed to
#   set_identifier and removed from the sequence;
# - every occurrence of N and R is removed and the rest is upcased.
#   (N and R are constants defined elsewhere - presumably "\n" and "\r";
#   confirm.)
#
# @param i a String, a file path, or an Array whose first entry is one of those
# @return [String] the normalized sequence that was stored in @input
def set_input(i = '')
  i = i.first if i.is_a? Array
  i = i.to_s.dup
  i = File.read(i) if File.exist? i
  if i.empty?
    opnerev 'Please provide an input, either an existing file, or the'
    opnerev 'raw FASTA sequence embedded in "" quotes, for instance.'
  end
  if i.start_with? '>'
    newline_position = i.index(N)
    # A header without a trailing newline means the whole input is header.
    header = newline_position ? i[0, newline_position] : i.dup
    set_identifier(header)
    i[0, header.size] = ''
    i.lstrip!
  end
  i.delete!(N) if i.include? N
  i.delete!(R) if i.include? R
  # upcase! and the assignment are separate statements; fused together the
  # assignment would be passed to upcase! as an argument.
  i.upcase!
  @input = i
end
|
#set_word_size(i = DEFAULT_WORD_SIZE) ⇒ Object
#
set_word_size
This sets the word size, also known as “word length”.
#
124
125
126
127
128
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 124
# Stores the word size (also called "word length") under :word_size in
# @internal_hash.
#
# @param i the word size; DEFAULT_WORD_SIZE when omitted
# @return the value that was stored
def set_word_size(i = DEFAULT_WORD_SIZE)
  @internal_hash[:word_size] = i
end
|
#show_result ⇒ Object
324
325
326
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 324
# Passes @result to e, but only when be_verbose? is truthy; otherwise
# nothing happens and nil is returned.
def show_result
  return unless be_verbose?
  e(@result)
end
|
#total_count? ⇒ Boolean
331
332
333
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 331
# Sum of all counts currently stored in @hash.
#
# Uses sum rather than inject(:+) so that an empty hash (nothing counted
# yet, or an empty input) gives 0 instead of nil.
#
# @return [Integer] the total number of counted words
def total_count?
  @hash.values.sum
end
|
#word_size? ⇒ Boolean
383
384
385
|
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 383
# Returns the word size stored under :word_size in @internal_hash
# (written by set_word_size).
def word_size?
  stored_word_size = @internal_hash[:word_size]
  stored_word_size
end
|