Class: Bioroebe::Compseq

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/utility_scripts/compseq/compseq.rb

Overview

Bioroebe::Compseq

Constant Summary collapse

LEFT_PADDING =
#

LEFT_PADDING

#
8
COLOURIZE_CpG_ISLANDS =
#

COLOURIZE_CpG_ISLANDS

If the following constant is set to true, then we will denote CpG islands as red.

#
true
EXPECTED_FREQUENCY =
#

EXPECTED_FREQUENCY

#
'0.0625000'
DEFAULT_WORD_SIZE =
#

DEFAULT_WORD_SIZE

This is also known as the “word length”.

#
2

Constants inherited from CommandlineApplication

Bioroebe::CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

Bioroebe::ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = nil, run_already = true) ⇒ Compseq

#

initialize

#


65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 65

# Build a new Compseq object.
#
# i           - the input; it is handed to both set_commandline_arguments()
#               and set_input().
# run_already - when truthy, run() is called at the end of the constructor.
#
# An optional block may return one of :disable_colours, :be_quiet or
# :disable_colours_and_be_quiet to toggle the matching behaviour; any other
# value is ignored.
def initialize(
    i           = nil,
    run_already = true
  )
  reset
  # Two setters receive the same input: set_input() is the older entry
  # point and is kept for legacy reasons.
  set_commandline_arguments(i)
  set_input(i)
  option = block_given? ? yield : nil
  # NOTE(review): set_be_quiet is not defined in this view - confirm it
  # is provided by a parent class.
  disable_colours if %i[disable_colours disable_colours_and_be_quiet].include?(option)
  set_be_quiet    if %i[be_quiet disable_colours_and_be_quiet].include?(option)
  run if run_already
end

Instance Method Details

#consider_colourizing_CpG_islands(i) ⇒ Object

#

consider_colourizing_CpG_islands

The input will only be colourized if we enabled this via a constant.

#


472
473
474
475
476
477
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 472

# Wrap the given text via swarn() and append rev(), but only when the
# COLOURIZE_CpG_ISLANDS constant is truthy and use_colours? agrees.
# Otherwise the input is returned untouched.
def consider_colourizing_CpG_islands(i)
  return i unless COLOURIZE_CpG_ISLANDS && use_colours?
  swarn(i) + rev
end

#count(i) ⇒ Object

#

count

This method will delegate to the respective AA methods.

#


340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 340

# Dispatch to the matching count_XX method for one of the sixteen
# dinucleotide Symbols (:AA .. :TT). Any other argument yields nil.
def count(i)
  known_words = %i[AA AC AG AT CA CC CG CT GA GC GG GT TA TC TG TT]
  return nil unless known_words.include?(i)
  send("count_#{i}")
end

#count_AA ⇒ Object

#

count_AA

#


317
318
319
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 317

# Look up the tally for "AA" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_AA
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_AC ⇒ Object

#

count_AC

#


310
311
312
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 310

# Look up the tally for "AC" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_AC
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_AG ⇒ Object

#

count_AG

#


303
304
305
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 303

# Look up the tally for "AG" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_AG
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_AT ⇒ Object

#

count_AT

#


296
297
298
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 296

# Look up the tally for "AT" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_AT
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_CA ⇒ Object

#

count_CA

#


289
290
291
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 289

# Look up the tally for "CA" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_CA
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_CC ⇒ Object

#

count_CC

#


282
283
284
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 282

# Look up the tally for "CC" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_CC
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_CG ⇒ Object

#

count_CG

#


275
276
277
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 275

# Look up the tally for "CG" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_CG
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_CT ⇒ Object

#

count_CT

#


268
269
270
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 268

# Look up the tally for "CT" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_CT
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_frequencies ⇒ Object

#

count_frequencies

This will populate our main hash.

#


191
192
193
194
195
196
197
198
199
200
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 191

# Walk over every pair of neighbouring characters of input? and bump the
# matching entry in @hash by one. Inputs shorter than two characters
# leave @hash untouched. Returns the Array of input characters.
def count_frequencies
  characters = input?.chars
  characters.each_cons(2) { |first, second| @hash[first + second] += 1 }
  characters
end

#count_GA ⇒ Object

#

count_GA

#


261
262
263
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 261

# Look up the tally for "GA" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_GA
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_GC ⇒ Object

#

count_GC

#


254
255
256
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 254

# Look up the tally for "GC" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_GC
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_GG ⇒ Object

#

count_GG

#


247
248
249
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 247

# Look up the tally for "GG" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_GG
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_GT ⇒ Object

#

count_GT

#


240
241
242
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 240

# Look up the tally for "GT" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_GT
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_TA ⇒ Object

#

count_TA

#


233
234
235
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 233

# Look up the tally for "TA" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_TA
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_TC ⇒ Object

#

count_TC

#


226
227
228
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 226

# Look up the tally for "TC" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_TC
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_TG ⇒ Object

#

count_TG

#


219
220
221
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 219

# Look up the tally for "TG" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_TG
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#count_TT ⇒ Object

#

count_TT

#


212
213
214
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 212

# Look up the tally for "TT" in hash?. The key is derived from this
# method's own name by dropping the "count_" part.
def count_TT
  dinucleotide = __method__.to_s.sub('count_', '')
  hash?[dinucleotide]
end

#determine_result ⇒ Object

#

determine_result

#


397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 397

# Tally the dinucleotides via count_frequencies, then assemble the full
# report text and store it in @result (which is also the return value).
#
# The report consists of a header (identifier, word size, total count),
# one line per dinucleotide produced by output_full_line_for() in the
# fixed order AA .. TT, and a closing "Other" line that is a hard-coded
# literal rather than a computed value.
#
# Note that the lines starting with '#' inside the string literals below
# are part of the emitted text, not Ruby comments.
def determine_result
  count_frequencies
  @result = rev+"#
# Output from '#{steelblue('Bioroebe::Compseq')}#{rev}'
#
# The Expected frequencies are calculated on the (false) assumption that every
# word has equal frequency. #{swarn('Red')}#{rev} denotes possible CpG islands.
#
# The input sequences are:
#    "+identifier?+"

Word size      "+word_size?.to_s+'
Total count  '+total_count?.to_s.rjust(3)+"

#
# Word  Obs Count   Obs Frequency  Exp Frequency  Obs/Exp Frequency
#
"+
output_full_line_for(:AA)+
output_full_line_for(:AC)+
output_full_line_for(:AG)+
output_full_line_for(:AT)+
output_full_line_for(:CA)+
output_full_line_for(:CC)+
output_full_line_for(:CG)+
output_full_line_for(:CT)+
output_full_line_for(:GA)+
output_full_line_for(:GC)+
output_full_line_for(:GG)+
output_full_line_for(:GT)+
output_full_line_for(:TA)+
output_full_line_for(:TC)+
output_full_line_for(:TG)+
output_full_line_for(:TT)+'
Other      0         0.0000000  0.0000000  10000000000.0000000'
end

#expected? ⇒ Float

#

expected?

Must return a Float.

#

Returns:

  • (Float)


439
440
441
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 439

# The EXPECTED_FREQUENCY constant (kept as a String), converted to a Float.
def expected?
  return EXPECTED_FREQUENCY.to_f
end

#expected_frequency(for_this_sequence = :AA) ⇒ Object

#

expected_frequency

#


446
447
448
449
450
451
452
453
454
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 446

# Return the observed/expected ratio for the given dinucleotide as a
# String with exactly seven decimals, right-aligned to a width of 12.
#
# for_this_sequence - the dinucleotide Symbol, passed to observed_frequency().
#
# The ratio is rendered with format('%.7f') rather than Float#to_s, so
# very small ratios are not shown in scientific notation ("1.6e-05") and
# long binary fractions are rounded instead of overflowing the column.
def expected_frequency(
    for_this_sequence = :AA
  )
  ratio = observed_frequency(for_this_sequence).to_f / expected?
  format('%.7f', ratio).rjust(12)
end

#hash? ⇒ Boolean

#

hash?

#

Returns:

  • (Boolean)


205
206
207
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 205

# Reader for @hash, the table that count_frequencies fills.
def hash?
  return @hash
end

#identifier? ⇒ String

#

identifier?

This method must guarantee to return a String.

#

Returns:

  • (String)


182
183
184
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 182

# The identifier stored in @internal_hash, always as a String
# (an unset identifier therefore comes back as '').
def identifier?
  stored = @internal_hash[:identifier]
  stored.to_s
end

#input? ⇒ Boolean Also known as: sequence?

#

input?

#

Returns:

  • (Boolean)


165
166
167
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 165

# Reader for @input, the value assigned by set_input().
def input?
  return @input
end

#menu(i = commandline_arguments_containing_hyphens?) ⇒ Object

#

menu (menu tag)

#


482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 482

# Evaluate commandline flags.
#
# i - a single flag, or an Array of flags; an Array is processed entry
#     by entry through recursive calls and is itself returned.
#
# The only flag handled here is --disable-colours (leading hyphens are
# optional; '-', '_' or nothing may separate the two words), which calls
# disable_colours.
def menu(
    i = commandline_arguments_containing_hyphens?
  )
  return i.each { |entry| menu(entry) } if i.is_a? Array
  disable_colours if /^-?-?disable(-|_)?colours$/ === i
end

#observed_frequency(i = :AA) ⇒ Object

#

observed_frequency

This method must return a String.

#


461
462
463
464
465
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 461

# Return the observed frequency of the given dinucleotide - its count
# divided by the total count - as a String with exactly seven decimals,
# left-aligned and padded to a width of 12.
#
# i - the dinucleotide Symbol, passed to count().
#
# format('%.7f') is used instead of Float#to_s so that frequencies below
# 0.0001 are not rendered in scientific notation ("1.0e-05"). When the
# total is zero (or nil) the frequency is reported as 0.0 rather than NaN.
def observed_frequency(i = :AA)
  total = total_count?.to_f
  frequency = total.zero? ? 0.0 : (count(i) / total).round(7)
  format('%.7f', frequency).ljust(12)
end

#output_full_line_for(this = :TT) ⇒ Object

#

output_full_line_for

#


364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 364

# Build one newline-terminated report line for the given dinucleotide:
# the word, its count (right-aligned to LEFT_PADDING columns), the
# observed frequency, the EXPECTED_FREQUENCY constant and the
# observed/expected ratio.
#
# this - the dinucleotide, e.g. :TT.
#
# The line for 'CG' is additionally passed through
# consider_colourizing_CpG_islands().
def output_full_line_for(
    this = :TT
  )
  word   = this.to_s
  tally  = send("count_#{word}").to_s.rjust(LEFT_PADDING)
  spacer = '         '
  line = "#{word}  #{tally}#{spacer}#{observed_frequency(this)}   " \
         "#{EXPECTED_FREQUENCY}   #{expected_frequency(this)}#{N}"
  word == 'CG' ? consider_colourizing_CpG_islands(line) : line
end

#reset ⇒ Object

#

reset (reset tag)

#


104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 104

# Bring the object back into its initial state: run the parent's reset,
# infer the namespace, clear the identifier, start with an empty tally
# table whose missing keys read as 0, and restore the default word size.
def reset
  super()
  infer_the_namespace
  # No identifier is known until set_identifier() has been called.
  @internal_hash[:identifier] = nil
  # Dinucleotide => number of occurrences.
  @hash = Hash.new(0)
  set_word_size(DEFAULT_WORD_SIZE)
end

#result? ⇒ Boolean Also known as: result_as_string?

#

result?

#

Returns:

  • (Boolean)


390
391
392
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 390

# Reader for @result, the report assembled by determine_result.
def result?
  return @result
end

#run ⇒ Object

#

run (run tag)

#


503
504
505
506
507
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 503

# Main entry point: evaluate the commandline flags, compute the report
# and finally display it. Returns whatever show_result returns.
def run
  menu
  determine_result
  return show_result
end

#set_identifier(i) ⇒ Object

#

set_identifier

#


172
173
174
175
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 172

# Extract the accession (the "NM..." part) from a header of the shape
# ">gi|<digits>|ref|NM...|" and store it in @internal_hash[:identifier].
# When the header does not have that shape an empty String is stored.
def set_identifier(i)
  match = />gi\|\d+\|ref\|(NM.+)\|/.match(i) # http://rubular.com/r/dbx08WzdAZ
  accession = match ? match[1] : nil
  @internal_hash[:identifier] = accession.to_s.dup
end

#set_input(i = '') ⇒ Object

#

set_input

#


133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 133

# Normalize the given input and store it in @input.
#
# i - a raw sequence, a FASTA record, the path of an existing file whose
#     content is then read, or an Array (only its first entry is used).
#
# A leading FASTA header line (starting with '>') is handed to
# set_identifier() and cut off. All N and R characters (presumably
# newline and carriage return - they are defined outside this view) are
# removed and the remainder is upcased. Returns the stored String.
def set_input(i = '')
  i = i.first if i.is_a? Array
  i = i.to_s.dup
  # Only regular files are read; a directory that happens to share the
  # name of the input would make File.read raise.
  i = File.read(i) if File.file?(i)
  # ======================================================================= #
  # === Handle empty input given next
  # ======================================================================= #
  if i.empty?
    opnerev 'Please provide an input, either an existing file, or the'
    opnerev 'raw FASTA sequence embedded in "" quotes, for instance.'
  end
  # ======================================================================= #
  # === Handle > FASTA identifier next
  # ======================================================================= #
  if i.start_with? '>'
    # The header runs up to the first N. When there is none, the whole
    # input is the header and no sequence remains.
    header_length = i.index(N) || i.size
    header = i[0, header_length]
    set_identifier(header)
    i[0, header.size] = ''
    i.lstrip!
  end
  i.delete!(N)
  i.delete!(R)
  i.upcase! # This here since as of Feb 2016.
  @input = i
end

#set_word_size(i = DEFAULT_WORD_SIZE) ⇒ Object

#

set_word_size

This sets the word size, also known as “word length”.

#


124
125
126
127
128
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 124

# Store the word size (also called "word length") in @internal_hash.
#
# i - the new word size; defaults to DEFAULT_WORD_SIZE.
def set_word_size(i = DEFAULT_WORD_SIZE)
  @internal_hash[:word_size] = i
end

#show_result ⇒ Object

#

show_result

#


324
325
326
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 324

# Output @result through e(), unless the object is not in verbose mode.
def show_result
  return unless be_verbose?
  e(@result)
end

#total_count? ⇒ Boolean

#

total_count?

#

Returns:

  • (Boolean)


331
332
333
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 331

# Sum of all tallies stored in @hash, i.e. the number of words counted.
# Returns 0 (rather than nil) while nothing has been counted yet, so
# callers can safely do arithmetic on the result.
def total_count?
  @hash.values.sum
end

#word_size? ⇒ Boolean

#

word_size?

#

Returns:

  • (Boolean)


383
384
385
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 383

# The word size previously stored by set_word_size().
def word_size?
  return @internal_hash[:word_size]
end