Class: Bioroebe::Compseq

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/utility_scripts/compseq/compseq.rb

Overview

Bioroebe::Compseq

Constant Summary collapse

LEFT_PADDING =
#

LEFT_PADDING

#
8
COLOURIZE_CpG_ISLANDS =
#

COLOURIZE_CpG_ISLANDS

If the following constant is set to true, then we will denote CpG islands as red.

#
true
EXPECTED_FREQUENCY =
#

EXPECTED_FREQUENCY

#
'0.0625000'
DEFAULT_WORD_SIZE =
#

DEFAULT_WORD_SIZE

This is also known as the “word length”.

#
2

Constants inherited from CommandlineApplication

Bioroebe::CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

Bioroebe::ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opne, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #set_be_verbose, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #file_readlines, #infer_the_namespace, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #namespace?, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Constructor Details

#initialize(i = nil, run_already = true) ⇒ Compseq

#

initialize

#

65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 65

# Builds a new Compseq object.
#
# @param i [Object] handed unchanged to both set_commandline_arguments
#   and set_input
# @param run_already [Boolean] when truthy, run is invoked as the last step
# @yield optional block; its return value is compared against
#   :disable_colours, :be_quiet and :disable_colours_and_be_quiet
def initialize(i = nil, run_already = true)
  reset
  # ======================================================================= #
  # Two setters receive the same argument, mostly for legacy reasons:
  # set_input() is the older entry point, the other setter was added
  # in May 2020.
  # ======================================================================= #
  set_commandline_arguments(i)
  set_input(i)
  if block_given?
    requested = yield
    wants_no_colours = %i[disable_colours disable_colours_and_be_quiet].include?(requested)
    wants_quiet      = %i[be_quiet disable_colours_and_be_quiet].include?(requested)
    # Colours are switched off before the quiet flag is set, matching the
    # order used for the combined :disable_colours_and_be_quiet option.
    disable_colours if wants_no_colours
    set_be_quiet if wants_quiet
  end
  run if run_already
end

Instance Method Details

#consider_colourizing_CpG_islands(i) ⇒ Object

#

consider_colourizing_CpG_islands

The input will only be colourized if we enabled this via a constant.

#

470
471
472
473
474
475
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 470

# Wraps the given text as swarn(i) followed by rev, but only when both the
# COLOURIZE_CpG_ISLANDS constant and use_colours? are truthy. Otherwise the
# argument is handed back untouched.
#
# @param i [String] the text that may be colourized
# @return [String] the possibly colourized text
def consider_colourizing_CpG_islands(i)
  return i unless COLOURIZE_CpG_ISLANDS && use_colours?

  swarn(i) + rev
end

#count(i) ⇒ Object

#

count

This method delegates to the count_* method that matches the given dinucleotide symbol (for example, :AA is forwarded to count_AA).

#

338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 338

# Dispatches to the count_* method that belongs to the given dinucleotide.
#
# @param i [Symbol] one of :AA, :AC, :AG, :AT, :CA, :CC, :CG, :CT,
#   :GA, :GC, :GG, :GT, :TA, :TC, :TG, :TT
# @return [Object, nil] whatever the matching count_* method returns;
#   nil when i is not one of the sixteen symbols listed above
def count(i)
  known_words = %i[AA AC AG AT CA CC CG CT GA GC GG GT TA TC TG TT]
  send("count_#{i}") if known_words.include?(i)
end

#count_AAObject

#

count_AA

#

315
316
317
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 315

# Looks up the entry stored under the key 'AA' in the hash returned by hash?.
def count_AA
  hash?['AA']
end

#count_ACObject

#

count_AC

#

308
309
310
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 308

# Looks up the entry stored under the key 'AC' in the hash returned by hash?.
def count_AC
  hash?['AC']
end

#count_AGObject

#

count_AG

#

301
302
303
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 301

# Looks up the entry stored under the key 'AG' in the hash returned by hash?.
def count_AG
  hash?['AG']
end

#count_ATObject

#

count_AT

#

294
295
296
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 294

# Looks up the entry stored under the key 'AT' in the hash returned by hash?.
def count_AT
  hash?['AT']
end

#count_CAObject

#

count_CA

#

287
288
289
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 287

# Looks up the entry stored under the key 'CA' in the hash returned by hash?.
def count_CA
  hash?['CA']
end

#count_CCObject

#

count_CC

#

280
281
282
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 280

# Looks up the entry stored under the key 'CC' in the hash returned by hash?.
def count_CC
  hash?['CC']
end

#count_CGObject

#

count_CG

#

273
274
275
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 273

# Looks up the entry stored under the key 'CG' in the hash returned by hash?.
def count_CG
  hash?['CG']
end

#count_CTObject

#

count_CT

#

266
267
268
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 266

# Looks up the entry stored under the key 'CT' in the hash returned by hash?.
def count_CT
  hash?['CT']
end

#count_frequenciesObject

#

count_frequencies

This will populate our main hash.

#

189
190
191
192
193
194
195
196
197
198
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 189

# Populates @hash with the number of occurrences of every overlapping
# two-character word of the input sequence (for 'AACG' these are 'AA',
# 'AC' and 'CG').
#
# The counts are rebuilt from scratch on every call. Without clearing
# first, a second invocation (for instance through a repeated
# determine_result) would add the same words again and double every count.
#
# Note that the window is fixed at two characters; @word_size is not
# consulted here.
#
# @return [Array<String>] the individual characters of the input
def count_frequencies
  characters = input?.chars
  @hash.clear # Removes stale counts only; the default value of the hash is kept.
  characters.each_cons(2) { |first, second| @hash[first + second] += 1 }
  characters
end

#count_GAObject

#

count_GA

#

259
260
261
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 259

# Looks up the entry stored under the key 'GA' in the hash returned by hash?.
def count_GA
  hash?['GA']
end

#count_GCObject

#

count_GC

#

252
253
254
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 252

# Looks up the entry stored under the key 'GC' in the hash returned by hash?.
def count_GC
  hash?['GC']
end

#count_GGObject

#

count_GG

#

245
246
247
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 245

# Looks up the entry stored under the key 'GG' in the hash returned by hash?.
def count_GG
  hash?['GG']
end

#count_GTObject

#

count_GT

#

238
239
240
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 238

# Looks up the entry stored under the key 'GT' in the hash returned by hash?.
def count_GT
  hash?['GT']
end

#count_TAObject

#

count_TA

#

231
232
233
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 231

# Looks up the entry stored under the key 'TA' in the hash returned by hash?.
def count_TA
  hash?['TA']
end

#count_TCObject

#

count_TC

#

224
225
226
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 224

# Looks up the entry stored under the key 'TC' in the hash returned by hash?.
def count_TC
  hash?['TC']
end

#count_TGObject

#

count_TG

#

217
218
219
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 217

# Looks up the entry stored under the key 'TG' in the hash returned by hash?.
def count_TG
  hash?['TG']
end

#count_TTObject

#

count_TT

#

210
211
212
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 210

# Looks up the entry stored under the key 'TT' in the hash returned by hash?.
def count_TT
  hash?['TT']
end

#determine_resultObject

#

determine_result

#

395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 395

# Counts the word frequencies and assembles the full textual report, which
# is stored in @result and returned.
#
# The report consists of a commented header (tool name, explanatory note,
# identifier, word size, total count, column titles), one line per
# dinucleotide produced by output_full_line_for, and a fixed "Other" line.
#
# @return [String] the assembled report
def determine_result
  count_frequencies
  header = "#{rev}#\n" \
           "# Output from '#{steelblue('Bioroebe::Compseq')}#{rev}'\n" \
           "#\n" \
           "# The Expected frequencies are calculated on the (false) assumption that every\n" \
           "# word has equal frequency. #{swarn('Red')}#{rev} denotes possible CpG islands.\n" \
           "#\n" \
           "# The input sequences are:\n" \
           "#    #{identifier?}\n" \
           "\n" \
           "Word size      #{word_size?}\n" \
           "Total count  #{total_count?.to_s.rjust(3)}\n" \
           "\n" \
           "#\n" \
           "# Word  Obs Count   Obs Frequency  Exp Frequency  Obs/Exp Frequency\n" \
           "#\n"
  # One report line per dinucleotide, in alphabetical order.
  rows = %i[AA AC AG AT CA CC CG CT GA GC GG GT TA TC TG TT].map { |word|
    output_full_line_for(word)
  }.join
  footer = "\nOther      0         0.0000000  0.0000000  10000000000.0000000"
  @result = header + rows + footer
end

#expected? ⇒ Float

#

expected?

Must return a Float.

#

Returns:

  • (Float)

437
438
439
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 437

# Returns the EXPECTED_FREQUENCY constant converted via to_f.
# Despite the trailing question mark this is not a boolean query.
def expected?
  EXPECTED_FREQUENCY.to_f
end

#expected_frequency(for_this_sequence = :AA) ⇒ Object

#

expected_frequency

#

444
445
446
447
448
449
450
451
452
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 444

# Returns the ratio "observed frequency / expected frequency" for the given
# word as a right-aligned String (minimum width 12) with exactly seven
# decimal places.
#
# Kernel#format is used instead of Float#to_s plus zero padding, because
# to_s yields a varying number of decimals (16.0 became "16.000000") and
# switches to scientific notation for small values (8.0e-05 became
# "8.0e-0500"), both of which broke the column layout.
#
# @param for_this_sequence [Symbol] the word, passed on to observed_frequency
# @return [String] the formatted ratio
def expected_frequency(
    for_this_sequence = :AA
  )
  ratio = observed_frequency(for_this_sequence).to_f / expected?
  format('%.7f', ratio).rjust(12)
end

#hash? ⇒ Hash

#

hash?

#

Returns:

  • (Hash)

203
204
205
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 203

# Reader for @hash, the word-count hash that reset creates with a default
# value of 0. Despite the trailing question mark this is not a boolean query.
def hash?
  @hash
end

#identifier? ⇒ String

#

identifier?

#

Returns:

  • (String)

180
181
182
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 180

# Returns @identifier converted via to_s, so an unset (nil) identifier
# yields an empty String.
def identifier?
  @identifier.to_s
end

#input? ⇒ String Also known as: sequence?

#

input?

#

Returns:

  • (String)

165
166
167
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 165

# Reader for @input, the sequence String assigned by set_input.
def input?
  @input
end
#menu(i = commandline_arguments_containing_hyphens?) ⇒ Object

#

menu (menu tag)

#

480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 480

# Interprets commandline flags.
#
# An Array is processed element by element (recursively); any other value
# is matched against the known flags.
#
# @param i [Array, Object] a single flag or an Array of flags
def menu(i = commandline_arguments_containing_hyphens?)
  case i
  when Array
    i.each { |flag| menu(flag) }
  # ======================================================================= #
  # === --disable-colours
  #
  # Switches the colours off.
  # ======================================================================= #
  when /^-?-?disable(-|_)?colours$/
    disable_colours
  end
end

#observed_frequency(i = :AA) ⇒ Object

#

observed_frequency

This method must return a String.

#

459
460
461
462
463
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 459

# Returns the observed frequency of the given word (its count divided by
# the total count) as a left-aligned String (minimum width 12) with exactly
# seven decimal places.
#
# Two corner cases are handled explicitly:
# - a total count of 0 (or nil) gives "0.0000000" rather than a NaN string;
# - frequencies below 1e-4 are written in fixed notation. Float#to_s would
#   switch to scientific notation there ("5.0e-06"), which the former zero
#   padding turned into "5.0e-0600".
#
# @param i [Symbol] the word, passed on to count
# @return [String] the formatted frequency
def observed_frequency(i = :AA)
  total = total_count?.to_f
  frequency = total.zero? ? 0.0 : count(i) / total
  # Round first so that ordinary values keep the digits they had before.
  format('%.7f', frequency.round(7)).ljust(12)
end

#output_full_line_for(this = :TT) ⇒ Object

#

output_full_line_for

#

362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 362

# Builds one newline-terminated report line for the given word: the word
# itself, its observed count (right-aligned to LEFT_PADDING), the observed
# frequency, the EXPECTED_FREQUENCY constant and the result of
# expected_frequency. The 'CG' line is additionally passed through
# consider_colourizing_CpG_islands.
#
# @param this [Symbol] the word to report on
# @return [String] the finished line
def output_full_line_for(this = :TT)
  word = this.to_s
  middle_padding = '         '
  observed_count = send("count_#{word}").to_s.rjust(LEFT_PADDING)
  line = "#{word}  #{observed_count}#{middle_padding}" \
         "#{observed_frequency(this)}   #{EXPECTED_FREQUENCY}" \
         "   #{expected_frequency(this)}#{N}"
  word == 'CG' ? consider_colourizing_CpG_islands(line) : line
end

#resetObject

#

reset (reset tag)

#

104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 104

# Restores the initial state: runs the parent reset, infers the namespace,
# clears the identifier, installs an empty counting hash and applies the
# default word size.
def reset
  super()
  infer_the_namespace
  # ======================================================================= #
  # === @identifier
  # ======================================================================= #
  @identifier = nil
  # ======================================================================= #
  # === @hash
  #
  # Unknown keys read as 0, so counters can be incremented directly.
  # ======================================================================= #
  @hash = Hash.new(0)
  set_word_size(DEFAULT_WORD_SIZE)
end

#result? ⇒ String Also known as: result_as_string?

#

result?

#

Returns:

  • (String)

388
389
390
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 388

# Reader for @result, the report String assembled by determine_result.
def result?
  @result
end

#runObject

#

run (run tag)

#

501
502
503
504
505
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 501

# Main entry point: evaluates the commandline flags (menu), builds the
# report (determine_result) and finally hands it to show_result.
def run
  menu
  determine_result
  show_result
end

#set_identifier(i) ⇒ Object

#

set_identifier

#

172
173
174
175
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 172

# Extracts the accession from a FASTA header of the form
# ">gi|<digits>|ref|NM...|" and stores it in @identifier. When the header
# does not match this pattern, @identifier becomes an empty String.
#
# @param i [String] the FASTA header line
# @return [String] the stored identifier
def set_identifier(i)
  header_match = />gi\|\d+\|ref\|(NM.+)\|/.match(i) # http://rubular.com/r/dbx08WzdAZ
  accession = header_match && header_match[1]
  @identifier = accession.to_s.dup
end

#set_input(i = '') ⇒ Object

#

set_input

#

133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 133

# Normalizes the given input and stores it in @input.
#
# Processing steps, in order:
# 1. An Array is reduced to its first element; the value is then converted
#    to a fresh String.
# 2. If the String names an existing filesystem entry that is not a
#    directory, the content of that file becomes the input. Directories are
#    excluded because File.read would raise on them.
# 3. For empty input a hint is printed; processing continues regardless.
# 4. If the input starts with '>', the first line is handed to
#    set_identifier and removed. A header without a trailing newline is
#    treated as a header-only input, whereas it used to raise a TypeError.
# 5. All N and R characters are deleted and the rest is upcased.
#
# @param i [String, Array, nil] raw sequence, FASTA text or a file path
# @return [String] the normalized sequence now stored in @input
def set_input(i = '')
  i = i.first if i.is_a? Array
  i = i.to_s.dup
  i = File.read(i) if File.exist?(i) && !File.directory?(i)
  # ======================================================================= #
  # === Handle empty input given next
  # ======================================================================= #
  if i.empty?
    opnn; erev 'Please provide an input, either an existing file, or the'
    opnn; erev 'raw FASTA sequence embedded in "" quotes, for instance.'
  end
  # ======================================================================= #
  # === Handle > FASTA identifier next
  # ======================================================================= #
  if i.start_with? '>'
    # Assume an identifier was given; without a newline the header spans
    # the whole input.
    header = i[0, i.index(N) || i.size]
    set_identifier(header)
    i[0, header.size] = ''
    i.lstrip!
  end
  i.delete!(N)
  i.delete!(R)
  i.upcase! # This here since as of Feb 2016.
  @input = i
end

#set_word_size(i = DEFAULT_WORD_SIZE) ⇒ Object

#

set_word_size

This sets the word size, also known as “word length”.

#

124
125
126
127
128
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 124

# Stores the given word size (also known as "word length") in @word_size.
# The value is assigned as given, without conversion or validation.
def set_word_size(
    i = DEFAULT_WORD_SIZE
  )
  @word_size = i
end

#show_resultObject

#

show_result

#

322
323
324
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 322

# Hands @result to e(), but only when be_verbose? is truthy.
def show_result
  e @result if be_verbose?
end

#total_count?Boolean

#

total_count?

#

Returns:

  • (Boolean)

329
330
331
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 329

# Returns the sum of all word counts stored in @hash.
#
# Array#sum starts at 0, so an empty hash yields 0. The former
# inject(:+) returned nil in that case, which then leaked into the report
# and into the frequency calculation.
#
# @return [Integer] the total number of counted words
def total_count?
  @hash.values.sum
end

#word_size? ⇒ Integer

#

word_size?

#

Returns:

  • (Integer)

381
382
383
# File 'lib/bioroebe/utility_scripts/compseq/compseq.rb', line 381

# Reader for @word_size as assigned by set_word_size.
def word_size?
  @word_size
end