Class: Bioroebe::CountAmountOfNucleotides

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/count/count_amount_of_nucleotides.rb

Overview

Bioroebe::CountAmountOfNucleotides

Constant Summary collapse

USE_THIS_START_CODON =
#

USE_THIS_START_CODON

#
'ATG'
DEFAULT_DNA_STRING_TO_USE =
#

DEFAULT_DNA_STRING_TO_USE

#
'ATCGAAAAAAATAAATAAAACAAATATATA'
FORMAT_STRING1 =
#

FORMAT_STRING1

#
'%-12s'
FORMAT_STRING2 =
#

FORMAT_STRING2

#
'%4s'

Constants inherited from CommandlineApplication

Bioroebe::CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

Bioroebe::ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(this_dna_string = DEFAULT_DNA_STRING_TO_USE, run_already = true) ⇒ CountAmountOfNucleotides

#

initialize

#


49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 49

# ========================================================================= #
# === initialize
#
# Resets the instance, stores the DNA string and (optionally) runs the
# report right away.
#
# this_dna_string: the sequence to analyse. It is first passed through
#                  parse_commandline_arguments() and then handed to
#                  set_dna_string().
#
# run_already:     a truthy value triggers run() at the end. The Symbol
#                  :do_not_run_yet suppresses the run; the Symbol
#                  :use_cliner enables @use_cliner and then runs.
#
# An optional block may yield :display_short_form, or a Hash that
# contains the key :use_colours.
# ========================================================================= #
def initialize(
    this_dna_string = DEFAULT_DNA_STRING_TO_USE,
    run_already     = true
  )
  super()
  reset
  set_dna_string(
    parse_commandline_arguments(this_dna_string)
  )
  # ======================================================================= #
  # === Evaluate the value yielded by an optional block
  # ======================================================================= #
  if block_given?
    yielded = yield
    case yielded
    # ===================================================================== #
    # === :display_short_form
    # ===================================================================== #
    when :display_short_form
      @display_short_form = true
    # ===================================================================== #
    # === Hash (only the key :use_colours is honoured)
    # ===================================================================== #
    when Hash
      if yielded.key?(:use_colours)
        set_use_colours(yielded.delete(:use_colours))
      end
    end
  end
  # ======================================================================= #
  # === Translate the symbolic run modes into a plain true/false decision
  # ======================================================================= #
  run_already =
    case run_already
    when :do_not_run_yet
      false
    when :use_cliner
      @use_cliner = true
      true
    else
      run_already
    end
  run if run_already
end

Class Method Details

.show_composition(i) ⇒ Object

#

CountAmountOfNucleotides.show_composition

This class method will return a Hash with the proper A,T,C,G count.

Specific Usage Example:

CountAmountOfNucleotides.show_composition "ATATTTGCCCTATTTTTCC" # => {"A"=>3, "T"=>10, "G"=>1, "C"=>5}
#


457
458
459
460
461
462
463
464
465
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 457

# ========================================================================= #
# === CountAmountOfNucleotides.show_composition
#
# Builds a non-running instance (second argument is false) for the given
# sequence and returns a Hash with the upcased one-letter nucleotide
# codes as keys, in the order A, T, G, C, and their counts as values.
# ========================================================================= #
def self.show_composition(i)
  counter = CountAmountOfNucleotides.new(i, false)
  {
    'A' => counter.n_a, # n Adenines
    'T' => counter.n_t, # n Thymines
    'G' => counter.n_g, # n Guanines
    'C' => counter.n_c  # n Cytosines
  }
end

Instance Method Details

#dna_string?Boolean

#

dna_string?

#

Returns:

  • (Boolean)


259
260
261
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 259

# ========================================================================= #
# === dna_string?
#
# Plain reader: returns the current value of @dna_string (the stored
# sequence itself, not a true/false value).
# ========================================================================= #
def dna_string?
  @dna_string
end
#

menu (menu tag)

#


155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 155

# ========================================================================= #
# === menu (menu tag)
#
# Interprets a commandline flag. An Array is processed entry by entry
# (recursively). The only flag recognised here enables the short-form
# display, via @display_short_form.
# ========================================================================= #
def menu(i)
  return i.each { |entry| menu(entry) } if i.is_a?(Array)

  # ======================================================================= #
  # === --short-form
  #
  # Note that only the first pattern is case-insensitive.
  # ======================================================================= #
  short_form_patterns = [
    /^-?-?short(-|_)?form$/i,
    /^-?-?short$/
  ]
  @display_short_form = true if short_form_patterns.any? { |pattern| pattern === i }
end

#n_a(i = @dna_string) ⇒ Object

#

n_a

#


231
232
233
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 231

# ========================================================================= #
# === n_a
#
# Returns how many 'A' characters (adenines) the given input contains.
# The input defaults to @dna_string. Only the upcased letter is counted.
# ========================================================================= #
def n_a(i = @dna_string)
  i.count('A')
end

#n_c(i = @dna_string) ⇒ Object

#

n_c

#


245
246
247
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 245

# ========================================================================= #
# === n_c
#
# Returns how many 'C' characters (cytosines) the given input contains.
# The input defaults to @dna_string. Only the upcased letter is counted.
# ========================================================================= #
def n_c(i = @dna_string)
  i.count('C')
end

#n_g(i = @dna_string) ⇒ Object

#

n_g

#


238
239
240
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 238

# ========================================================================= #
# === n_g
#
# Returns how many 'G' characters (guanines) the given input contains.
# The input defaults to @dna_string. Only the upcased letter is counted.
# ========================================================================= #
def n_g(i = @dna_string)
  i.count('G')
end

#n_t(i = @dna_string) ⇒ Object

#

n_t

#


252
253
254
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 252

# ========================================================================= #
# === n_t
#
# Returns how many 'T' characters (thymines) the given input contains.
# The input defaults to @dna_string. Only the upcased letter is counted.
# ========================================================================= #
def n_t(i = @dna_string)
  i.count('T')
end

#parse_commandline_arguments(i) ⇒ Object

#

parse_commandline_arguments

#


135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 135

# ========================================================================= #
# === parse_commandline_arguments
#
# Non-Array input is returned untouched. For an Array, every entry that
# starts with '--' is forwarded to menu() and then removed from the
# Array IN PLACE - the caller's Array is modified. The (possibly
# shortened) Array is returned.
# ========================================================================= #
def parse_commandline_arguments(i)
  return i unless i.is_a?(Array)

  is_a_flag = ->(entry) { entry.start_with?('--') }
  if i.any?(&is_a_flag)
    # ===================================================================== #
    # The flags are first handed to menu(); only afterwards are they
    # dropped from the given input.
    # ===================================================================== #
    menu(select_entries_starting_with_two_hyphens(i))
    i.reject!(&is_a_flag)
  end
  i
end

#report_AT_content(n_percent = :calculate_anew) ⇒ Object

#

report_AT_content

This method mirrors report_GC_content: unless a precomputed percentage is passed in, it calculates the share of A+T in the DNA string itself and then reports it.

#


208
209
210
211
212
213
214
215
216
217
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 208

# ========================================================================= #
# === report_AT_content
#
# Reports the AT content (A+T) of @dna_string, in percent, rounded to
# two digits.
#
# n_percent: either an already calculated percentage (any Numeric), or
#            the default Symbol :calculate_anew, in which case the value
#            is determined here.
#
# The per-nucleotide counts are taken from @hash_percentages (which is
# filled by set_percentage_of()). If a count is not available there - for
# instance because this method was called before report_everything() -
# it is counted directly from @dna_string rather than failing on nil.
# An empty DNA string is reported as 0.0 % instead of NaN.
# ========================================================================= #
def report_AT_content(n_percent = :calculate_anew)
  case n_percent
  when :calculate_anew
    total_size = @dna_string.size
    if total_size.zero?
      n_percent = 0.0 # Avoid 0.0 / 0, which would yield NaN.
    else
      n_adenines = @hash_percentages['A'] || @dna_string.count('A')
      n_thymines = @hash_percentages['T'] || @dna_string.count('T')
      n_percent  = ((n_adenines + n_thymines) * 100.0) / total_size
    end
  end
  erev 'The AT content (A+T) is: '+
        sfancy(n_percent.round(2).to_s)+rev+' %'
end

#report_everything(i = @dna_string) ⇒ Object

#

report_everything

#


333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 333

# ========================================================================= #
# === report_everything
#
# Prints the full report: the (possibly truncated) sequence, the count
# and percentage of each nucleotide, and finally the totals and the
# number of start codons.
#
# i: defaults to @dna_string. It is only used for the emptiness check
#    and for the total number of nucleotides; the individual counts and
#    percentages are derived from @dna_string by the helper methods.
#
# The "[TRUNCATED at 80]" notice is only appended when the displayed
# sequence really is shorter than @dna_string - that is, when
# return_truncated_dna_string() did cut the sequence.
# ========================================================================= #
def report_everything(
    i = @dna_string
  )
  if i.empty?
    erev '@dna_string is empty, this means that you have not yet assigned a '
    erev 'string. Please assign a DNA string to this class first.'
  else
    # ===================================================================== #
    # This else clause can output too much / too long strings.
    #
    # Since this can become quite spammy, we will report a truncated
    # version.
    # ===================================================================== #
    shown_dna_string = return_truncated_dna_string(:default, false)
    # ===================================================================== #
    # Only announce a truncation if one actually happened.
    # ===================================================================== #
    truncated_notice =
      if shown_dna_string.size < @dna_string.size
        return_colourized_truncated_at_message
      else
        ''
      end
    erev "Analyzing the DNA string `"+
      colourize_dna(
        remove_escape_sequences(shown_dna_string),
        use_colours?
      )+
      "#{rev}"+
      truncated_notice+
      "`.#{N}#{N}"
    n_nucleotides = i.size.to_i
    erev "These #{simp(n_nucleotides.to_s)}#{rev} Nucleotides "\
         "were found:#{rev}#{N}#{N}"
    cliner if @use_cliner
    # ===================================================================== #
    # set_percentage_of() stores its result in @percentage, so it has to
    # be called right before the matching line is printed.
    # ===================================================================== #
    set_percentage_of(:adenine)
    e
    erev(
      @l_padding+slateblue(@format_string1 % 'Adenines: ')+simp(
        (@format_string2 % n_a)
      )+@percentage
    )
    set_percentage_of(:guanine)
    erev(
      @l_padding+slateblue(@format_string1 % 'Guanines: ')+simp(
        (@format_string2 % n_g)
      )+@percentage
    )
    set_percentage_of(:cytosine)
    erev(
      @l_padding+slateblue(@format_string1 % 'Cytosines: ')+simp(
        (@format_string2 % n_c)
      )+@percentage
    )
    set_percentage_of(:thymine)
    erev(
      @l_padding+slateblue(@format_string1 % 'Thymine: ')+simp(
        (@format_string2 % n_t)
      )+@percentage+N+rev # Append extra newline.
    )
    cliner if @use_cliner
    report_total_amount
    report_how_many_start_codons_exist
  end
end

#report_GC_contentObject Also known as: report_gc_content

#

report_GC_content

#


195
196
197
198
199
200
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 195

# ========================================================================= #
# === report_GC_content
#
# Reports the GC content (G+C) of @dna_string, in percent, rounded to
# two digits.
#
# The per-nucleotide counts are taken from @hash_percentages (which is
# filled by set_percentage_of()). If a count is not available there, it
# is counted directly from @dna_string rather than failing on nil. An
# empty DNA string is reported as 0.0 % instead of NaN.
# ========================================================================= #
def report_GC_content
  total_size = @dna_string.size
  n_percent =
    if total_size.zero?
      0.0 # Avoid 0.0 / 0, which would yield NaN.
    else
      n_guanines  = @hash_percentages['G'] || @dna_string.count('G')
      n_cytosines = @hash_percentages['C'] || @dna_string.count('C')
      ((n_guanines + n_cytosines) * 100.0) / total_size
    end
  erev "The GC content (G+C) is: #{sfancy(n_percent.round(2).to_s)}#{rev} %"
end

#report_how_many_nucleotides_we_have_foundObject

#

report_how_many_nucleotides_we_have_found

#


308
309
310
311
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 308

# ========================================================================= #
# === report_how_many_nucleotides_we_have_found
#
# Prints the length of @dna_string, preceded by a newline.
# ========================================================================= #
def report_how_many_nucleotides_we_have_found
  total = simportant(@dna_string.size.to_s)
  erev "#{N}In total #{total}#{rev} nucleotides were found."
end

#report_how_many_start_codons_existObject

#

report_how_many_start_codons_exist

#


316
317
318
319
320
321
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 316

# ========================================================================= #
# === report_how_many_start_codons_exist
#
# Counts the non-overlapping occurrences of USE_THIS_START_CODON in the
# DNA string (in any reading frame) and prints the result.
# ========================================================================= #
def report_how_many_start_codons_exist
  start_codon_pattern = Regexp.new(USE_THIS_START_CODON)
  n_start_codons = dna_string?.scan(start_codon_pattern).length
  counted = simp(n_start_codons)
  erev "In the given DNA string #{counted}#{rev} start codons "\
       "(#{return_colourized_start_codon}#{rev}) were found.#{N}"
end

#report_the_amount_of_nucleotides_in_short_form_on_a_single_lineObject

#

report_the_amount_of_nucleotides_in_short_form_on_a_single_line

#


280
281
282
283
284
285
286
287
288
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 280

# ========================================================================= #
# === report_the_amount_of_nucleotides_in_short_form_on_a_single_line
#
# Prints, via e(), the single-line summary that is built by
# return_the_amount_of_nucleotides_in_short_form_on_a_single_line().
# ========================================================================= #
def report_the_amount_of_nucleotides_in_short_form_on_a_single_line
  # ======================================================================= #
  # The output consists of the four counts, separated by a single space,
  # in this order:
  #
  #   A C G T
  #
  # ======================================================================= #
  e return_the_amount_of_nucleotides_in_short_form_on_a_single_line
end

#report_total_amountObject

#

report_total_amount

#


222
223
224
225
226
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 222

# ========================================================================= #
# === report_total_amount
#
# Prints the summary section: first the total number of nucleotides,
# then the AT content and finally the GC content.
# ========================================================================= #
def report_total_amount
  report_how_many_nucleotides_we_have_found
  report_AT_content
  report_GC_content
end

#resetObject

#

reset

#


97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 97

# ========================================================================= #
# === reset
#
# Calls the parent's reset and then puts all instance variables that are
# specific to this class back to their initial values.
# ========================================================================= #
def reset
  super()
  # ======================================================================= #
  # === Display toggles
  #
  # @use_cliner:         print cliner separators around the table.
  # @display_short_form: if true, we will only report the numbers.
  # ======================================================================= #
  @use_cliner         = false
  @display_short_form = false
  # ======================================================================= #
  # === Formatting
  #
  # @l_padding means left_padding. We must make this more dynamic,
  # in that it must depend on the amount of nucleotides given.
  # ======================================================================= #
  @format_string1 = FORMAT_STRING1
  @format_string2 = FORMAT_STRING2
  @l_padding      = '  '
  # ======================================================================= #
  # === Data
  #
  # @hash_percentages is filled by set_percentage_of(); @dna_string
  # starts out as an empty, mutable String.
  # ======================================================================= #
  @hash_percentages = {}
  @dna_string       = ''.dup
end

#return_colourized_start_codonObject Also known as: colourized_start_codon

#

return_colourized_start_codon

#


273
274
275
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 273

# ========================================================================= #
# === return_colourized_start_codon
#
# Returns USE_THIS_START_CODON after it has been passed through swarn().
# ========================================================================= #
def return_colourized_start_codon
  swarn(USE_THIS_START_CODON)
end

#return_colourized_truncated_at_messageObject

#

return_colourized_truncated_at_message

#


326
327
328
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 326

# ========================================================================= #
# === return_colourized_truncated_at_message
#
# Returns the bracketed notice ' [TRUNCATED at 80]', with the text
# passed through swarn() and followed by rev.
# ========================================================================= #
def return_colourized_truncated_at_message
  format(' [%s%s]', swarn('TRUNCATED at 80'), rev)
end

#return_the_amount_of_nucleotides_in_short_form_on_a_single_lineObject

#

return_the_amount_of_nucleotides_in_short_form_on_a_single_line

The order of nucleotides that is to be favoured here, is:

A C G T
#


298
299
300
301
302
303
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 298

# ========================================================================= #
# === return_the_amount_of_nucleotides_in_short_form_on_a_single_line
#
# Returns the four nucleotide counts as one String, separated by a
# single space, in the order A C G T.
# ========================================================================= #
def return_the_amount_of_nucleotides_in_short_form_on_a_single_line
  [
    n_a, # A
    n_c, # C
    n_g, # G
    n_t  # T
  ].join(' ')
end

#return_truncated_dna_string(do_truncate = :default, add_the_truncated_at_notice = true) ⇒ Object

#

return_truncated_dna_string

We can toggle whether we truncate or whether we will not.

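As of December 2020, the truncation notice is only added when the sequence is actually truncated, that is, when truncation is enabled and the input sequence is longer than the threshold of 100 characters. In that case only the first 80 characters are kept.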

#


392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 392

# ========================================================================= #
# === return_truncated_dna_string
#
# Returns @dna_string, shortened to its first 80 characters if
# truncation is enabled AND the string is longer than 100 characters.
# Otherwise @dna_string itself is returned.
#
# do_truncate:                 true / false, or :default, which asks
#                              Bioroebe.do_truncate? instead.
# add_the_truncated_at_notice: if true, a truncated result is prefixed
#                              with rev and followed by the
#                              "TRUNCATED at 80" notice.
# ========================================================================= #
def return_truncated_dna_string(
    do_truncate                 = :default,
    add_the_truncated_at_notice = true
  )
  # ======================================================================= #
  # === :default
  # ======================================================================= #
  do_truncate = Bioroebe.do_truncate? if do_truncate == :default
  return @dna_string unless do_truncate && @dna_string.size > 100

  shortened = ''.dup
  shortened << rev if add_the_truncated_at_notice
  shortened << @dna_string[0..79]
  shortened << return_colourized_truncated_at_message if add_the_truncated_at_notice
  shortened
end

#runObject

#

run (run tag)

#


439
440
441
442
443
444
445
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 439

# ========================================================================= #
# === run (run tag)
#
# Prints the single-line summary if @display_short_form is set, and the
# full report otherwise.
# ========================================================================= #
def run
  return report_the_amount_of_nucleotides_in_short_form_on_a_single_line if @display_short_form

  report_everything
end

#set_dna_string(i = DEFAULT_DNA_STRING_TO_USE) ⇒ Object

#

set_dna_string

This method also supports loading the content of a file - if the given input is a locally existing file, then the content of said file will be used.

#


421
422
423
424
425
426
427
428
429
430
431
432
433
434
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 421

# ========================================================================= #
# === set_dna_string
#
# Stores the upcased DNA string in @dna_string and returns it.
#
# i: the sequence to use. nil selects DEFAULT_DNA_STRING_TO_USE. An Array
#    is joined with ' ' first. Anything else is converted via .to_s.
#
# If the resulting String is the path of an existing regular file, then
# the content of that file is used instead (stripped, and with all
# newlines removed). Directories are deliberately not treated as files,
# as they can not be read.
#
# The given object is never modified: the upcased sequence is always a
# new String.
# ========================================================================= #
def set_dna_string(
    i = DEFAULT_DNA_STRING_TO_USE
  )
  i = DEFAULT_DNA_STRING_TO_USE if i.nil?
  # ======================================================================= #
  # Flatten Arrays first, so that a single commandline argument that
  # names a file is recognised as such by the check below.
  # ======================================================================= #
  i = i.join(' ').strip if i.is_a? Array
  i = i.to_s
  i = File.read(i).strip.delete(N) if File.file?(i)
  @dna_string = i.upcase # Want to keep it upcased.
end

#set_percentage_of(i) ⇒ Object

#

set_percentage_of

#


173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 173

# ========================================================================= #
# === set_percentage_of
#
# Determines how often the given nucleotide occurs in @dna_string.
#
# i: one of the Symbols :adenine, :guanine, :cytosine, :thymine (these
#    are reduced to their upcased first letter), or anything whose .to_s
#    is accepted by String#count.
#
# Side effects: the absolute count (not the percentage) is stored in
# @hash_percentages under the one-letter key, and the formatted
# percentage column is stored in @percentage, which is also returned.
# ========================================================================= #
def set_percentage_of(i)
  if %i[adenine guanine cytosine thymine].include?(i)
    i = i.to_s[0, 1].capitalize
  end
  occurrences = @dna_string.count(i.to_s)
  @hash_percentages[i] = occurrences
  share = (occurrences.to_f * 100) / @dna_string.size.to_f
  formatted = format('%.2f', share)
  # Pad single-digit percentages such as "5.00", so that they line up
  # with two-digit ones.
  formatted = " #{formatted}" if formatted.size == 4
  @percentage = "#{rev} | #{formatted} #{swarn('%')}"
end

#string?Boolean Also known as: string

#

string?

#

Returns:

  • (Boolean)


266
267
268
# File 'lib/bioroebe/count/count_amount_of_nucleotides.rb', line 266

# ========================================================================= #
# === string?
#
# Returns the sequence that this class operates on.
#
# The visible code of this class only ever assigns @dna_string (see
# reset and set_dna_string), never @string, so reading @string alone
# yielded nil here. @string is still honoured if it has been set (for
# instance by a parent class - NOTE(review): confirm whether one does);
# otherwise @dna_string is returned.
# ========================================================================= #
def string?
  @string || @dna_string
end