Class: Bioroebe::CalculateGCContent

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/calculate/calculate_gc_content.rb

Overview

Bioroebe::CalculateGCContent

Constant Summary collapse

DEFAULT_INPUT =
#

DEFAULT_INPUT

#
'ATCATTTTCTTTAAACGGGAAAT'
ROUND_TO_N_DECIMAL_POSITIONS =
#

ROUND_TO_N_DECIMAL_POSITIONS

This constant determines to how many decimal places the percentage result is rounded.

#
4

Constants inherited from CommandlineApplication

Bioroebe::CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

Bioroebe::ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = DEFAULT_INPUT, round_to_n_numbers = ROUND_TO_N_DECIMAL_POSITIONS, run_already = true) ⇒ CalculateGCContent

#

initialize

The first argument is the input string that we are going to test.

#


41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 41

# Set up a new calculator instance.
#
# i                  - the input to analyse; handed straight to set_data.
# round_to_n_numbers - decimal positions handed to set_round_to_n_numbers.
#                      Any value whose String form contains 'dont' is read
#                      as a request not to run yet; the default precision
#                      is used in that case.
# run_already        - when truthy, run is invoked before returning.
def initialize(
    i                  = DEFAULT_INPUT,
    round_to_n_numbers = ROUND_TO_N_DECIMAL_POSITIONS,
    run_already        = true
  )
  reset
  set_data(i)
  postpone_run = round_to_n_numbers.to_s.include?('dont')
  if postpone_run
    # The second argument carried a flag rather than a precision.
    precision   = ROUND_TO_N_DECIMAL_POSITIONS
    run_already = false
  else
    precision = round_to_n_numbers
  end
  set_round_to_n_numbers(precision)
  run if run_already
end

Class Method Details

.gc_percentage(of_this_string, round_to_n_positions = 2) ⇒ Object

#

Bioroebe::CalculateGCContent.gc_percentage

This will just return the specific percentage value.

Invocation examples:

Bioroebe::CalculateGCContent.gc_percentage 'CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGACTGGGAACCTGCGGGCAGTAGGTGGAAT'
Bioroebe::CalculateGCContent.gc_percentage 'CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGACTGGGAACCTGCGGGCAGTAGGTGGAAT',7
#


245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 245

# Return the GC content of a nucleotide sequence as a formatted String.
#
# of_this_string       - either the sequence itself or the path of an
#                        existing regular file that holds the sequence.
#                        For files, FASTA header lines (starting with '>')
#                        and all whitespace are dropped before counting.
# round_to_n_positions - number of decimals in the returned String.
#
# G and C are counted case-insensitively. An empty sequence yields a
# formatted zero (e.g. '0.00') instead of 'NaN'.
#
# Returns a String such as '50.00'.
def self.gc_percentage(
    of_this_string,
    round_to_n_positions = 2
  )
  if of_this_string && File.file?(of_this_string)
    content = File.read(of_this_string)
    if content.include?('>') # Assume FASTA in this case.
      # Header lines are annotations, not sequence data.
      content = content.lines.reject { |line| line.lstrip.start_with?('>') }.join
    end
    # Line breaks inside the file must not count towards the length.
    of_this_string = content.gsub(/\s+/, '')
  end
  total = of_this_string.size
  n_gc  = of_this_string.count('GCgc')
  # Guard against 0 / 0.0, which would otherwise be formatted as "NaN".
  percentage = total.zero? ? 0.0 : (n_gc * 100.0 / total)
  format("%.#{round_to_n_positions}f", percentage)
end

Instance Method Details

#compare_gc_content ⇒ Object

#

compare_gc_content

This will batch-compare the GC content of all registered fasta files this class presently handles.

Has to be invoked by the end user.

#


221
222
223
224
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 221

# Switch to verbose mode via set_be_verbose, then delegate to
# calculate_gc_content (defined outside this section) and return
# whatever that call returns.
def compare_gc_content
  set_be_verbose
  calculate_gc_content
end

#gc_content_in_percent? ⇒ Array Also known as: results, results?

#

gc_content_in_percent?

This will return the Array that holds our GC content, in percent.

#

Returns:

  • (Array) — the GC percentages collected so far


164
165
166
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 164

# Reader for @gc_content_in_percent, the collection that set_results
# appends to. Despite the trailing "?", the stored object itself is
# returned, not a boolean.
def gc_content_in_percent?
  @gc_content_in_percent
end

#process_everything ⇒ Object

#

process_everything

#


172
173
174
175
176
177
178
179
180
181
182
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 172

# Compute the GC percentage of every entry in @array_strings and hand
# each result to set_results.
#
# All whitespace (spaces, tabs, line breaks) is removed before counting,
# so multi-line input does not inflate the sequence length. Only
# uppercase 'G' and 'C' are counted, hence the input must already be
# upcased. An entry without any characters left yields 0.0 rather than
# NaN.
#
# Returns @array_strings (the receiver of each).
def process_everything
  @array_strings.each { |entry|
    sequence = entry.gsub(/\s+/, '') # Kill all whitespace first.
    total_length = sequence.size
    n_gc = sequence.count('GC') # Uppercase only.
    percentage =
      if total_length.zero?
        0.0 # Avoid dividing by zero for empty input.
      else
        (n_gc * 100.0 / total_length).round(@round_to_n_numbers)
      end
    # '%3f' sets a minimum width of 3, not a precision, so six decimals
    # are always emitted (e.g. "50.000000"). Kept as-is so that stored
    # results keep their established shape.
    set_results('%3f' % percentage)
  }
end

#report_results ⇒ Object Also known as: report

#

report_results (report tag)

#


189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 189

# Print one line per entry of @gc_content_in_percent via erev.
#
# The sequence shown is the value of string?; when it is longer than 30
# characters it is shortened, either as "first 28 chars...last 3 chars"
# (if @truncate_in_biopython_style is set) or as the first 31 characters
# followed by ' [Truncated]'. The sequence part is only included when
# @display_dna_string is truthy. Each percentage is re-rounded to
# @round_to_n_numbers decimals before it is displayed.
def report_results
  @gc_content_in_percent.each do |percentage|
    sequence = string?
    if sequence.size > 30
      sequence =
        if @truncate_in_biopython_style # Use ... rather than [Truncated].
          sequence[0..27] + '...' + sequence[-3, 3]
        else
          sequence[0..30] + ' [Truncated]'
        end
    end
    sequence = simp(sequence)
    label = ''.dup
    label << 'for (' + sequence + rev + ')' if @display_dna_string
    rounded = percentage.to_s.to_f.round(@round_to_n_numbers).to_s
    erev 'The GC content ' + label + ' is: ' + sfancy(rounded) + rev + '%'
  end
end

#reset ⇒ Object

#

reset (reset tag)

#


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 59

# Restore the instance to its initial state: let the parent class reset
# itself first, then re-initialise every instance variable owned by
# this class.
def reset
  super()
  # Containers: registered input sequences and the results computed
  # from them.
  @array_strings         = []
  @gc_content_in_percent = []
  # Behaviour switches.
  @debug                       = false # No debug output by default.
  @display_dna_string          = true  # Show the sequence in the report.
  @truncate_in_biopython_style = false # Use ' [Truncated]', not '...'.
  @report_results              = true  # Report the results.
end

#round_to_n_numbers? ⇒ Object Also known as: round_to_n_numbers

#

round_to_n_numbers?

#

Returns:

  • (Object) — the value stored by set_round_to_n_numbers (ROUND_TO_N_DECIMAL_POSITIONS by default)


155
156
157
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 155

# Reader for @round_to_n_numbers, the value stored by
# set_round_to_n_numbers. Despite the trailing "?", the stored value
# itself is returned, not a boolean.
def round_to_n_numbers?
  @round_to_n_numbers
end

#run ⇒ Object

#

run (run tag)

#


229
230
231
232
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 229

# Main entry point: compute all results first, then print them, unless
# reporting has been switched off via @report_results.
#
# Returns the value of report_results, or nil when reporting is off.
def run
  process_everything
  if @report_results
    report_results
  end
end

#set_data(i = DEFAULT_INPUT) ⇒ Object Also known as: add, append

#

set_data

#


90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 90

# Register one more sequence for later processing.
#
# i - a String holding the sequence or the path of a regular file that
#     contains it, an Array (only its first element is used), or nil
#     (DEFAULT_INPUT is used instead).
#
# The resulting text is upcased and appended to @array_strings.
#
# Returns @array_strings.
def set_data(
    i = DEFAULT_INPUT
  )
  i = i.first if i.is_a? Array
  i = DEFAULT_INPUT if i.nil?
  # Only regular files are read: a directory that happens to share its
  # name with the input would make File.read raise. Arrays are skipped
  # because File.file? cannot test them.
  i = File.read(i) if !i.is_a?(Array) && File.file?(i)
  if @debug
    ewarn 'Debugging next in class CalculateGCContent, '\
          'showing the input to set_data() next:'
    pp i
  end
  if i.is_a? Array
    # Only reached when the first element of the given Array was itself
    # an Array: merge all of its parts into one sequence.
    i = i.flatten.join
    if @debug
      ewarn 'Debugging next in class CalculateGCContent, '\
            'showing the modified input now:'
      pp i
    end
  end
  @array_strings << i.upcase
end

#set_results(i) ⇒ Object

#

set_results

The results are synonymous with @gc_content_in_percent.

#


144
145
146
147
148
149
150
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 144

# Store one or several results in @gc_content_in_percent.
#
# i - a String is appended as a single result; anything else is passed
#     to Array#concat, i.e. treated as a list of results.
#
# Returns @gc_content_in_percent.
def set_results(i)
  return @gc_content_in_percent << i if i.is_a?(String)

  @gc_content_in_percent.concat(i)
end

#set_round_to_n_numbers(i = ROUND_TO_N_DECIMAL_POSITIONS) ⇒ Object

#

set_round_to_n_numbers

#


122
123
124
125
126
127
128
129
130
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 122

# Remember how many decimal positions results are rounded to.
#
# i - the number of positions; nil and :default both select
#     ROUND_TO_N_DECIMAL_POSITIONS.
#
# Returns the value stored in @round_to_n_numbers.
def set_round_to_n_numbers(
    i = ROUND_TO_N_DECIMAL_POSITIONS
  )
  wants_default = [nil, :default].include?(i)
  @round_to_n_numbers = wants_default ? ROUND_TO_N_DECIMAL_POSITIONS : i
end

#string? ⇒ String

#

string?

#

Returns:

  • (String) — all registered sequences joined into one String


135
136
137
# File 'lib/bioroebe/calculate/calculate_gc_content.rb', line 135

# Return every entry of @array_strings joined into one String, without
# any separator. Despite the trailing "?", a String is returned, not a
# boolean.
def string?
  @array_strings.join('')
end