Class: Bioroebe::CountAmountOfAminoacids

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/count/count_amount_of_aminoacids.rb

Overview

Bioroebe::CountAmountOfAminoacids

Constant Summary collapse

DEFAULT_STRING_TO_USE =
#

DEFAULT_STRING_TO_USE

#
'GCCSLTLPCGYFGNTNAVRP*WKPR*R*QRH*HLKPYYWRRGGFTHGLTGTDCAVKFPSA*GTNTHIRPGHTH'

Constants inherited from CommandlineApplication

Bioroebe::CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

Bioroebe::ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = DEFAULT_STRING_TO_USE, run_already = true) ⇒ CountAmountOfAminoacids

#

initialize

#


39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 39

# ========================================================================= #
# === initialize
#
# Resets the instance, stores the input via set_string() and, unless
# disabled, runs the analysis right away.
#
# i           - the aminoacid input; defaults to DEFAULT_STRING_TO_USE.
# run_already - truthy to invoke run() at the end. The Symbol :use_cliner
#               additionally sets @use_cliner and is then treated as true.
#
# An optional block may return :be_quiet or :be_silent, in which case
# set_be_quiet is called before the run.
# ========================================================================= #
def initialize(
    i           = DEFAULT_STRING_TO_USE,
    run_already = true
  )
  super()
  reset
  set_string(i)
  # ======================================================================= #
  # === :use_cliner
  # ======================================================================= #
  if :use_cliner == run_already
    @use_cliner = true
    run_already = true
  end
  # ======================================================================= #
  # === Handle blocks next
  #
  # The block is only evaluated when one was actually passed.
  # ======================================================================= #
  set_be_quiet if block_given? && %i[be_quiet be_silent].include?(yield)
  run if run_already
end

Class Method Details

.return_composition_hash(of_this_sequence) ⇒ Object

#

Bioroebe::CountAmountOfAminoacids.return_composition_hash

This will simply return a Hash, which can then be used in downstream applications. Note that the Hash that will be returned will be sorted by the KEYS, in an alphabetical manner. That way it can be directly used in downstream applications, in particular graphical user interfaces.

Additionally, this method will always return ALL the 20 canonical aminoacids, and default to a value of 0 for them. The reason why this is done is because it was needed in a ruby-gtk widget to also show when there are aminoacids that are not found in a given protein.

#


299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 299

# ========================================================================= #
# === Bioroebe::CountAmountOfAminoacids.return_composition_hash
#
# Builds a new instance for the given sequence (the block passed to new
# returns :be_silent) and returns a Hash mapping aminoacid => count.
#
# Every entry of ::Bioroebe.return_array_of_common_aminoacids is present
# in the result, with 0 as value when the instance did not count it. The
# counts of the instance (hash?) are merged on top, and the result is
# ordered by key.
#
# The hash may then look like this:
#
#   {"A"=>9, "G"=>3, "K"=>4, "L"=>1, "S"=>1, "T"=>6}
#
# ========================================================================= #
def self.return_composition_hash(of_this_sequence)
  counter = new(of_this_sequence) { :be_silent }
  composition = {}
  # Seed all common aminoacids with 0 first, so absent ones still show up.
  ::Bioroebe.return_array_of_common_aminoacids.each { |aminoacid|
    composition[aminoacid] = 0
  }
  composition.merge!(counter.hash?)
  composition.sort_by { |aminoacid, _count| aminoacid }.to_h
end

Instance Method Details

#analyse_compositionObject

#

analyse_composition

#


114
115
116
117
118
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 114

# ========================================================================= #
# === analyse_composition
#
# Walks over every entry of @_ and increments its counter in @hash. This
# relies on @hash answering 0 for keys it has not seen yet (see reset).
# ========================================================================= #
def analyse_composition
  @_.each do |aminoacid|
    @hash[aminoacid] += 1 # Populate our main Hash here.
  end
end

#create_csv_fileObject

#

create_csv_file

This method will create a .csv file showing the aminoacid composition of the protein at hand.

The format is:

(1) one-letter shortcut for the amino acid at hand
(2) number of times this particular aminoacid occurs
(3) percentage value (15 digits maximum)
#


225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 225

# ========================================================================= #
# === create_csv_file
#
# Writes the aminoacid composition stored in @hash into the file
# "aminoacid_composition.csv" below log_dir?.
#
# One line is emitted per entry of return_array_of_one_letter_aminoacids,
# in the format:
#
#   one-letter code,number of occurrences,percentage
#
# Aminoacids missing from @hash are written with 0 for both numbers.
# Percentages are relative to n_aminoacids_in_total? and whole numbers
# are written without a fractional part (e. g. 5.0 becomes 5).
# ========================================================================= #
def create_csv_file
  frequency_of = @hash.sort_by(&:first).to_h # e. g. {"L"=>2, "Q"=>1, "R"=>2}
  csv = ''.dup
  return_array_of_one_letter_aminoacids.each do |letter|
    found = frequency_of.has_key?(letter)
    # ===================================================================== #
    # First the one-letter code, then how often this aminoacid occurs.
    # ===================================================================== #
    csv << letter+','
    csv << (found ? frequency_of[letter].to_s : '0')
    csv << ','
    # ===================================================================== #
    # Next, add the percentage value.
    # ===================================================================== #
    if found
      percentage = (frequency_of[letter].to_f * 100.0) /
                   n_aminoacids_in_total?.to_f
      # Clean up this value a bit, e. g. 5.0 == 5.
      percentage = percentage.to_i if percentage == percentage.to_i
      csv << percentage.to_s
    else
      csv << '0'
    end
    csv << N
  end
  target = "#{log_dir?}aminoacid_composition.csv"
  notification = 'A '+yellowgreen('.csv file')+rev+
                 ' showing the aminoacid composition was'
  erev notification
  erev "created at #{sfile(target)}"
  write_what_into(csv, target)
end

#hash?Boolean

#

hash?

#

Returns:

  • (Hash)


88
89
90
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 88

# ========================================================================= #
# === hash?
#
# Returns @hash itself (not a copy), so changes made by the caller are
# visible to this object. Despite the trailing "?" this is a plain getter
# and not a predicate.
# ========================================================================= #
def hash?
  return @hash
end

#input?Boolean Also known as: string?, string

#

input?

#

Returns:

  • (Array)


106
107
108
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 106

# ========================================================================= #
# === input?
#
# Returns @_ itself (not a copy). Despite the trailing "?" this is a
# plain getter and not a predicate.
# ========================================================================= #
def input?
  return @_
end

#n_aminoacids?Boolean

#

n_aminoacids?

#

Returns:

  • (Integer)


132
133
134
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 132

# ========================================================================= #
# === n_aminoacids?
#
# Returns the size of @_, i. e. the number of stored entries. Nothing is
# filtered here, so every entry of @_ is counted.
# ========================================================================= #
def n_aminoacids?
  return @_.size
end

#n_aminoacids_in_total?Boolean

#

n_aminoacids_in_total?

#

Returns:

  • (Integer)


189
190
191
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 189

# ========================================================================= #
# === n_aminoacids_in_total?
#
# Returns the size of whatever return_sequence_without_trailing_stop_codon
# yields.
# ========================================================================= #
def n_aminoacids_in_total?
  sequence = return_sequence_without_trailing_stop_codon
  sequence.size
end

#report_how_many_aminoacids_we_have_foundObject

#

report_how_many_aminoacids_we_have_found

#


206
207
208
209
210
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 206

# ========================================================================= #
# === report_how_many_aminoacids_we_have_found
#
# Prints, via e(), a one-line summary containing the value of
# n_aminoacids_in_total? (passed through simportant()).
# ========================================================================= #
def report_how_many_aminoacids_we_have_found
  total = n_aminoacids_in_total?.to_s
  e "#{rev}#{N}In total, #{simportant(total)}#{rev} Aminoacids were found."
end

#report_the_atomic_composition(use_this_aminoacid_sequence = return_sequence_without_trailing_stop_codon) ⇒ Object

#

report_the_atomic_composition

#


269
270
271
272
273
274
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 269

# ========================================================================= #
# === report_the_atomic_composition
#
# Hands the given aminoacid sequence to ::Bioroebe.show_atomic_composition
# and returns whatever that call returns. When no argument is supplied,
# the result of return_sequence_without_trailing_stop_codon is used.
# ========================================================================= #
def report_the_atomic_composition(
    use_this_aminoacid_sequence = return_sequence_without_trailing_stop_codon
  )
  # No trailing newline is emitted after the report.
  ::Bioroebe.show_atomic_composition(use_this_aminoacid_sequence)
end

#report_the_molecular_mass_of_these_aminoacidsObject

#

report_the_molecular_mass_of_these_aminoacids

#


196
197
198
199
200
201
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 196

# ========================================================================= #
# === report_the_molecular_mass_of_these_aminoacids
#
# Asks ::Bioroebe.amino_acid_average_mass for the mass of the sequence
# returned by return_sequence_without_trailing_stop_codon, rounds it to
# two decimal places and prints it via e().
# ========================================================================= #
def report_the_molecular_mass_of_these_aminoacids
  sequence = return_sequence_without_trailing_stop_codon
  average_mass = ::Bioroebe.amino_acid_average_mass(sequence).to_f
  rounded_mass = average_mass.round(2).to_s
  e "#{rev}The (average) molecular mass of this protein is: "\
    "#{sfancy(rounded_mass)}#{rev} Dalton"
end

#report_the_number_of_negatively_and_positively_charged_residuesObject

#

report_the_number_of_negatively_and_positively_charged_residues

#


174
175
176
177
178
179
180
181
182
183
184
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 174

# ========================================================================= #
# === report_the_number_of_negatively_and_positively_charged_residues
#
# Counts 'D' plus 'E' (Asp + Glu) and 'R' plus 'K' (Arg + Lys) in the
# sequence returned by return_sequence_without_trailing_stop_codon and
# prints both totals via e(), framed by an e() call without arguments
# before and after.
# ========================================================================= #
def report_the_number_of_negatively_and_positively_charged_residues
  sequence = return_sequence_without_trailing_stop_codon
  n_negative = %w[D E].inject(0) { |total, letter| total + sequence.count(letter) }
  n_positive = %w[R K].inject(0) { |total, letter| total + sequence.count(letter) }
  e
  # The positively charged residues are reported first.
  [
    ['positively charged residues', '(Arg + Lys)', n_positive],
    ['negatively charged residues', '(Asp + Glu)', n_negative]
  ].each { |label, residues, count|
    e "Total number of #{mediumspringgreen(label)}"\
      "#{rev} #{residues}: #{rev}#{sfancy(count.to_s)}#{rev}"
  }
  e
end

#resetObject

#

reset

#


72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 72

# ========================================================================= #
# === reset
#
# Calls the parent reset, (re)creates @hash as an empty Hash answering 0
# for unknown keys, and finally calls set_be_verbose.
# ========================================================================= #
def reset
  super()
  # ======================================================================= #
  # === @hash
  #
  # This Hash will keep the protein composition. The default of 0 lets
  # counters be incremented without initialising each key first.
  # ======================================================================= #
  @hash = Hash.new(0)
  # ======================================================================= #
  # === :be_verbose
  # ======================================================================= #
  set_be_verbose
end

#return_sequence_without_trailing_stop_codonObject

#

return_sequence_without_trailing_stop_codon

#


123
124
125
126
127
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 123

# ========================================================================= #
# === return_sequence_without_trailing_stop_codon
#
# Returns a copy of the input (input?) with one trailing '*' entry
# removed, if such an entry is present. At most one '*' is removed.
#
# The stored input is left untouched. Calling pop on the object returned
# by input? would shorten the stored sequence itself, so that repeated
# calls strip further trailing '*' entries and the size reported for the
# input changes after the first call.
# ========================================================================= #
def return_sequence_without_trailing_stop_codon
  sequence = input?
  # Both branches return a new Array, so callers can never modify the
  # stored input through the return value.
  sequence.last == '*' ? sequence[0...-1] : sequence.dup
end

#runObject

#

run

#


279
280
281
282
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 279

# ========================================================================= #
# === run
#
# Entry point of the class: first counts the aminoacids, then reports
# them. Returns the result of show_statistics.
# ========================================================================= #
def run
  # The main Hash has to be populated before it can be reported.
  analyse_composition
  show_statistics
end

#set_string(i = nil) ⇒ Object

#

set_string

#


95
96
97
98
99
100
101
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 95

# ========================================================================= #
# === set_string
#
# Stores the input in @_ and returns the stored value.
#
#   - nil or an empty Array  => DEFAULT_STRING_TO_USE is used instead
#   - an Array               => joined with ' ' and stripped
#   - a String               => split into an Array of characters
#   - anything else          => stored unchanged
#
# NOTE(review): joining an Array with ' ' leaves space characters between
# the former elements in @_ - confirm that this is intended.
# ========================================================================= #
def set_string(i = nil)
  i = DEFAULT_STRING_TO_USE if i.nil? || (i.is_a?(Array) && i.empty?)
  i = i.join(' ').strip if i.is_a?(Array)
  @_ = i.is_a?(String) ? i.chars : i
end

#show_statisticsObject Also known as: report

#

show_statistics

#


139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 139

# ========================================================================= #
# === show_statistics
#
# Reports the composition stored in @hash, ordered by key. In verbose
# mode (be_verbose?) this prints a header, one line per aminoacid with
# its count, percentage and long name, and afterwards the total count,
# the molecular mass, the charged residues and the atomic composition.
# Finally the .csv file is created.
#
# When not in verbose mode nothing is printed, but the per-entry values
# are still computed.
# ========================================================================= #
def show_statistics
  total = n_aminoacids?
  # ======================================================================= #
  # Header of the table of statistical information (frame1).
  # ======================================================================= #
  if be_verbose?
    erev "This protein (#{yellow(total.to_s)}#{rev} aminoacids) has "\
         "#{olivedrab('the following aminoacid composition')}#{rev} "\
         "(Frame 1)."
    e
  end
  @hash.sort_by(&:first).each do |aminoacid, count|
    percentage = '%.2f' % Percentage[count.to_f, total.to_f].result
    # The trailing '%' identifier is coloured separately.
    padded_percentage = percentage.to_s.rjust(5)+red('%')
    long_name = ::Bioroebe.return_long_aminoacid_name(aminoacid)
    next unless be_verbose?
    # Right-align all counts to the width of the largest possible count.
    width = n_aminoacids?.to_s.size
    line = '  '+slateblue(aminoacid.to_s)+rev+' => '+
           yellow(count.to_s.rjust(width))+
           rev+' ('+sfancy(padded_percentage)+rev+
           ') ('+long_name+')'
    e line
  end
  return unless be_verbose?
  report_how_many_aminoacids_we_have_found
  e
  report_the_molecular_mass_of_these_aminoacids
  report_the_number_of_negatively_and_positively_charged_residues
  report_the_atomic_composition
  create_csv_file
end