Class: Bioroebe::CountAmountOfAminoacids

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/count/count_amount_of_aminoacids.rb

Overview

Bioroebe::CountAmountOfAminoacids

Constant Summary collapse

DEFAULT_STRING_TO_USE =
#

DEFAULT_STRING_TO_USE

#
'GCCSLTLPCGYFGNTNAVRP*WKPR*R*QRH*HLKPYYWRRGGFTHGLTGTDCAVKFPSA*GTNTHIRPGHTH'

Constants inherited from CommandlineApplication

Bioroebe::CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

Bioroebe::ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opne, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #set_be_verbose, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #file_readlines, #infer_the_namespace, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #namespace?, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Constructor Details

#initialize(i = DEFAULT_STRING_TO_USE, run_already = true) ⇒ CountAmountOfAminoacids

#

initialize

#

42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 42

# ========================================================================= #
# === initialize
#
# Sets up a new instance: calls reset, stores the input via set_string
# and, unless told otherwise, immediately calls run.
#
# @param i [Object] the sequence to analyse; handed to set_string as-is
# @param run_already [Boolean, Symbol] when truthy, run is invoked at the
#   end. The Symbol :use_cliner additionally sets @use_cliner to true and
#   is then treated like true.
# @yield optional block; if it returns :be_quiet or :be_silent then
#   @be_verbose is set to false before run is called
# ========================================================================= #
def initialize(
    i           = DEFAULT_STRING_TO_USE,
    run_already = true
  )
  super()
  reset
  set_string(i)
  # :use_cliner is a flag that also implies "run right away".
  if run_already == :use_cliner
    @use_cliner = true
    run_already = true
  end
  # ======================================================================= #
  # === Handle blocks next
  # ======================================================================= #
  if block_given? && %i[be_quiet be_silent].include?(yield)
    @be_verbose = false
  end
  run if run_already
end

Class Method Details

.return_composition_hash(of_this_sequence) ⇒ Object

#

Bioroebe::CountAmountOfAminoacids.return_composition_hash

This will simply return a Hash, which can then be used in downstream applications. Note that the Hash that will be returned will be sorted by the KEYS, in an alphabetical manner. That way it can be directly used in downstream applications, in particular graphical user interfaces.

Additionally, this method will always return ALL the 20 canonical aminoacids, and default to a value of 0 for them. The reason why this is done is because it was needed in a ruby-gtk widget to also show when there are aminoacids that are not found in a given protein.

#

297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 297

# ========================================================================= #
# === Bioroebe::CountAmountOfAminoacids.return_composition_hash
#
# Counts the aminoacids of the given sequence (silently, via a new
# instance that is told to :be_silent) and returns the result as a Hash
# that is sorted by its keys.
#
# Every entry of ::Bioroebe.return_array_of_common_aminoacids is present
# in the result, with a count of 0 when it does not occur in the
# sequence. Keys found in the sequence but not in that array are kept
# as well.
#
# @param of_this_sequence [Object] the sequence; passed to new() unchanged
# @return [Hash] e. g. {"A"=>9, "G"=>3, "K"=>4, "L"=>1, "S"=>1, "T"=>6}
# ========================================================================= #
def self.return_composition_hash(of_this_sequence)
  counter = new(of_this_sequence) { :be_silent }
  # Start with a zero count for every common aminoacid.
  composition = ::Bioroebe.return_array_of_common_aminoacids.
    each_with_object({}) { |aminoacid, hash| hash[aminoacid] = 0 }
  # Overlay the counts that were actually observed.
  composition.merge!(counter.hash?)
  # Build the key-sorted Hash straight from the pairs; no flatten/splat
  # round-trip is needed.
  composition.sort_by { |aminoacid, _count| aminoacid }.to_h
end

Instance Method Details

#analyse_composition ⇒ Object

#

analyse_composition

#

113
114
115
116
117
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 113

# ========================================================================= #
# === analyse_composition
#
# Walks over every entry of @_ and increments its counter in @hash.
# This relies on @hash yielding 0 for keys it has not seen yet (reset
# configures that default). Returns whatever @_.each returns.
# ========================================================================= #
def analyse_composition
  @_.each do |aminoacid|
    @hash[aminoacid] += 1 # Populate our main Hash here.
  end
end

#create_csv_file ⇒ Object

#

create_csv_file

This method will create a .csv file showing the aminoacid composition of the protein at hand.

The format is:

(1) one-letter shortcut for the amino acid at hand
(2) number of times this particular aminoacid occurs
(3) percentage value (15 digits maximum)
#

223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 223

# ========================================================================= #
# === create_csv_file
#
# Writes the aminoacid composition as a .csv file into the log directory.
#
# One line is emitted per entry of return_array_of_one_letter_aminoacids,
# in that order, with three comma-separated columns:
#
#   (1) the one-letter code
#   (2) how often it occurs in @hash ('0' when absent)
#   (3) its percentage relative to n_aminoacids_in_total? ('0' when
#       absent; whole numbers are written without a fraction, e. g. 5
#       rather than 5.0)
#
# Keys of @hash that are not part of that array are not written.
# Returns the result of write_what_into.
# ========================================================================= #
def create_csv_file
  # Key-sorted copy of the counts, e. g. {"L"=>2, "Q"=>1, "R"=>2}
  frequency_of = @hash.sort_by(&:first).to_h
  csv = ''.dup
  return_array_of_one_letter_aminoacids.each { |letter|
    count_column      = '0'
    percentage_column = '0'
    if frequency_of.has_key?(letter)
      occurrences  = frequency_of[letter]
      count_column = occurrences.to_s
      # =================================================================== #
      # The total is queried lazily, only for aminoacids that do occur.
      # =================================================================== #
      percentage = (occurrences.to_f * 100.0) / n_aminoacids_in_total?.to_f
      # =================================================================== #
      # Clean up this value a bit, e. g. 5.0 == 5.
      # =================================================================== #
      percentage = percentage.to_i if percentage == percentage.to_i
      percentage_column = percentage.to_s
    end
    csv << [letter, count_column, percentage_column].join(',') << N
  }
  target = "#{log_dir?}aminoacid_composition.csv"
  first_line = 'A ' + yellowgreen('.csv file') + rev +
               ' showing the aminoacid composition was'
  erev first_line
  erev "created at #{sfile(target)}"
  write_what_into(csv, target)
end

#hash? ⇒ Hash

#

hash?

#

Returns:

  • (Hash)

87
88
89
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 87

# ========================================================================= #
# === hash?
#
# Plain reader for @hash. Despite the trailing "?" this does not return
# a boolean; it hands back the object stored in @hash.
# ========================================================================= #
def hash?
  return @hash
end

#input? ⇒ Array Also known as: string?, string

#

input?

#

Returns:

  • (Array)

105
106
107
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 105

# ========================================================================= #
# === input?
#
# Plain reader for @_, the value stored by set_string. Despite the
# trailing "?" this does not return a boolean.
# ========================================================================= #
def input?
  return @_
end

#n_aminoacids? ⇒ Integer

#

n_aminoacids?

#

Returns:

  • (Integer)

131
132
133
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 131

# ========================================================================= #
# === n_aminoacids?
#
# Returns the size of @_, that is the number of stored entries. Nothing
# is filtered here, so every entry of @_ is counted.
# ========================================================================= #
def n_aminoacids?
  return @_.size
end

#n_aminoacids_in_total? ⇒ Integer

#

n_aminoacids_in_total?

#

Returns:

  • (Integer)

187
188
189
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 187

# ========================================================================= #
# === n_aminoacids_in_total?
#
# Returns the size of the sequence as reported by
# return_sequence_without_trailing_stop_codon.
# ========================================================================= #
def n_aminoacids_in_total?
  sequence = return_sequence_without_trailing_stop_codon
  sequence.size
end

#report_how_many_aminoacids_we_have_found ⇒ Object

#

report_how_many_aminoacids_we_have_found

#

204
205
206
207
208
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 204

# ========================================================================= #
# === report_how_many_aminoacids_we_have_found
#
# Prints, via e, one line stating the value of n_aminoacids_in_total?.
# The line starts with the N constant. Returns whatever e returns.
# ========================================================================= #
def report_how_many_aminoacids_we_have_found
  # The parts are evaluated left to right and joined without a separator.
  message = [
    rev,
    N,
    'In total, ',
    simportant(n_aminoacids_in_total?.to_s),
    rev,
    ' Aminoacids were found.'
  ].join
  e message
end

#report_the_atomic_composition(use_this_aminoacid_sequence = return_sequence_without_trailing_stop_codon) ⇒ Object

#

report_the_atomic_composition

#

267
268
269
270
271
272
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 267

# ========================================================================= #
# === report_the_atomic_composition
#
# Delegates to ::Bioroebe.show_atomic_composition and returns its result.
#
# @param use_this_aminoacid_sequence [Object] the sequence to report on;
#   defaults to return_sequence_without_trailing_stop_codon
# ========================================================================= #
def report_the_atomic_composition(
  use_this_aminoacid_sequence = return_sequence_without_trailing_stop_codon
)
  ::Bioroebe.show_atomic_composition(use_this_aminoacid_sequence)
end

#report_the_molecular_mass_of_these_aminoacids ⇒ Object

#

report_the_molecular_mass_of_these_aminoacids

#

194
195
196
197
198
199
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 194

# ========================================================================= #
# === report_the_molecular_mass_of_these_aminoacids
#
# Asks ::Bioroebe.amino_acid_average_mass for the mass of the sequence
# returned by return_sequence_without_trailing_stop_codon, rounds it to
# two decimals and prints it via e. Returns whatever e returns.
# ========================================================================= #
def report_the_molecular_mass_of_these_aminoacids
  sequence = return_sequence_without_trailing_stop_codon
  average_mass = ::Bioroebe.amino_acid_average_mass(sequence).to_f
  mass_in_dalton = average_mass.round(2).to_s
  message = "#{rev}The (average) molecular mass of this protein is: "\
            "#{sfancy(mass_in_dalton)}#{rev} Dalton"
  e message
end

#report_the_number_of_negatively_and_positively_charged_residues ⇒ Object

#

report_the_number_of_negatively_and_positively_charged_residues

#

172
173
174
175
176
177
178
179
180
181
182
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 172

# ========================================================================= #
# === report_the_number_of_negatively_and_positively_charged_residues
#
# Counts 'D' + 'E' (reported as Asp + Glu) and 'R' + 'K' (reported as
# Arg + Lys) in the sequence returned by
# return_sequence_without_trailing_stop_codon, then prints both totals
# via e, framed by an argument-less e call before and after.
# ========================================================================= #
def report_the_number_of_negatively_and_positively_charged_residues
  sequence = return_sequence_without_trailing_stop_codon
  n_negative = %w[D E].map { |residue| sequence.count(residue) }.inject(:+)
  n_positive = %w[R K].map { |residue| sequence.count(residue) }.inject(:+)
  e
  # The positively charged residues are reported first.
  [
    ['positively charged residues', 'Arg + Lys', n_positive],
    ['negatively charged residues', 'Asp + Glu', n_negative]
  ].each { |label, residues, amount|
    e "Total number of #{mediumspringgreen(label)}"\
      "#{rev} (#{residues}): #{rev}#{sfancy(amount.to_s)}#{rev}"
  }
  e
end

#reset ⇒ Object

#

reset

#

71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 71

# ========================================================================= #
# === reset
#
# Calls the parent's reset, then restores this class' own state to its
# defaults: an empty composition Hash and verbose output.
# ========================================================================= #
def reset
  super()
  # ======================================================================= #
  # === @hash
  #
  # Keeps the protein composition. Unknown keys read as 0, so counters
  # can be incremented without initialising them first.
  # ======================================================================= #
  @hash = Hash.new(0)
  # ======================================================================= #
  # === @be_verbose
  # ======================================================================= #
  @be_verbose = true
end

#return_sequence_without_trailing_stop_codon ⇒ Object

#

return_sequence_without_trailing_stop_codon

#

122
123
124
125
126
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 122

# ========================================================================= #
# === return_sequence_without_trailing_stop_codon
#
# Returns the input sequence (see input?) without a trailing '*' entry.
#
# The stored input is left untouched: when the last entry is '*', a new
# sequence lacking that entry is returned; otherwise the stored sequence
# itself is returned. A destructive pop on the stored sequence would
# change what input? and n_aminoacids? report after the first call.
#
# Only the very last entry is inspected; a '*' elsewhere is kept.
# ========================================================================= #
def return_sequence_without_trailing_stop_codon
  sequence = input?
  return sequence unless sequence.last == '*'
  sequence[0...-1] # Non-destructive: builds a new Array.
end

#run ⇒ Object

#

run

#

277
278
279
280
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 277

# ========================================================================= #
# === run
#
# Entry point: first counts the aminoacids, then reports on them.
# Returns the result of show_statistics.
# ========================================================================= #
def run
  # The main Hash has to be populated before anything can be shown.
  analyse_composition
  return show_statistics
end

#set_string(i = nil) ⇒ Object

#

set_string

#

94
95
96
97
98
99
100
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 94

# ========================================================================= #
# === set_string
#
# Normalises the input and stores it in @_.
#
#   - nil or an empty Array  → DEFAULT_STRING_TO_USE is used
#   - a non-empty Array      → joined with ' ' and stripped
#   - a String (incl. the two cases above) → split into single characters
#   - anything else          → stored unchanged
#
# NOTE(review): joining an Array with ' ' means the spaces between the
# elements end up as entries of @_ as well — confirm this is intended.
#
# @param i [Object] the sequence to store
# @return [Object] the value assigned to @_
# ========================================================================= #
def set_string(i = nil)
  sequence =
    case i
    when nil
      DEFAULT_STRING_TO_USE
    when Array
      i.empty? ? DEFAULT_STRING_TO_USE : i.join(' ').strip
    else
      i
    end
  sequence = sequence.chars if sequence.is_a?(String)
  @_ = sequence
end

#show_statistics ⇒ Object Also known as: report

#

show_statistics

#

138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/bioroebe/count/count_amount_of_aminoacids.rb', line 138

# ========================================================================= #
# === show_statistics
#
# Prints the aminoacid composition table (Frame 1), one row per key of
# @hash in sorted order, followed by the summary reports and the
# creation of the .csv file.
#
# All output here is gated by @be_verbose, so in silent mode the method
# returns right away instead of computing percentages and looking up
# aminoacid names whose results would be discarded.
#
# Percentages are relative to n_aminoacids?, i. e. to every stored entry.
#
# @return [Object, nil] nil in silent mode, otherwise the result of
#   create_csv_file
# ========================================================================= #
def show_statistics
  return unless @be_verbose
  n_members = n_aminoacids?
  # Width of the count column; constant for the whole table.
  count_width = n_members.to_s.size
  # ======================================================================= #
  # Show the table of statistical information, in other words, how
  # many aminoacids this protein contains. This will thus show frame1.
  # ======================================================================= #
  erev "This protein (#{yellow(n_members.to_s)}#{rev} aminoacids) has "\
       "the following aminoacid composition (Frame 1)."
  e
  @hash.sort_by(&:first).each { |key, value|
    percentage_value = '%.2f' % Percentage[value.to_f, n_members.to_f].result
    formatted_percentage_value = percentage_value.rjust(5)+
      red('%') # The formatted % identifier.
    name_of_the_aminoacid = ::Bioroebe.return_long_aminoacid_name(key)
    e '  '+slateblue(key.to_s)+rev+' => '+
      yellow(value.to_s.rjust(count_width))+
      rev+' ('+sfancy(formatted_percentage_value)+rev+
      ') ('+name_of_the_aminoacid+')'
  }
  report_how_many_aminoacids_we_have_found
  report_the_molecular_mass_of_these_aminoacids
  report_the_number_of_negatively_and_positively_charged_residues
  report_the_atomic_composition
  create_csv_file
end