Class: Bioroebe::ConsensusSequence

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/utility_scripts/consensus_sequence.rb

Overview

Bioroebe::ConsensusSequence

Constant Summary

Constants inherited from CommandlineApplication

Bioroebe::CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

Bioroebe::ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opne, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #set_be_verbose, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #was_or_were, #without_extname, #write_what_into

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #file_readlines, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #write_what_into

Constructor Details

#initialize(*i) ⇒ ConsensusSequence

#

initialize

#


24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 24

# ========================================================================= #
# === initialize
#
# Resets the internal state, stores the given input sequences and then
# calls run(), unless the caller vetoes this.
#
# A block may be given. The value it yields is compared against these
# symbols; any other value leaves the defaults untouched:
#
#   :do_not_run_yet              - do not call run() from the constructor
#   :be_quiet                    - set @be_verbose and
#                                  @report_the_frequencies to false
#   :do_not_run_yet_and_be_quiet - both of the above
# ========================================================================= #
def initialize(*i)
  reset
  run_already = true # This is the default value.
  # ======================================================================= #
  # === Handle blocks next
  # ======================================================================= #
  if block_given?
    instruction = yield
    # ===================================================================== #
    # === :do_not_run_yet and :do_not_run_yet_and_be_quiet
    # ===================================================================== #
    if [:do_not_run_yet, :do_not_run_yet_and_be_quiet].include?(instruction)
      run_already = false
    end
    # ===================================================================== #
    # === :be_quiet and :do_not_run_yet_and_be_quiet
    # ===================================================================== #
    if [:be_quiet, :do_not_run_yet_and_be_quiet].include?(instruction)
      @be_verbose = false
      @report_the_frequencies = false
    end
  end
  set_input_sequences(i)
  run if run_already
end

Instance Method Details

#calculate_frequencies_of(array) ⇒ Object

#

calculate_frequencies_of

#


185
186
187
188
189
190
191
192
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 185

# ========================================================================= #
# === calculate_frequencies_of
#
# Counts how often each of the Strings 'A', 'T', 'C' and 'G' occurs in
# the given Array. The returned Hash always has these four keys, in
# exactly this order; all other entries of the Array are ignored.
# ========================================================================= #
def calculate_frequencies_of(array)
  %w( A T C G ).each_with_object({}) {|nucleotide, tally|
    tally[nucleotide] = array.count(nucleotide)
  }
end

#colourize_nucleotide_position(i) ⇒ Object

#

colourize_nucleotide_position

#


137
138
139
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 137

# ========================================================================= #
# === colourize_nucleotide_position
#
# Single place that decides how a "nucleotide position N" label is
# colourized: the input is handed to seagreen() and the result returned.
# ========================================================================= #
def colourize_nucleotide_position(i)
  coloured_label = seagreen(i)
  coloured_label
end

#consensus_sequence? ⇒ String

#

consensus_sequence?

#

Returns:

  • (String) — all consensus positions collected so far, joined into one String


178
179
180
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 178

# ========================================================================= #
# === consensus_sequence?
#
# Returns the consensus sequence as one String, by joining all entries
# that were collected in @array_consensus_positions. Despite the
# trailing question mark this is a query method, not a predicate.
# ========================================================================= #
def consensus_sequence?
  positions = @array_consensus_positions
  positions.join
end

#display_an_even_simpler_consensus_sequence(i = consensus_sequence? ) ⇒ Object

#

display_an_even_simpler_consensus_sequence

#


255
256
257
258
259
260
261
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 255

# ========================================================================= #
# === display_an_even_simpler_consensus_sequence
#
# Prints one line that contains the result of
# return_an_even_simpler_consensus_sequence() for the given input,
# followed by a short trailing remark. The input defaults to the
# current consensus sequence.
# ========================================================================= #
def display_an_even_simpler_consensus_sequence(
    i = consensus_sequence?
  )
  simplified = return_an_even_simpler_consensus_sequence(i)
  coloured_sequence = orange(simplified)
  erev "  #{coloured_sequence}#{rev} "\
       "# ← a very likely consensus sequence"
end

#input_sequences? ⇒ Array

#

input_sequences?

#

Returns:

  • (Array) — the input sequences that were stored via set_input_sequences


79
80
81
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 79

# ========================================================================= #
# === input_sequences?
#
# Query method that returns whatever is currently stored in
# @input_sequences.
# ========================================================================= #
def input_sequences?
  return @input_sequences
end

#iterate_over_the_sequences ⇒ Object (also known as: determine_the_consensus_sequences)

#

iterate_over_the_sequences

#


93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 93

# ========================================================================= #
# === iterate_over_the_sequences
#
# Walks over every position of the first input sequence, collects the
# character found at that position in each input sequence and appends
# the most frequent nucleotide to @array_consensus_positions. If several
# nucleotides share the highest count then all of them are appended in
# the form "[A/C]".
#
# The length of the first input sequence determines how many positions
# are inspected. Nothing is done when there is no input sequence at all.
#
# If @report_the_frequencies is true then the frequencies are also
# reported for every position.
# ========================================================================= #
def iterate_over_the_sequences
  first_sequence = @input_sequences.first
  # ======================================================================= #
  # Without any input sequence there is no position to look at.
  # ======================================================================= #
  return if first_sequence.nil?
  size = first_sequence.size
  0.upto(size - 1) {|n|
    mapped = @input_sequences.map {|line| line[n,1] }
    frequencies = calculate_frequencies_of(mapped)
    # ===================================================================== #
    # max_value is an Array of [nucleotide, count] pairs: one pair for
    # every nucleotide that reached the highest count at this position,
    # such as [["A", 2], ["C", 2]].
    #
    # We can not use .max_by because it returns only one result; so we
    # have to use .group_by instead.
    # ===================================================================== #
    max_value = frequencies.group_by {|_nucleotide, count| count }.max.last
    most_frequent = max_value.map {|nucleotide, _count| nucleotide }
    if most_frequent.size > 1
      append_this = '['+most_frequent.join('/')+']'
    else
      append_this = most_frequent.join
    end
    if @report_the_frequencies
      erev "The #{mediumslateblue('frequencies')}#{rev} "\
           "for #{lightgreen(mapped.join)}#{rev} "+
           '(at '+colourize_nucleotide_position("nucleotide position #{n+1}")+
           rev+') were:'
      e
      print '  '; pp frequencies
      erev "\nThe associated max-value for "+
           colourize_nucleotide_position("nucleotide position #{n+1}")+
           "#{rev} was: "+
           simp(
             max_value.map {|a| a.join(': ') }.join('; ')
           )
      e
    end
    @array_consensus_positions << append_this
  }
end

#random_nucleotide_for_this_IUPAC_code(i) ⇒ Object

#

random_nucleotide_for_this_IUPAC_code

#


144
145
146
147
148
149
150
151
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 144

# ========================================================================= #
# === random_nucleotide_for_this_IUPAC_code
#
# Returns one randomly picked nucleotide for the given IUPAC ambiguity
# code. The supported codes are:
#
#   H - A or T or C
#   M - A or C
#   W - A or T
#
# nil is returned for any other input.
# ========================================================================= #
def random_nucleotide_for_this_IUPAC_code(i)
  case i
  when 'H'
    return_A_or_T_or_C
  when 'M'
    return_A_or_C
  when 'W'
    return_A_or_T
  end
end

#report_on_which_sequences_we_will_operate(be_verbose = @be_verbose) ⇒ Object

#

report_on_which_sequences_we_will_operate

#


197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 197

# ========================================================================= #
# === report_on_which_sequences_we_will_operate
#
# Prints how many input sequences are stored, followed by one line per
# sequence with a running number that starts at 1. Nothing is printed
# when be_verbose is false or nil.
#
# The Symbol :be_verbose is accepted as a synonym for true.
# ========================================================================= #
def report_on_which_sequences_we_will_operate(
    be_verbose = @be_verbose
  )
  be_verbose = true if be_verbose == :be_verbose
  return unless be_verbose
  e
  erev "#{rev}Working on the following "\
       "#{steelblue(@input_sequences.size)} "\
       "#{rev}sequences next:"
  e
  @input_sequences.each.with_index(1) {|this_sequence, position|
    e "  #{sfancy(this_sequence)} #{steelblue(position.to_s.rjust(3))}"
  }
  e
end

#report_the_consensus_sequence ⇒ Object

#

report_the_consensus_sequence

#


219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 219

# ========================================================================= #
# === report_the_consensus_sequence
#
# Prints the consensus sequence, but only if @be_verbose is true.
#
# When the consensus sequence contains a '/' - that is, at least one
# position had several equally frequent nucleotides - two simplified
# variants are reported as well, followed by a short summary.
# ========================================================================= #
def report_the_consensus_sequence
  return unless @be_verbose
  the_consensus_sequence = consensus_sequence?
  erev 'The consensus sequence is: '
  e
  e "  #{simp(the_consensus_sequence)}"
  e
  # ======================================================================= #
  # Without a '/' there is no ambiguous position left to simplify.
  # ======================================================================= #
  return unless the_consensus_sequence.include? '/'
  # ======================================================================= #
  # In this case, we can perhaps simplify it via the IUPAC code.
  # ======================================================================= #
  try_to_find_and_report_a_simpler_consensus_sequence(
    the_consensus_sequence
  )
  # ======================================================================= #
  # And this here will just pick any even simpler consensus
  # sequence. We assume that this would imply equal likelihood
  # for a given sequence.
  # ======================================================================= #
  try_to_find_and_report_an_even_simpler_consensus_sequence(
    the_consensus_sequence
  )
  # ======================================================================= #
  # The verbosity is consulted once more before the final summary.
  # ======================================================================= #
  if @be_verbose
    erev 'Finally, we will display the above findings in a'
    erev 'shorter variant:'
    report_on_which_sequences_we_will_operate
    display_an_even_simpler_consensus_sequence
    e
  end
end

#reset ⇒ Object

#

reset (reset tag)

#


60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 60

# ========================================================================= #
# === reset                                                     (reset tag)
#
# Calls the parent's reset and then puts the instance variables of this
# class back to their defaults: verbose, reporting the frequencies, and
# no consensus positions collected yet.
# ========================================================================= #
def reset
  super()
  # ======================================================================= #
  # === @be_verbose
  # ======================================================================= #
  @be_verbose = true
  # ======================================================================= #
  # === @array_consensus_positions
  #
  # One entry is appended here for every inspected nucleotide position.
  # ======================================================================= #
  @array_consensus_positions = []
  # ======================================================================= #
  # === @report_the_frequencies
  # ======================================================================= #
  @report_the_frequencies = true
end

#return_A_or_C ⇒ Object

#

return_A_or_C

#


156
157
158
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 156

# ========================================================================= #
# === return_A_or_C
#
# Returns either 'A' or 'C', picked at random.
# ========================================================================= #
def return_A_or_C
  ['A', 'C'].sample
end

#return_A_or_T ⇒ Object

#

return_A_or_T

#


163
164
165
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 163

# ========================================================================= #
# === return_A_or_T
#
# Returns either 'A' or 'T', picked at random.
# ========================================================================= #
def return_A_or_T
  ['A', 'T'].sample
end

#return_A_or_T_or_C ⇒ Object

#

return_A_or_T_or_C

#


171
172
173
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 171

# ========================================================================= #
# === return_A_or_T_or_C
#
# Returns one of 'A', 'T' or 'C', picked at random.
# ========================================================================= #
def return_A_or_T_or_C
  ['A', 'T', 'C'].sample
end

#return_an_even_simpler_consensus_sequence(i = consensus_sequence? ) ⇒ Object

#

return_an_even_simpler_consensus_sequence

#


266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 266

# ========================================================================= #
# === return_an_even_simpler_consensus_sequence
#
# Returns a copy of the given consensus sequence in which every
# ambiguous group - such as "[A/C]", "[T/G]" or "[A/T/C/G]" - has been
# replaced by one of the nucleotides listed in that group, picked at
# random. The given String itself is not modified.
#
# The block form of .gsub is used on purpose: the block runs once per
# match, so every ambiguous position gets its own random pick rather
# than all occurrences of a group sharing a single pick.
#
# The picked nucleotide is passed through steelblue(); the result of
# remove_escape_sequence(orange) is appended after it - presumably to
# switch back to the surrounding colour; verify against those helpers.
# ========================================================================= #
def return_an_even_simpler_consensus_sequence(
    i = consensus_sequence?
  )
  # ======================================================================= #
  # Only groups that consist of the nucleotides A, T, C and G are matched.
  # ======================================================================= #
  i.gsub(/\[[ATCG](?:\/[ATCG])+\]/) {|group|
    candidates = group[1..-2].split('/') # "[A/C]" => ["A", "C"]
    steelblue(candidates.sample)+
    remove_escape_sequence(orange)
  }
end

#run ⇒ Object

#

run (run tag)

#


346
347
348
349
350
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 346

# ========================================================================= #
# === run                                                         (run tag)
#
# Entry point that performs the three steps in order. The return value
# is whatever the last step returns.
# ========================================================================= #
def run
  # (1) Show the input sequences.
  report_on_which_sequences_we_will_operate
  # (2) Determine the consensus position by position.
  iterate_over_the_sequences
  # (3) Report the result.
  report_the_consensus_sequence
end

#set_input_sequences(i) ⇒ Object

#

set_input_sequences(i)

#


86
87
88
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 86

# ========================================================================= #
# === set_input_sequences
#
# Stores the input in @input_sequences, always as a flat Array: a single
# value is wrapped into an Array and nested Arrays are flattened.
# ========================================================================= #
def set_input_sequences(i)
  sequences = [i]
  sequences.flatten!
  @input_sequences = sequences
end

#try_to_find_and_report_a_simpler_consensus_sequence(i = consensus_sequence? ) ⇒ Object

#

try_to_find_and_report_a_simpler_consensus_sequence

For a list of the IUPAC codes, see this link:

https://www.bioinformatics.org/sms/iupac.html
#


314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 314

# ========================================================================= #
# === try_to_find_and_report_a_simpler_consensus_sequence
#
# Prints the given consensus sequence with every ambiguous group, such
# as "[A/C]", replaced by the matching one-letter IUPAC ambiguity code,
# such as "M". The input defaults to the current consensus sequence.
#
# All eleven IUPAC ambiguity codes for the nucleotides A, C, G and T are
# supported, independent of the order within the group.
#
# For a list of the IUPAC codes, see:
#
#   https://www.bioinformatics.org/sms/iupac.html
#
# ========================================================================= #
def try_to_find_and_report_a_simpler_consensus_sequence(
    i = consensus_sequence?
  )
  # ======================================================================= #
  # The keys are the nucleotides of a group, sorted alphabetically.
  # ======================================================================= #
  iupac_codes = {
    'AG'   => 'R',
    'CT'   => 'Y',
    'CG'   => 'S',
    'AT'   => 'W',
    'GT'   => 'K',
    'AC'   => 'M',
    'CGT'  => 'B',
    'AGT'  => 'D',
    'ACT'  => 'H',
    'ACG'  => 'V',
    'ACGT' => 'N'
  }
  erev "#{rev}We will try to find - and report - a simpler consensus sequence:"
  i = orange(i)
  i = i.dup if i.frozen?
  # ======================================================================= #
  # === Handle all groups such as [A/T/C] or [A/C]
  #
  # A group without a matching code is left as it is.
  # ======================================================================= #
  i.gsub!(/\[[ATCG](?:\/[ATCG])+\]/) {|group|
    code = iupac_codes[group[1..-2].split('/').uniq.sort.join]
    if code
      steelblue(code)+
      remove_escape_sequence(orange)
    else
      group
    end
  }
  e
  erev "  #{i}"
  e
end

#try_to_find_and_report_an_even_simpler_consensus_sequence(i = consensus_sequence? ) ⇒ Object

#

try_to_find_and_report_an_even_simpler_consensus_sequence

#


294
295
296
297
298
299
300
301
302
303
304
# File 'lib/bioroebe/utility_scripts/consensus_sequence.rb', line 294

# ========================================================================= #
# === try_to_find_and_report_an_even_simpler_consensus_sequence
#
# Prints a two-line explanation and then the result of
# return_an_even_simpler_consensus_sequence() for the given input, which
# defaults to the current consensus sequence.
# ========================================================================= #
def try_to_find_and_report_an_even_simpler_consensus_sequence(
    i = consensus_sequence?
  )
  erev "#{rev}And the following is an even simpler consensus "\
       "sequence (blue"
  erev "denotes random, equal choice):"
  simplified_sequence = return_an_even_simpler_consensus_sequence(i)
  e
  erev "  #{simplified_sequence}"
  e
end