Class: Bioroebe::ConsensusSequence

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb

Overview

Bioroebe::ConsensusSequence

Constant Summary collapse

A_T_or_C =
#

A_T_or_C

#
%w( A T C )

Constants inherited from CommandlineApplication

Bioroebe::CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

Bioroebe::ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(*i) ⇒ ConsensusSequence

#

initialize

#


29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 29

# Sets up a new instance.
#
# All positional arguments are passed on to set_input_sequences. An
# optional block may yield one of the following symbols in order to
# influence the start-up behaviour:
#
#   :do_not_run_yet              - do not call run from the constructor
#   :be_quiet                    - call set_be_quiet and disable the
#                                  reporting of the frequencies
#   :do_not_run_yet_and_be_quiet - both of the above
#
# Any other yielded value is ignored.
def initialize(*i)
  reset
  # ======================================================================= #
  # === Handle blocks next
  # ======================================================================= #
  instruction = block_given? ? yield : nil
  # ======================================================================= #
  # run is invoked by default; only the two :do_not_run_yet* instructions
  # postpone it.
  # ======================================================================= #
  postpone_run = %i( do_not_run_yet_and_be_quiet do_not_run_yet ).include?(instruction)
  # ======================================================================= #
  # === :do_not_run_yet_and_be_quiet and :be_quiet
  # ======================================================================= #
  if %i( do_not_run_yet_and_be_quiet be_quiet ).include?(instruction)
    set_be_quiet
    @report_the_frequencies = false
  end
  set_input_sequences(i)
  run unless postpone_run
end

Instance Method Details

#calculate_frequencies_of(array) ⇒ Object

#

calculate_frequencies_of

#


191
192
193
194
195
196
197
198
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 191

# Counts how often each of the four DNA nucleotides occurs in the given
# collection.
#
# Returns a Hash with the keys 'A', 'T', 'C' and 'G' (in this order), each
# pointing at the result of array.count() for that letter. Entries other
# than these four letters do not show up in the result.
def calculate_frequencies_of(array)
  %w( A T C G ).each_with_object({}) {|nucleotide, tally|
    tally[nucleotide] = array.count(nucleotide)
  }
end

#colourize_nucleotide_position(i) ⇒ Object

#

colourize_nucleotide_position

#


142
143
144
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 142

# Colourizes the given text by delegating to seagreen() and returns
# whatever seagreen() returns.
#
# seagreen() is not defined in this part of the file; it is presumably
# one of the colour helpers of the project - verify there if the exact
# output format matters.
def colourize_nucleotide_position(i)
  seagreen(i)
end

#consensus_sequence? ⇒ Boolean

#

consensus_sequence?

#

Returns:

  • (Boolean)


184
185
186
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 184

# Returns the consensus sequence gathered so far, that is, all entries of
# @array_consensus_positions joined into one String.
#
# Despite the trailing question mark this is a query method that returns
# a String, not true or false.
def consensus_sequence?
  positions = @array_consensus_positions
  return positions.join
end

#display_an_even_simpler_consensus_sequence(i = consensus_sequence? ) ⇒ Object

#

display_an_even_simpler_consensus_sequence

#


264
265
266
267
268
269
270
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 264

# Prints a single line that shows one "even simpler" variant of the given
# consensus sequence, followed by a short explanatory remark.
#
# The sequence is first passed through
# return_an_even_simpler_consensus_sequence() and the result is then
# wrapped via orange(). By default the current consensus sequence is used.
def display_an_even_simpler_consensus_sequence(
    i = consensus_sequence?
  )
  simplified  = return_an_even_simpler_consensus_sequence(i)
  highlighted = orange(simplified)
  erev "  #{highlighted}#{rev} # ← a very likely consensus sequence"
end

#input_sequences? ⇒ Boolean

#

input_sequences?

#

Returns:

  • (Boolean)


84
85
86
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 84

# Returns the current value of @input_sequences (the object stored by
# set_input_sequences), or nil if nothing has been stored yet.
#
# Despite the trailing question mark this does not return true or false.
def input_sequences?
  return @input_sequences
end

#iterate_over_the_sequences ⇒ Object Also known as: determine_the_consensus_sequences

#

iterate_over_the_sequences

#


98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 98

# Determines the consensus entry for every nucleotide position and appends
# it to @array_consensus_positions.
#
# The length of the first input sequence decides how many positions are
# inspected. For each position the nucleotides of all input sequences are
# collected and counted via calculate_frequencies_of(). The nucleotide
# with the highest count is appended; if several nucleotides share the
# highest count they are appended as one group such as "[A/T]".
#
# If @report_the_frequencies is true, the frequencies and the winning
# nucleotide(s) are also reported for every position.
#
# When there are no input sequences at all, nothing is done and nil is
# returned.
def iterate_over_the_sequences
  first_sequence = @input_sequences.first
  # Without any input sequence there is nothing to compare; return early
  # rather than calling .size on nil.
  return if first_sequence.nil?
  size = first_sequence.size
  0.upto(size - 1) {|n|
    mapped = @input_sequences.map {|line| line[n,1] }
    frequencies = calculate_frequencies_of(mapped)
    # ===================================================================== #
    # max_value is an Array of [nucleotide, count] pairs: every nucleotide
    # that occurs as often as the most frequent one, for instance
    # [["A", 2], ["T", 2]].
    #
    # We can not use .max_by because it returns only one result, and
    # ties have to be kept.
    # ===================================================================== #
    highest_count = frequencies.values.max
    max_value = frequencies.select {|_nucleotide, count|
      count == highest_count
    }.to_a
    winners = max_value.map {|nucleotide, _count| nucleotide }
    if winners.size > 1
      append_this = '['+winners.join('/')+']'
    else
      append_this = winners.join
    end
    if @report_the_frequencies
      erev "The #{mediumslateblue('frequencies')}#{rev} "\
           "for #{lightgreen(mapped.join)}#{rev} "+
           '(at '+colourize_nucleotide_position("nucleotide position #{n+1}")+
           rev+') were:'
      e
      print '  '; pp frequencies
      erev "\nThe associated max-value for "+
           colourize_nucleotide_position("nucleotide position #{n+1}")+
           "#{rev} was: "+
           simp(
             max_value.map {|a| a.join(': ') }.join('; ')
           )
      e
    end
    @array_consensus_positions << append_this
  }
end

#random_nucleotide_for_this_IUPAC_code(i) ⇒ Object

#

random_nucleotide_for_this_IUPAC_code

#


149
150
151
152
153
154
155
156
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 149

# Returns one randomly chosen nucleotide for the given ambiguous IUPAC
# code.
#
# Supported codes:
#
#   'H' - A, T or C (via return_A_or_T_or_C)
#   'M' - A or C    (via return_A_or_C)
#   'W' - A or T    (via return_A_or_T)
#
# Any other input yields nil.
def random_nucleotide_for_this_IUPAC_code(i)
  case i
  # ======================================================================= #
  # === H
  # ======================================================================= #
  when 'H'
    return_A_or_T_or_C
  # ======================================================================= #
  # === M
  # ======================================================================= #
  when 'M'
    return_A_or_C
  # ======================================================================= #
  # === W
  #
  # W is the IUPAC code for "A or T"; the matching helper already exists.
  # ======================================================================= #
  when 'W'
    return_A_or_T
  end
end

#report_on_which_sequences_we_will_operate(be_verbose = be_verbose? ) ⇒ Object

#

report_on_which_sequences_we_will_operate

#


203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 203

# Lists all input sequences, one per line, each followed by its position
# in the list (counting from 1, right-aligned to three characters).
#
# Nothing is shown unless be_verbose is truthy. The Symbol :be_verbose is
# accepted as a synonym for true. By default the result of be_verbose?
# decides.
def report_on_which_sequences_we_will_operate(
    be_verbose = be_verbose?
  )
  # ======================================================================= #
  # === :be_verbose
  # ======================================================================= #
  be_verbose = true if :be_verbose == be_verbose
  return unless be_verbose
  amount = steelblue(@input_sequences.size)
  e
  erev "#{rev}Working on the following #{amount} "\
       "#{rev}sequences next:"
  e
  @input_sequences.each.with_index(1) {|this_sequence, position|
    counter = steelblue(position.to_s.rjust(3))
    e "  #{sfancy(this_sequence)} #{counter}"
  }
  e
end

#report_the_consensus_sequence ⇒ Object

#

report_the_consensus_sequence

#


228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 228

# Reports the consensus sequence - but only in verbose mode.
#
# If the consensus sequence contains a '/' (that is, at least one position
# with several equally frequent nucleotides), two simplified variants are
# reported as well, followed by a short summary consisting of the input
# sequences and one "even simpler" consensus sequence.
def report_the_consensus_sequence
  return unless be_verbose?
  the_consensus_sequence = consensus_sequence?
  erev 'The consensus sequence is: '
  e
  e "  #{simp(the_consensus_sequence)}"
  e
  # ======================================================================= #
  # Without a '/' there is nothing that could be simplified.
  # ======================================================================= #
  return unless the_consensus_sequence.include?('/')
  # ======================================================================= #
  # In this case, we can perhaps simplify it via the IUPAC code.
  # ======================================================================= #
  try_to_find_and_report_a_simpler_consensus_sequence(
    the_consensus_sequence
  )
  # ======================================================================= #
  # And this here will just pick any even simpler consensus
  # sequence. We assume that this would imply equal likelihood
  # for a given sequence.
  # ======================================================================= #
  try_to_find_and_report_an_even_simpler_consensus_sequence(
    the_consensus_sequence
  )
  # ======================================================================= #
  # The verbosity is queried once more before the summary is shown.
  # ======================================================================= #
  return unless be_verbose?
  erev 'Finally, we will display the above findings in a'
  erev 'shorter variant:'
  report_on_which_sequences_we_will_operate
  display_an_even_simpler_consensus_sequence
  e
end

#reset ⇒ Object

#

reset (reset tag)

#


65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 65

# Puts the instance back into its default state: the parent class is
# reset first, then the list of consensus positions is emptied, verbose
# mode is requested via set_be_verbose and the reporting of the
# per-position frequencies is switched on.
def reset
  # The parent implementation runs first, explicitly without arguments.
  super()
  # ======================================================================= #
  # === @array_consensus_positions
  #
  # Collects one entry per nucleotide position; filled by
  # iterate_over_the_sequences.
  # ======================================================================= #
  @array_consensus_positions = []
  # ======================================================================= #
  # === @be_verbose
  #
  # set_be_verbose is defined outside of this class body; presumably it
  # sets @be_verbose to true - verify in the parent class.
  # ======================================================================= #
  set_be_verbose
  # ======================================================================= #
  # === @report_the_frequencies
  #
  # If true then iterate_over_the_sequences reports the frequencies for
  # every nucleotide position.
  # ======================================================================= #
  @report_the_frequencies = true
end

#return_A_or_C ⇒ Object

#

return_A_or_C

#


161
162
163
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 161

# Randomly picks either 'A' or 'C' and returns it as a String.
def return_A_or_C
  candidates = %w[A C]
  candidates.sample
end

#return_A_or_T ⇒ Object

#

return_A_or_T

#


168
169
170
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 168

# Randomly picks either 'A' or 'T' and returns it as a String.
def return_A_or_T
  candidates = %w[A T]
  candidates.sample
end

#return_A_or_T_or_C ⇒ Object

#

return_A_or_T_or_C

This method will randomly pick either A, T or C.

#


177
178
179
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 177

# Randomly picks one entry of the constant A_T_or_C and returns it.
def return_A_or_T_or_C
  candidates = A_T_or_C
  candidates.sample
end

#return_an_even_simpler_consensus_sequence(i = consensus_sequence? ) ⇒ Object

#

return_an_even_simpler_consensus_sequence

#


275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 275

# Returns a copy of the given consensus sequence in which every ambiguous
# position, such as "[A/T]" or "[A/T/C]", has been replaced by one
# randomly chosen member of that group.
#
# Each occurrence is decided on its own, so two "[A/T]" positions within
# the same sequence may end up with different nucleotides. The chosen
# nucleotide is wrapped via steelblue() and is followed by
# remove_escape_sequence(orange).
#
# The input is not modified. By default the current consensus sequence is
# used.
def return_an_even_simpler_consensus_sequence(
    i = consensus_sequence?
  )
  # ======================================================================= #
  # The block form of .gsub is evaluated once per match; a plain
  # replacement String would be built only once and would thus put the
  # very same "random" nucleotide at every matching position.
  # ======================================================================= #
  i.gsub(/\[[ATCG](?:\/[ATCG])+\]/) {|ambiguous_position|
    candidates = ambiguous_position[1..-2].split('/')
    steelblue(candidates.sample)+
    remove_escape_sequence(orange)
  }
end

#run ⇒ Object

#

run (run tag)

#


355
356
357
358
359
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 355

# Main entry point. The three steps are performed in this order:
#
#   (1) announce the input sequences
#   (2) determine the consensus entry for every position
#   (3) report the resulting consensus sequence
#
# Step (3) depends on the data gathered in step (2).
def run
  report_on_which_sequences_we_will_operate
  iterate_over_the_sequences
  report_the_consensus_sequence
end

#set_input_sequences(i) ⇒ Object

#

set_input_sequences(i)

#


91
92
93
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 91

# Stores the input sequences in @input_sequences.
#
# The argument is always stored as a flat Array: a single object is
# wrapped into an Array, nested Arrays are flattened.
def set_input_sequences(i)
  wrapped = [i]
  @input_sequences = wrapped.flatten
end

#try_to_find_and_report_a_simpler_consensus_sequence(i = consensus_sequence? ) ⇒ Object

#

try_to_find_and_report_a_simpler_consensus_sequence

For a list of IUPAC codes, see the following link:

https://www.bioinformatics.org/sms/iupac.html
#


323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 323

# Reports a shorter notation of the given consensus sequence, in which
# every ambiguous position such as "[A/C]" is replaced by its one-letter
# IUPAC ambiguity code (here: "M").
#
# The whole sequence is wrapped via orange(); every IUPAC letter is
# wrapped via steelblue() and followed by remove_escape_sequence(orange).
# Groups without an entry in the table below are left unchanged.
#
# For a list of IUPAC codes, see:
#
#   https://www.bioinformatics.org/sms/iupac.html
#
def try_to_find_and_report_a_simpler_consensus_sequence(
    i = consensus_sequence?
  )
  # ======================================================================= #
  # The keys list the nucleotides in the order A, T, C, G, which is the
  # order in which calculate_frequencies_of() builds its Hash and hence
  # the order used within the "[../..]" groups.
  # ======================================================================= #
  iupac_code_for = {
    'A/T'     => 'W',
    'A/C'     => 'M',
    'A/G'     => 'R',
    'T/C'     => 'Y',
    'T/G'     => 'K',
    'C/G'     => 'S',
    'A/T/C'   => 'H',
    'A/T/G'   => 'D',
    'A/C/G'   => 'V',
    'T/C/G'   => 'B',
    'A/T/C/G' => 'N'
  }
  erev "#{rev}We will try to find - and report - a simpler consensus sequence:"
  i = orange(i)
  # ======================================================================= #
  # .gsub returns a new String, so a frozen String is not a problem here.
  # ======================================================================= #
  i = i.gsub(/\[[ATCG](?:\/[ATCG])+\]/) {|ambiguous_position|
    iupac_code = iupac_code_for[ambiguous_position[1..-2]]
    if iupac_code
      steelblue(iupac_code)+
      remove_escape_sequence(orange)
    else
      ambiguous_position
    end
  }
  e
  erev "  #{i}"
  e
end

#try_to_find_and_report_an_even_simpler_consensus_sequence(i = consensus_sequence? ) ⇒ Object

#

try_to_find_and_report_an_even_simpler_consensus_sequence

#


303
304
305
306
307
308
309
310
311
312
313
# File 'lib/bioroebe/utility_scripts/consensus_sequence/consensus_sequence.rb', line 303

# Reports one "even simpler" variant of the given consensus sequence, as
# returned by return_an_even_simpler_consensus_sequence(), preceded by a
# two-line explanation.
#
# By default the current consensus sequence is used.
def try_to_find_and_report_an_even_simpler_consensus_sequence(
    i = consensus_sequence?
  )
  erev "#{rev}And the following is an even simpler consensus sequence (blue"
  erev 'denotes random, equal choice):'
  simplified = return_an_even_simpler_consensus_sequence(i)
  e
  erev "  #{simplified}"
  e
end