Class: Bioroebe::ParseFrequencyTable

Inherits:
CommandlineApplication show all
Defined in:
lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb

Overview

Bioroebe::ParseFrequencyTable

Constant Summary collapse

AUTOMATICALLY_CREATE_A_LOCAL_YAML_FILE_IF_IT_DOES_NOT_YET_EXIST =
#

AUTOMATICALLY_CREATE_A_LOCAL_YAML_FILE_IF_IT_DOES_NOT_YET_EXIST

If this constant is set to true then this class will attempt to create a local .yml file, unless such a file already exists.

That .yml file will include the keys and frequency values associated with these keys, such as: “UUU: 17.7”.

The idea is to simplify the creation of such .yml files, which may in turn be used to populate a SQL database.

#
true
USE_THIS_REGEX =
#

USE_THIS_REGEX

See here:

https://rubular.com/r/NgSPXJ2f5WG2kq
#
/([A-Z]{3})  ?(\d{1,2}\.\d{0,2})/
DEFAULT_INPUT =
#

DEFAULT_INPUT

This is just sample data that can be used to test the inner workings of this class.

#
'UUU 17.7(  2478)  UCU 14.2(  1994)  UAU 12.1(  1698)  UGU 10.5(  1473)
UUC 25.9(  3630)  UCC 18.9(  2641)  UAC 18.9(  2640)  UGC 13.8(  1928)
UUA  6.3(   880)  UCA  9.5(  1328)  UAA  0.7(   102)  UGA  1.5(   216)
UUG 13.5(  1892)  UCG  4.9(   684)  UAG  0.5(    64)  UGG 15.3(  2148)

CUU 11.9(  1667)  CCU 13.9(  1947)  CAU  8.7(  1219)  CGU  3.7(   519)
CUC 22.9(  3210)  CCC 20.4(  2860)  CAC 14.8(  2076)  CGC  9.8(  1371)
CUA  6.4(   897)  CCA 13.1(  1838)  CAA 11.6(  1630)  CGA  5.0(   696)
CUG 45.8(  6407)  CCG  6.8(   952)  CAG 30.1(  4212)  CGG 10.7(  1502)

AUU 14.9(  2081)  ACU 11.4(  1594)  AAU 17.0(  2378)  AGU 10.2(  1426)
AUC 24.9(  3483)  ACC 21.6(  3020)  AAC 24.2(  3391)  AGC 18.8(  2630)
AUA  7.4(  1041)  ACA 12.7(  1784)  AAA 23.3(  3255)  AGA 12.0(  1678)
AUG 22.4(  3140)  ACG  8.5(  1186)  AAG 30.7(  4292)  AGG 12.7(  1779)

GUU 10.2(  1421)  GCU 16.5(  2314)  GAU 19.5(  2733)  GGU  9.2(  1293)
GUC 18.3(  2566)  GCC 31.4(  4391)  GAC 26.8(  3758)  GGC 22.5(  3143)
GUA  6.2(   869)  GCA 13.1(  1833)  GAA 27.1(  3800)  GGA 16.0(  2239)
GUG 30.5(  4267)  GCG  8.6(  1207)  GAG 35.8(  5005)  GGG 15.6(  2181)'

Constants inherited from CommandlineApplication

CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #red, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = ARGV, run_already = true) ⇒ ParseFrequencyTable

#

initialize

#


84
85
86
87
88
89
90
91
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 84

# ========================================================================= #
# === initialize
#
# Resets the internal state, registers the given commandline arguments
# and - unless run_already is falsy - invokes run() right away.
#
# @param i           [Array, String] the input arguments (defaults to ARGV)
# @param run_already [Boolean] whether run() is called by the constructor
# ========================================================================= #
def initialize(i = ARGV, run_already = true)
  reset
  set_commandline_arguments(i)
  return unless run_already
  run
end

Class Method Details

.[](i = ARGV) ⇒ Object

#

Bioroebe::ParseFrequencyTable[]

#


318
319
320
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 318

# ========================================================================= #
# === Bioroebe::ParseFrequencyTable[]
#
# Shorthand constructor: forwards its argument to new().
#
# @param i [Array, String] the input handed to new() (defaults to ARGV)
# @return whatever new() returns
# ========================================================================= #
def self.[](i = ARGV)
  self.new(i)
end

Instance Method Details

#determine_whether_we_have_to_download_from_a_remote_url ⇒ Object

#

determine_whether_we_have_to_download_from_a_remote_url

This method will determine whether this class has to download the dataset from a remote URL.

#


192
193
194
195
196
197
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 192

# ========================================================================= #
# === determine_whether_we_have_to_download_from_a_remote_url
#
# Looks at the first input argument; if it begins with 'http' it is
# registered via set_use_this_url(). Otherwise nothing happens and nil
# is returned.
# ========================================================================= #
def determine_whether_we_have_to_download_from_a_remote_url
  candidate = first?
  return unless candidate && candidate.start_with?('http')
  set_use_this_url(candidate)
end

#do_store_into_a_local_yaml_file(into = :default) ⇒ Object

#

do_store_into_a_local_yaml_file

#


218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 218

# ========================================================================= #
# === do_store_into_a_local_yaml_file
#
# Serializes @hash_storing_the_codon_frequencies as "KEY: VALUE" lines,
# preceded by a small comment header, and hands the resulting string to
# write_what_into().
#
# Target file name:
#
#   - :default is mapped to 'default_yaml_file.yml'.
#   - If the remote URL contains '?species=' and the name still is the
#     default one, the name is derived from the species id (the part
#     after the last '=') plus, when it can be extracted from the
#     original dataset, the organism name - e. g.
#     '107243_Thlaspi_caerulescens.yml'.
#
# Nothing is written when the final target file already exists or when
# no codon frequencies were collected.
#
# @param into [String, Symbol] target file name, or :default
# ========================================================================= #
def do_store_into_a_local_yaml_file(
    into = :default
  )
  case into
  when :default
    into = 'default_yaml_file.yml'
  end
  # The URL is nil when the input was a plain dataset rather than a remote
  # URL, so normalize it to a String before querying it.
  remote_url = remote_url?.to_s
  # ======================================================================= #
  # Assume an URL such as this one here:
  #
  #   https://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=107243
  #
  # ======================================================================= #
  if remote_url.include?('?species=') && into.include?('default_yaml')
    into = remote_url.split('=').last.strip
    # ===================================================================== #
    # We can also use a regex on the original dataset to obtain
    # the name of the organism, such as here:
    #
    #   <STRONG><i>Thlaspi caerulescens </i>[gbpln]: 45 CDS's (20416 codons)</STRONG>
    #
    # See: https://rubular.com/r/o4WJAeRxuvB4kM
    # ===================================================================== #
    organism = original_dataset?.to_s[/<STRONG><i>(.+) <\/i>/, 1].to_s.strip.tr(' ', '_')
    # Only append the organism when the regex actually matched; otherwise
    # the file name would end in a dangling underscore.
    into << "_#{organism}" unless organism.empty?
    into << '.yml' unless into.end_with? '.yml'
  end
  # The existence check has to happen after the final name is known, or
  # else a species-specific file could be overwritten silently.
  if File.exist? into
    e "Can not store into the local file #{sfile(into)} as such a file "\
      "already exists."
  elsif @hash_storing_the_codon_frequencies.empty?
    e "No dataset was found at the URL `#{sfancy(url?)}`."
  else
    # ===================================================================== #
    # First create a generic header for that file:
    # ===================================================================== #
    what = ''.dup
    what << "#\n"
    what << "# Filename: #{into}\n"
    what << "#\n"
    what << "# The following dataset was obtained from:\n"
    what << "#\n"
    what << "#   #{remote_url}\n"
    what << "#\n"
    what << "#\n"
    @hash_storing_the_codon_frequencies.each_pair {|key, value|
      what << "#{key}: #{value.to_s.rjust(4)}\n"
    }
    e "Saving into the local file at `#{sfile(into)}`."
    write_what_into(what, into)
  end
end

#first? ⇒ Boolean Also known as: input?, original_dataset?

#

first?

#

Returns:

  • (Boolean)


154
155
156
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 154

# ========================================================================= #
# === first?
#
# Returns the first element of @commandline_arguments (nil if there is
# none).
# ========================================================================= #
def first?
  @commandline_arguments[0]
end
#

menu (menu tag)

#


123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 123

# ========================================================================= #
# === menu (menu tag)
#
# Interprets the given commandline argument(s). An Array is processed
# element by element; any other value is compared against the known
# options.
#
# @param i [Array, String] the argument(s) to inspect (defaults to
#          commandline_arguments?)
# ========================================================================= #
def menu(
    i = commandline_arguments?
  )
  # Arrays are unpacked recursively, one entry at a time.
  return i.each { |argument| menu(argument) } if i.is_a? Array
  case i # case tag
  # ======================================================================= #
  # === --random
  #
  # Pick a URL for an arbitrary species id. Since the id is random this
  # entry point will often fail.
  # ======================================================================= #
  when /^-?-?random$/
    set_url(return_url_from_a_random_entry)
  end
end

#report_the_frequencies_properly_formatted ⇒ Object

#

report_the_frequencies_properly_formatted

#


202
203
204
205
206
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 202

# ========================================================================= #
# === report_the_frequencies_properly_formatted
#
# Passes one "CODON: VALUE" line per stored entry to e(); the value is
# right-aligned to a width of four characters.
# ========================================================================= #
def report_the_frequencies_properly_formatted
  @hash_storing_the_codon_frequencies.each_pair do |codon, frequency|
    padded_frequency = frequency.to_s.rjust(4)
    e "#{codon}: #{padded_frequency}"
  end
end

#reset ⇒ Object

#

reset (reset tag)

#


96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 96

# ========================================================================= #
# === reset (reset tag)
#
# Brings the instance back into its initial state: calls the parent
# implementation and infer_the_namespace, then clears the URL and the
# codon-frequency store.
# ========================================================================= #
def reset
  super()
  infer_the_namespace
  # ======================================================================= #
  # === @use_this_url
  #
  # Starts out as nil; the class decides later on whether a remote URL
  # has to be downloaded.
  # ======================================================================= #
  @use_this_url = nil
  # ======================================================================= #
  # === @hash_storing_the_codon_frequencies
  #
  # Filled with codon => frequency entries.
  # ======================================================================= #
  @hash_storing_the_codon_frequencies = {}
end

#return_url_from_a_random_entry ⇒ Object

#

return_url_from_a_random_entry (random tag)

#


146
147
148
149
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 146

# ========================================================================= #
# === return_url_from_a_random_entry (random tag)
#
# Builds a showcodon.cgi URL whose species id is a random integer
# between 1 and 100_000 (both inclusive).
#
# @return [String] the URL
# ========================================================================= #
def return_url_from_a_random_entry
  species_id = rand(100_000) + 1
  "https://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=#{species_id}"
end

#run ⇒ Object

#

run (run tag)

#


283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 283

# ========================================================================= #
# === run (run tag)
#
# Main entry point:
#
#   (1) decide whether the first argument is a remote URL
#   (2) if a URL is in use, download it and treat the response body as
#       the dataset; if no input was given at all, fall back to
#       DEFAULT_INPUT
#   (3) extract all codon/frequency pairs via USE_THIS_REGEX into
#       @hash_storing_the_codon_frequencies
#   (4) report the result and, if store_locally? is true, store it
#
# ========================================================================= #
def run
  determine_whether_we_have_to_download_from_a_remote_url
  if @use_this_url && @use_this_url.start_with?('http')
    opnn; e 'Obtaining dataset from the following remote URL:'
    e
    e sfancy("  #{@use_this_url}")
    e
    set_first(URI.open(@use_this_url).read)
  elsif first?.nil?
    # Without any input there is nothing to split; use the bundled sample
    # dataset rather than failing on nil.
    set_first(DEFAULT_INPUT)
  end
  # ======================================================================= #
  # A line of the dataset may look like:
  #
  #   UUC 25.9(  3630)  UCC 18.9(  2641)  UAC 18.9(  2640)  UGC 13.8(  1928)
  #
  # The regex can not match across a newline, so scanning the whole
  # dataset at once finds the same pairs as a line-by-line scan would.
  # ======================================================================= #
  first?.to_s.scan(USE_THIS_REGEX) {|codon, frequency|
    @hash_storing_the_codon_frequencies[codon] = frequency
  }
  report_the_frequencies_properly_formatted
  do_store_into_a_local_yaml_file if store_locally?
end

#set_commandline_arguments(i) ⇒ Object

#

set_commandline_arguments

#


115
116
117
118
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 115

# ========================================================================= #
# === set_commandline_arguments
#
# Normalizes the input into a flat Array without nil entries, stores it
# in @commandline_arguments and passes it on to menu().
#
# @param i [Array, String, nil] the raw argument(s)
# ========================================================================= #
def set_commandline_arguments(i)
  flattened = [i].flatten
  @commandline_arguments = flattened.compact
  menu(@commandline_arguments)
end

#set_first(i) ⇒ Object

#

set_first

#


162
163
164
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 162

# ========================================================================= #
# === set_first
#
# Replaces the first element of @commandline_arguments with the given
# value.
#
# @param i the new first element
# ========================================================================= #
def set_first(i)
  @commandline_arguments[0] = i
end

#set_use_this_url(i = nil) ⇒ Object Also known as: set_url

#

set_use_this_url

#


169
170
171
172
173
174
175
176
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 169

# ========================================================================= #
# === set_use_this_url
#
# Stores the given value as @use_this_url. An Array is reduced to its
# first element; nil is replaced by DEFAULT_INPUT, the sample dataset of
# this class. The stored value is always an unfrozen String copy.
#
# @param i [String, Array, nil] the URL (or input) to remember
# @return [String] the stored value
# ========================================================================= #
def set_use_this_url(i = nil)
  i = i.first if i.is_a? Array
  # DEFAULT_INPUT is the sample-data constant of this class; the
  # previously referenced SAMPLE_INPUT is not among its constants.
  i = DEFAULT_INPUT if i.nil? # Use default in this case.
  @use_this_url = i.to_s.dup
end

#store_locally? ⇒ Boolean

#

store_locally?

#

Returns:

  • (Boolean)


211
212
213
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 211

# ========================================================================= #
# === store_locally?
#
# Returns the value of the constant
# AUTOMATICALLY_CREATE_A_LOCAL_YAML_FILE_IF_IT_DOES_NOT_YET_EXIST.
# ========================================================================= #
def store_locally?
  AUTOMATICALLY_CREATE_A_LOCAL_YAML_FILE_IF_IT_DOES_NOT_YET_EXIST
end

#use_this_url? ⇒ Boolean Also known as: remote_url?, url?

#

use_this_url?

#

Returns:

  • (Boolean)


181
182
183
# File 'lib/bioroebe/codon_tables/frequencies/parse_frequency_table.rb', line 181

# ========================================================================= #
# === use_this_url?
#
# Returns the current value of @use_this_url.
# ========================================================================= #
def use_this_url?
  @use_this_url
end