Class: Bioroebe::Taxonomy::Info

Inherits:
CommandlineApplication show all
Includes:
Constants
Defined in:
lib/bioroebe/taxonomy/info/info.rb

Constant Summary collapse

DEFAULT_TARGET =
#

DEFAULT_TARGET

#
"#{Constants::INFO_DIR}Nematostella_vectensis.INFO"

Constants included from Constants

Constants::AA_DIR, Constants::ARRAY_PROJECT_FILES, Constants::Archaea_Taxonomy_ID, Constants::BASE, Constants::BASE_URL, Constants::BE_VERBOSE, Constants::Bacteria_Taxonomy_ID, Constants::CITATIONS, Constants::CURATED_DIR, Constants::DATA_DIR, Constants::DELNODES, Constants::DIVISION, Constants::Eukaryota_Taxonomy_ID, Constants::FILE_USE_THIS_DATABASE, Constants::GEM_DIR, Constants::GENCODE, Constants::INCOMING_DIR, Constants::INFO_DIR, Constants::LAST_INTERACTIVE_COMMAND, Constants::LOCALOME_DIR, Constants::LOCAL_MIRROR, Constants::MERGED, Constants::MODULE_PATH, Constants::NAMES, Constants::NAMES_SQL, Constants::NCBI_BASE, Constants::NODES, Constants::NODES_SQL, Constants::NT_DIR, Constants::POSTGRESQL_QUERY_SIZE, Constants::POSTGRE_LOGIN_COMMAND, Constants::PROJECT_DOC_DIR, Constants::SEQUENCES_DIR, Constants::SHARED_HOME, Constants::TAXONOMY_BROWSER, Constants::TEMP_DIR, Constants::TEST_DIR, Constants::TMP_DIR, Constants::URL1

Constants inherited from CommandlineApplication

CommandlineApplication::OLD_VERBOSE_VALUE

Constants included from ColoursForBase

ColoursForBase::ARRAY_HTML_COLOURS_IN_USE

Constants inherited from Base

Base::NAMESPACE

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Constants

#info_dir?, #work_directory?

Methods inherited from CommandlineApplication

#all_aminoacids?, #append_what_into, #at_home?, #be_silent, #be_verbose?, #cat, #ccliner, #change_directory, #cliner, #codon_table_dataset?, #codon_to_aminoacid, #codons_for?, #colourize_this_dna_sequence, #complement, #cp, #disable_warnings, #download_dir?, #editor?, #enable_warnings, #ensure_that_the_base_directories_exist, #esystem, #extract, #is_this_a_start_codon?, #is_this_a_stop_codon?, #leading_five_prime, #load_bioroebe_yaml_file, #log_directory?, #one_letter_to_long_name, #one_to_three, #only_numbers?, #open_in_browser, #opnerev, #opnn, #pad_with_double_quotes, #pad_with_single_quotes, #partner_nucleotide, #remove_numbers, #remove_trailing_ansii_escape_code, #return_all_possible_start_codons, #return_array_of_one_letter_aminoacids, #return_cheerful_person, #return_chunked_display, #return_ubiquitin_sequence, #runmode?, #set_be_verbose, #set_runmode, #start_codon?, #stop_codons?, #strict_filter_away_invalid_aminoacids, #taxonomy_download_directory?, #three_to_one, #to_rna, #trailing_three_prime, #use_opn?, #verbose_truth, #was_or_were, #without_extname, #write_what_into

Methods included from BaseModule

#absolute_path, #default_file_read, #file_readlines

Methods included from CommandlineArguments

#commandline_arguments?, #commandline_arguments_that_are_files?, #e, #first?, #first_non_hyphen_argument?, #remove_hyphens_from_the_commandline_arguments, #return_commandline_arguments_as_string, #return_commandline_arguments_that_are_not_files, #return_entries_without_two_leading_hyphens, #select_commandline_arguments, #select_entries_starting_with_two_hyphens, #set_commandline_arguments

Methods included from ColoursForBase

#colourize_this_aminoacid_sequence_for_the_commandline, #colourize_this_nucleotide_sequence, #disable_colours, #ecomment, #efancy, #egold, #enable_colours, #eorange, #eparse, #erev, #remove_trailing_escape_part, #return_colour_for_nucleotides, #rev, #sdir, #set_will_we_use_colours, #sfancy, #sfile, #simp, #swarn, #use_colours?, #use_colours_within_the_bioroebe_namespace?

Methods inherited from Base

#append_what_into, #can_base_pair?, #convert_global_env, #delete_file, #directory_to_the_codon_tables?, #is_on_roebe?, #is_palindrome?, #main_encoding?, #mkdir, #move_file, #mv, #no_file_exists_at, #no_newlines, #project_yaml_directory?, #rds, #register_sigint, #return_pwd, #return_the_first_line_of_this_file, #word_wrap, #write_what_into

Methods included from InternalHashModule

#internal_hash?, #reset_the_internal_hash

Methods included from InferTheNamespaceModule

#infer_the_namespace, #namespace?

Constructor Details

#initialize(i = DEFAULT_TARGET, run_already = true) ⇒ Info

#

initialize

#


44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/bioroebe/taxonomy/info/info.rb', line 44

# Set up a new Info object for the .INFO file given by +i+.
#
# The object is first brought into its default state via reset, then the
# target is handed to set_location.
#
# @param i [String] path or name of the .INFO file (defaults to
#   DEFAULT_TARGET).
# @param run_already [Boolean, Hash, Symbol] when truthy, run is invoked
#   right away. A Hash or the Symbol :silent additionally switches
#   @be_verbose off and still triggers run.
def initialize(
    i           = DEFAULT_TARGET,
    run_already = true
  )
  reset
  set_location(i)
  # A Hash or :silent is a request for a quiet run.
  silent_run = run_already.is_a?(Hash) || run_already == :silent
  @be_verbose = false if silent_run
  run if silent_run || run_already
end

Instance Attribute Details

#locationObject Also known as: location?

Returns the value of attribute location.



38
39
40
# File 'lib/bioroebe/taxonomy/info/info.rb', line 38

# Reader for @location, the path that set_location recorded for the
# .INFO file (nil until a location has been set).
def location
  return @location
end

Class Method Details

.parse(i = DEFAULT_TARGET) ⇒ Object

#

Info.parse

#


171
172
173
174
175
176
177
# File 'lib/bioroebe/taxonomy/info/info.rb', line 171

# Convenience entry point: build an Info object for +i+, let it report its
# taxonomy id (the instance only reports when an id is known) and hand the
# object back to the caller.
#
# @param i [String] path or name of the .INFO file.
# @return [Info] the freshly created instance.
def self.parse(
    i = DEFAULT_TARGET
  )
  Info.new(i).tap { |info| info.try_to_report_the_taxonomy_id }
end

.status(from_this_base_dir = INFO_DIR) ⇒ Object

#

Info.status

This method will display some status information.

#


240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/bioroebe/taxonomy/info/info.rb', line 240

# Print a short status summary for a directory of .INFO files: how many
# .INFO files it holds and, if other files are present too, the total
# number of files.
#
# Only non-directory entries are counted in both tallies, so a
# sub-directory (even one named like "Foo.INFO") cannot skew the
# difference between them.
#
# @param from_this_base_dir [String] directory to inspect. A missing
#   trailing "/" is tolerated; an empty string keeps meaning "relative to
#   the current directory".
def self.status(
    from_this_base_dir = INFO_DIR
  )
  # Normalise the glob prefix so "dir" and "dir/" behave the same; without
  # this, "dir" + "*" would match siblings of the directory, not its content.
  glob_base = from_this_base_dir.to_s
  glob_base += '/' unless glob_base.empty? || glob_base.end_with?('/')
  total_entries = Dir[glob_base+'*'].reject {|entry|
    File.directory?(entry) # Reject directories.
  }.size
  n_entries = Dir[glob_base+'*.INFO'].reject {|entry|
    File.directory?(entry) # Same rule as above, to keep both counts comparable.
  }.size
  e 'The directory at '+sdir(from_this_base_dir)+' has '+simp(n_entries)+
    ' entries for .INFO available.' if n_entries > 0
  if (total_entries - n_entries) > 0
    e 'That means that there are some entries that are not .INFO '+
      'files in that directory.'
    e '(The total number of files in that directory '+
      'is '+sfancy(total_entries)+')'
  end
end

Instance Method Details

#check_for_genbank_common_nameObject

#

check_for_genbank_common_name

#


202
203
204
205
206
207
208
209
# File 'lib/bioroebe/taxonomy/info/info.rb', line 202

# Look for a "Genbank common name: ..." line in the loaded file content and
# remember the captured name in @genbank_common_name.
#
# Nothing happens when no content has been loaded (@data is nil). When the
# label does not occur at all, a notice is emitted, but only in verbose
# mode.
def check_for_genbank_common_name
  return unless @data
  if @data.include?('Genbank common name') # The label is present.
    # Index 1 selects the first capture group; nil when the line has no
    # ": <name>" part.
    common_name = @data[/Genbank common name: (.+)/, 1]
    @genbank_common_name = common_name if common_name
  elsif @be_verbose
    e 'The file '+sfile(filename)+' has no Genbank common name.'
  end
end

#check_for_taxonomy_idObject

#

check_for_taxonomy_id

This method attempts to fetch the Taxonomy ID from the .INFO file. Some .INFO files may not have a Taxonomy ID.

#


217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/bioroebe/taxonomy/info/info.rb', line 217

# Try to read the Taxonomy ID out of the loaded .INFO content and store the
# digits (as a String) in @taxonomy_id. Not every .INFO file has one.
#
# Nothing happens when no content has been loaded (@data is nil). When the
# label does not occur at all, a notice is emitted, but only in verbose
# mode.
def check_for_taxonomy_id
  return unless @data
  if @data.include?('Taxonomy ID') # The label is present.
    # Index 1 selects the captured digits; nil when no number follows.
    id = @data[/Taxonomy ID: (\d+)/, 1]
    @taxonomy_id = id if id
  elsif @be_verbose
    # A line consisting only of digits is deliberately NOT used as a
    # fallback id: it can produce false positives, such as in the file
    # /data/curated/sequences/INFO/Micromonas_pusilla.INFO
    e 'The file '+sfile(filename)+' has no Taxonomy ID.'
  end
end

#corresponding_datafile?Boolean Also known as: corresponding_datafile

#

corresponding_datafile?

#

Returns:

  • (Boolean)


77
78
79
# File 'lib/bioroebe/taxonomy/info/info.rb', line 77

# Reader for @corresponding_datafile. Returns the stored value as-is (nil
# while nothing has been assigned), so it is truthy rather than a strict
# boolean.
def corresponding_datafile?
  return @corresponding_datafile
end

#data?Boolean

#

data?

#

Returns:

  • (Boolean)


312
313
314
# File 'lib/bioroebe/taxonomy/info/info.rb', line 312

# Reader for @data, the content read from the .INFO file. Returns the
# stored value itself (nil when nothing was loaded), not a strict boolean.
def data?
  return @data
end

#fasta?Boolean

#

fasta?

#

Returns:

  • (Boolean)


278
279
280
281
282
283
# File 'lib/bioroebe/taxonomy/info/info.rb', line 278

# Return the fasta file that belongs to this .INFO file.
#
# When @corresponding_datafile is still nil, a lookup via
# try_to_find_likely_fasta_file is triggered first; that method assigns the
# instance variable. The result is nil when no candidate could be found.
def fasta?
  try_to_find_likely_fasta_file if @corresponding_datafile.nil?
  @corresponding_datafile
end

#filename(i = location? ) ⇒ Object

#

filename

This will return the filename of the .INFO file in question.

#


125
126
127
128
129
# File 'lib/bioroebe/taxonomy/info/info.rb', line 125

# Return the file name (without any directory part) of the .INFO file,
# passed through sfile.
#
# @param i [String] path to use; defaults to the current location.
def filename(
    i = location?
  )
  base_name = File.basename(i)
  sfile(base_name)
end

#red(i) ⇒ Object

#

red

#


148
149
150
# File 'lib/bioroebe/taxonomy/info/info.rb', line 148

# Thin wrapper that forwards +i+ to Colours.red and returns its result.
def red(i)
  return Colours.red(i)
end

#resetObject

#

reset (reset tag)

#


84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/bioroebe/taxonomy/info/info.rb', line 84

# Bring the object into its default state (reset tag).
#
# The parent reset runs first, then the namespace is inferred and the
# verbosity is initialised through set_be_verbose. Afterwards all state
# that belongs to this class is cleared.
def reset
  super()
  infer_the_namespace
  set_be_verbose # === @be_verbose
  # === @taxonomy_id
  # By default we will assume that this file has no taxonomy id.
  @taxonomy_id = false
  # === @location, @data, @genbank_common_name, @corresponding_datafile
  # Nothing has been located, read or looked up yet.
  @location = @data = @genbank_common_name = @corresponding_datafile = nil
end

#runObject

#

run

Bundle together the various methods that check the file.

#


335
336
337
338
# File 'lib/bioroebe/taxonomy/info/info.rb', line 335

# Bundle together the checks that inspect the loaded file content. They run
# in a fixed order: the taxonomy id first, then the Genbank common name.
# The return value is whatever the last check returns.
def run
  check_for_taxonomy_id # Determine @taxonomy_id here.
  check_for_genbank_common_name # Determine @genbank_common_name here.
end

#set_location(i = nil) ⇒ Object

#

set_location

#


60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/bioroebe/taxonomy/info/info.rb', line 60

# Remember where the .INFO file lives and read its content into @data.
#
# The file is looked up at +i+ first and, failing that, under INFO_DIR
# using only the base name of +i+. If neither is a regular file, a notice
# is emitted and @data is left untouched.
#
# Only regular files are read. A directory (for instance an empty name
# that resolves to INFO_DIR itself) is treated as "not found" instead of
# making File.read raise.
#
# NOTE(review): when the INFO_DIR fallback is used, @location still holds
# the originally requested path, not the path that was actually read.
#
# @param i [String, nil] path or name of the .INFO file; nil selects
#   DEFAULT_TARGET.
# @return [String] the value stored in @location.
def set_location(i = nil)
  i = DEFAULT_TARGET if i.nil?
  i = i.to_s # Should always be a String.
  if File.file?(i)
    readable_path = i
  else
    fallback = INFO_DIR+File.basename(i)
    readable_path = fallback if File.file?(fallback)
  end
  if readable_path
    @data = File.read(readable_path)
  else
    opnn; e 'No file was found at `'+sfile(i)+'`.'
  end
  # A bare file name is anchored at the current working directory.
  i = return_pwd+i unless i.include? '/'
  @location = i
end

#set_taxonomy_id(i) ⇒ Object Also known as: taxonomy_id=

#

set_taxonomy_id

#


319
320
321
# File 'lib/bioroebe/taxonomy/info/info.rb', line 319

# Writer for @taxonomy_id. The value is stored as given, without any
# validation or conversion, and is also the return value.
def set_taxonomy_id(i)
  @taxonomy_id = i
end

#show_info(use_cliner = true) ⇒ Object Also known as: show

#

show_info

Show info here. This will delegate to the method show_info_output. We decide whether we will use cliner or not.

#


137
138
139
140
141
142
143
# File 'lib/bioroebe/taxonomy/info/info.rb', line 137

# Show the collected information by delegating to show_info_output.
#
# @param use_cliner [Boolean] when truthy the output is produced inside a
#   cliner block; otherwise show_info_output is called directly.
def show_info(use_cliner = true)
  return show_info_output unless use_cliner
  cliner { show_info_output }
end

#show_info_outputObject

#

show_info_output

Show (all) relevant info here.

#


157
158
159
160
161
162
163
164
165
166
# File 'lib/bioroebe/taxonomy/info/info.rb', line 157

# Print (all) relevant info about this .INFO file: the location, then the
# taxonomy id and the Genbank common name (each only when set), and finally
# the full file content.
def show_info_output
  e "#{red('@location')} is at: #{sfancy(@location.to_s)}"
  if @taxonomy_id
    e "#{red('@taxonomy_id')} is at: #{sfancy(@taxonomy_id.to_s)}"
  end
  if @genbank_common_name
    e "#{red('@genbank_common_name')} at: #{sfancy(@genbank_common_name.to_s)}"
  end
  e "#{red('@data')} (the full content of the file) is: #{simp(@data.to_s)}"
end

#show_likely_fasta_file_if_it_can_be_foundObject

#

show_likely_fasta_file_if_it_can_be_found

We will show the likely fasta file, if it can be found.

#


184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/bioroebe/taxonomy/info/info.rb', line 184

# Search for fasta files that match this .INFO file and list every
# candidate, followed by a hint on how to parse them. When the search
# leaves @corresponding_datafile unset, a single "not found" line is
# printed instead.
def show_likely_fasta_file_if_it_can_be_found
  candidates = try_to_find_likely_fasta_file
  unless @corresponding_datafile
    return e('We could not find a corresponding local fasta file.')
  end
  e "We found #{candidates.size} match(es) of potential .fa files. "\
    "Now showing all of them."
  candidates.each { |path| e '  '+sfile(path) } # Display via padding.
  e '(This can be parsed through class ParseFasta, which is '\
    'aliased to "pfasta"'
  e 'in the interactive shell; or simply paste the file path '\
    'into the shell.)'
end

#taxidObject Also known as: tax_id, taxid?, has_taxonomy_id?

#

taxid

The alias has_taxonomy_id? reports whether we have a taxonomy id or not.

#


291
292
293
# File 'lib/bioroebe/taxonomy/info/info.rb', line 291

# Reader for @taxonomy_id. The value is returned as stored: false after a
# reset, otherwise whatever id has been assigned. This makes it usable both
# as a getter and as a truthiness check.
def taxid
  return @taxonomy_id
end

#taxonomy_id?Boolean Also known as: taxonomy_id

#

taxonomy_id?

#

Returns:

  • (Boolean)


326
327
328
# File 'lib/bioroebe/taxonomy/info/info.rb', line 326

# Reader for @taxonomy_id. Returns the stored value itself (false when no
# id is known), so the result is truthy rather than a strict boolean.
def taxonomy_id?
  return @taxonomy_id
end

#try_to_find_likely_fasta_file(i = @location) ⇒ Object Also known as: find_fasta

#

try_to_find_likely_fasta_file

We attempt to find a likely fasta file. The current directory has priority over the AA_DIR.

#


263
264
265
266
267
268
269
270
271
272
273
# File 'lib/bioroebe/taxonomy/info/info.rb', line 263

# Attempt to find fasta files that likely belong to the given .INFO file.
#
# The base name of +i+ (minus its extension) plus "*.fa" is used as a glob.
# The current directory has priority over AA_DIR: AA_DIR is only consulted
# when the current directory yields no match. The first hit (or nil) is
# stored in @corresponding_datafile.
#
# @param i [String] path of the .INFO file; defaults to @location.
# @return [Array<String>] all matching paths, possibly empty.
def try_to_find_likely_fasta_file(i = @location)
  # This will be a Filename glob, such as: "Nematostella_vectensis*.fa"
  pattern = without_extname(File.basename(i))+'*.fa'
  # Glob the current directory once and reuse the result, so the emptiness
  # check and the returned list always refer to the same listing.
  entries = Dir[return_pwd+pattern]
  entries = Dir[AA_DIR+pattern] if entries.empty?
  @corresponding_datafile = entries.first
  return entries
end

#try_to_report_the_taxonomy_idObject Also known as: report_id

#

try_to_report_the_taxonomy_id

Report about the ID but only if it is known.

#


302
303
304
305
306
307
# File 'lib/bioroebe/taxonomy/info/info.rb', line 302

# Report the taxonomy id, but only if one is known. Without an id the
# method does nothing and returns nil.
def try_to_report_the_taxonomy_id
  return unless @taxonomy_id
  opnn
  e 'The taxonomy_id that we could find is: '+
    sfancy(@taxonomy_id)
end