Module: Bioroebe::Taxonomy
- Includes:
- Constants
- Included in:
- Taxonomy, Interactive
- Defined in:
- lib/bioroebe/taxonomy/edit.rb,
lib/bioroebe/taxonomy/menu.rb,
lib/bioroebe/taxonomy/node.rb,
lib/bioroebe/taxonomy/chart.rb,
lib/bioroebe/project/project.rb,
lib/bioroebe/taxonomy/shared.rb,
lib/bioroebe/taxonomy/colours.rb,
lib/bioroebe/taxonomy/taxonomy.rb,
lib/bioroebe/taxonomy/constants.rb,
lib/bioroebe/taxonomy/help/help.rb,
lib/bioroebe/taxonomy/info/info.rb,
lib/bioroebe/taxonomy/info/is_dna.rb,
lib/bioroebe/taxonomy/interactive.rb,
lib/bioroebe/taxonomy/parse_fasta.rb,
lib/bioroebe/taxonomy/class_methods.rb,
lib/bioroebe/taxonomy/help/helpline.rb,
lib/bioroebe/toplevel_methods/taxonomy.rb,
lib/bioroebe/taxonomy/info/check_available.rb
Overview
Bioroebe::Taxonomy
Defined Under Namespace
Modules: Constants, Shared Classes: Chart, CheckAvailable, Helpline, Info, Interactive, IsDNA, Node, ParseFasta
Constant Summary collapse
- PROJECT_YAML_DIR =
#
Bioroebe::Taxonomy::PROJECT_YAML_DIR
#
::Bioroebe.project_base_directory?
Constants included from Constants
Constants::AA_DIR, Constants::ARRAY_PROJECT_FILES, Constants::Archaea_Taxonomy_ID, Constants::BASE, Constants::BASE_URL, Constants::BE_VERBOSE, Constants::Bacteria_Taxonomy_ID, Constants::CITATIONS, Constants::CURATED_DIR, Constants::DATA_DIR, Constants::DELNODES, Constants::DIVISION, Constants::Eukaryota_Taxonomy_ID, Constants::FILE_USE_THIS_DATABASE, Constants::GEM_DIR, Constants::GENCODE, Constants::INCOMING_DIR, Constants::INFO_DIR, Constants::LAST_INTERACTIVE_COMMAND, Constants::LOCALOME_DIR, Constants::LOCAL_MIRROR, Constants::MERGED, Constants::MODULE_PATH, Constants::NAMES, Constants::NAMES_SQL, Constants::NCBI_BASE, Constants::NODES, Constants::NODES_SQL, Constants::NT_DIR, Constants::POSTGRESQL_QUERY_SIZE, Constants::POSTGRE_LOGIN_COMMAND, Constants::PROJECT_DOC_DIR, Constants::SEQUENCES_DIR, Constants::SHARED_HOME, Constants::TAXONOMY_BROWSER, Constants::TEMP_DIR, Constants::TEST_DIR, Constants::TMP_DIR, Constants::URL1
Class Method Summary collapse
-
.be_verbose? ⇒ Boolean
# === Taxonomy.be_verbose? ========================================================================= #.
-
.cd(i = '..') ⇒ Object
# === Bioroebe::Taxonomy.cd ========================================================================= #.
-
.cleanup ⇒ Object
# === Taxonomy.cleanup.
-
.download_directory? ⇒ Boolean
# === Bioroebe::Taxonomy.download_directory?.
-
.e(i = '') ⇒ Object
# === Bioroebe::Taxonomy.e ========================================================================= #.
-
.enable_colours ⇒ Object
# === Bioroebe::Taxonomy.enable_colours ========================================================================= #.
-
.ensure_that_temp_dir_exists ⇒ Object
# === Taxonomy.ensure_that_temp_dir_exists ========================================================================= #.
-
.interactive(i = nil) ⇒ Object
# === Taxonomy.interactive.
-
.load(_ = 'taxonomy/citations.dmp') ⇒ Object
# === Taxonomy.load.
-
.project_base_dir? ⇒ Boolean
# === Bioroebe::Taxonomy.project_base_dir?.
-
.project_yaml_dir? ⇒ Boolean
# === Bioroebe::Taxonomy.project_yaml_dir? ========================================================================= #.
-
.report_n_species ⇒ Object
# === Taxonomy.report_n_species.
-
.save_into_which_file? ⇒ Boolean
# === Bioroebe::Taxonomy.save_into_which_file? ========================================================================= #.
-
.save_when_we_last_updated_the_database(be_verbose = ::Bioroebe::BE_VERBOSE) ⇒ Object
# === Taxonomy.save_when_we_last_updated_the_database.
-
.show_current_time_and_date ⇒ Object
# === Taxonomy.show_current_time_and_date ========================================================================= #.
-
.show_remote_urls_to_the_NCBI_taxonomy_webpage(optional_id = nil) ⇒ Object
# === Bioroebe.show_remote_urls_to_the_NCBI_taxonomy_webpage.
-
.status ⇒ Object
# === Bioroebe::Taxonomy.status.
-
.status? ⇒ Boolean
# === Bioroebe::Taxonomy.status? (status tag).
-
.taxonomy_download_directory? ⇒ Boolean
# === Bioroebe::Taxonomy.taxonomy_download_directory?.
-
.update(this_dir = ::Bioroebe.taxonomy_download_directory?, be_verbose = true) ⇒ Object
# === Bioroebe::Taxonomy.update (download tag).
-
.use_colours? ⇒ Boolean
# === Bioroebe::Taxonomy.use_colours? ========================================================================= #.
Instance Method Summary collapse
-
#array_size?(array_input = citations?) ⇒ Boolean
# === array_size?.
-
#citations? ⇒ Boolean
# === citations?.
-
#clean(i) ⇒ Object
(also: #remove_delimiters)
# === clean.
-
#delnodes? ⇒ Boolean
# === delnodes?.
-
#division? ⇒ Boolean
# === division?.
-
#edit(i = '') ⇒ Object
# === edit (edit tag, ed tag).
-
#edit_gemspec ⇒ Object
# === edit_gemspec ========================================================================= #.
-
#edit_instructions ⇒ Object
# === edit_instructions ========================================================================= #.
-
#edit_password ⇒ Object
# === edit_password.
-
#edit_shared_code_file ⇒ Object
# === edit_shared_code_file ========================================================================= #.
-
#edit_taxonomy ⇒ Object
# === edit_taxonomy ========================================================================= #.
-
#find_highest_entries_in_sql ⇒ Object
# === find_highest_entries_in_sql.
-
#gencode? ⇒ Boolean
# === gencode?.
-
#generate_html_links_for(i) ⇒ Object
# === generate_html_links_for.
-
#get_id_of(id = 9606, be_verbose = true) ⇒ Object
# === get_id_of.
-
#get_parent_id_of(i) ⇒ Object
# === get_parent_id_of.
-
#get_scientific_name_of(taxid) ⇒ Object
(also: #get_scientific_name)
# === get_scientific_name_of.
-
#merged? ⇒ Boolean
# === merged?.
-
#names?(i = NAMES) ⇒ Boolean
# === names?.
-
#nodes?(i = NODES) ⇒ Boolean
# === nodes?.
-
#pad(i) ⇒ Object
# === pad.
-
#pad_properly(i = "5\t|\tThe domestic cat: perspective on the nature and diversity of cats.\t|\t0\t|\t8603894\t|\t \t|\t\t|\t9685 \t|\n") ⇒ Object
# === pad_properly.
-
#pad_sql(i) ⇒ Object
# === pad_sql.
-
#project_base_dir? ⇒ Boolean
(also: #base_dir?)
# === project_base_dir? ========================================================================= #.
-
#remove_delimiter(i) ⇒ Object
# === remove_delimiter.
-
#return_current_hours_minutes_seconds ⇒ Object
# === return_current_hours_minutes_seconds ========================================================================= #.
-
#return_full_lineage_of(i = nil) ⇒ Object
# === return_full_lineage_of.
-
#search_id(i = '7460') ⇒ Object
(also: #search, #id?)
# === search_id.
-
#show_current_time_and_date ⇒ Object
# === show_current_time_and_date ========================================================================= #.
-
#show_help ⇒ Object
# === show_help (help tag).
-
#split(i = citations? ) ⇒ Object
# === split (split tag).
-
#test(use_this_url = URL1) ⇒ Object
# === test ========================================================================= #.
-
#to_utf(i) ⇒ Object
(also: #sanitize)
# === to_utf ========================================================================= #.
-
#verify_proper_sql_structures ⇒ Object
# === verify_proper_sql_structures (debug tag).
Methods included from Shared
be_quiet, be_verbose?, cd, edit_login_file, eliminate_tabulator, ensure_that_download_dir_exists, ensure_that_temp_dir_exists, mkdir, readlines, set_pgpassword, show_password, show_time_now, split_at, split_at_tabulator, tokenize
Methods included from Constants
Class Method Details
.be_verbose? ⇒ Boolean
#
Taxonomy.be_verbose?
#
135 136 137 |
# File 'lib/bioroebe/taxonomy/class_methods.rb', line 135
#
# Tell the caller whether the Taxonomy component runs in verbose mode.
#
# @return the value stored in the constant Taxonomy::Constants::BE_VERBOSE
def self.be_verbose?
  return Taxonomy::Constants::BE_VERBOSE
end
.cd(i = '..') ⇒ Object
#
Bioroebe::Taxonomy.cd
#
68 69 70 |
# File 'lib/bioroebe/toplevel_methods/taxonomy.rb', line 68
#
# Change the working directory by delegating to ::Bioroebe.cd.
#
# @param i the directory to change into; defaults to the parent
#          directory ('..')
# @return whatever ::Bioroebe.cd returns
def self.cd(i = '..')
  return ::Bioroebe.cd(i)
end
.cleanup ⇒ Object
#
Taxonomy.cleanup
Get rid of some .sql files through this method here.
#
99 100 101 102 103 104 105 |
# File 'lib/bioroebe/taxonomy/class_methods.rb', line 99
#
# Remove the files names.sql and nodes.sql from the current working
# directory, if they exist there.
#
# @return the outcome for nodes.sql (the result of remove_file, or nil
#         when that file does not exist)
def self.cleanup
  erev 'Trying to remove some files now.'
  outcomes = %w( names.sql nodes.sql ).map { |sql_file|
    remove_file(sql_file) if File.exist? sql_file
  }
  # Hand back the outcome of the last file that was processed.
  outcomes.last
end
.download_directory? ⇒ Boolean
#
Bioroebe::Taxonomy.download_directory?
This method is needed for wget-like functionality.
#
63 64 65 |
# File 'lib/bioroebe/taxonomy/class_methods.rb', line 63
#
# Query the download directory; this is needed for wget-like
# functionality.
#
# @return the value of the constant DOWNLOAD_DIR
def self.download_directory?
  return DOWNLOAD_DIR
end
.e(i = '') ⇒ Object
#
Bioroebe::Taxonomy.e
#
61 62 63 |
# File 'lib/bioroebe/toplevel_methods/taxonomy.rb', line 61
#
# Output helper that forwards its argument to ::Bioroebe.e.
#
# @param i the text to pass on; defaults to an empty String
# @return whatever ::Bioroebe.e returns
def self.e(i = '')
  return ::Bioroebe.e(i)
end
.enable_colours ⇒ Object
#
Bioroebe::Taxonomy.enable_colours
#
21 22 23 24 |
# File 'lib/bioroebe/taxonomy/colours.rb', line 21
#
# Announce that colours are being enabled, then delegate the actual
# work to ::Bioroebe.enable_colours.
#
# @return whatever ::Bioroebe.enable_colours returns
def self.enable_colours
  e('Enabling colours.')
  return ::Bioroebe.enable_colours
end
.ensure_that_temp_dir_exists ⇒ Object
#
Taxonomy.ensure_that_temp_dir_exists
#
75 76 77 |
# File 'lib/bioroebe/toplevel_methods/taxonomy.rb', line 75
#
# Delegate to ::Bioroebe.ensure_that_the_base_directories_exist.
#
# @return whatever that method returns
def self.ensure_that_temp_dir_exists
  return ::Bioroebe.ensure_that_the_base_directories_exist
end
.interactive(i = nil) ⇒ Object
#
Taxonomy.interactive
Invoke this method if you wish to directly invoke the interactive component of the Taxonomy module.
#
1962 1963 1964 |
# File 'lib/bioroebe/taxonomy/interactive.rb', line 1962
#
# Entry point for the interactive component of the Taxonomy module.
#
# @param i optional argument that is handed to Interactive.new as-is
# @return a new ::Bioroebe::Taxonomy::Interactive instance
def self.interactive(i = nil)
  return ::Bioroebe::Taxonomy::Interactive.new(i)
end
.load(_ = 'taxonomy/citations.dmp') ⇒ Object
#
Taxonomy.load
Load a specific .dmp file via this method.
The first argument should be the target location of the file that we wish to load (we assume this to be a local file for now).
#
115 116 117 118 119 120 121 122 123 |
# File 'lib/bioroebe/taxonomy/class_methods.rb', line 115
#
# Read a local .dmp file and return its lines.
#
# @param _ path of the local file that should be loaded
# @return [Array, nil] every line of the file after it was passed
#         through sanitize(), or nil if the file does not exist
def self.load(_ = 'taxonomy/citations.dmp')
  unless File.exist? _
    e 'File at location `'+_+'` does not exist.'
    return nil
  end
  # Each line goes through sanitize() because File.readlines() may
  # read in an invalid encoding.
  File.readlines(_).map { |raw_line| sanitize(raw_line) }
end
.project_base_dir? ⇒ Boolean
#
Bioroebe::Taxonomy.project_base_dir?
The Taxonomy project has been fully integrated into the Bioroebe namespace in the year ~2015.
This method will return a String such as:
"/Programs/Ruby/2.6.4/lib/ruby/site_ruby/2.6.0/bioroebe/taxonomy/"
#
251 252 253 |
# File 'lib/bioroebe/project/project.rb', line 251
#
# Build the path of the taxonomy/ directory that lives below the
# Bioroebe project base directory.
#
# @return [String] the project base directory followed by 'taxonomy/'
def self.project_base_dir?
  bioroebe_base_directory = ::Bioroebe.project_base_directory?
  return "#{bioroebe_base_directory}taxonomy/"
end
.project_yaml_dir? ⇒ Boolean
#
Bioroebe::Taxonomy.project_yaml_dir?
#
258 259 260 |
# File 'lib/bioroebe/project/project.rb', line 258
#
# Query the yaml directory of the Taxonomy project.
#
# @return the value of the constant Taxonomy::PROJECT_YAML_DIR
def self.project_yaml_dir?
  return Taxonomy::PROJECT_YAML_DIR
end
.report_n_species ⇒ Object
#
Taxonomy.report_n_species
This method will report how many species are registered in the NCBI database.
#
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
# File 'lib/bioroebe/taxonomy/class_methods.rb', line 145
#
# Report how many species are registered in the NCBI database, by
# downloading the NCBI statistics page and extracting the number from
# the "All taxa" table row.
#
# This method needs network access. If the number can not be found on
# the remote page then an empty String is reported.
#
# @return whatever the final e() call returns
def self.report_n_species
  # Kernel#open stopped opening URLs in Ruby 3.0, so URI.open (which is
  # provided by open-uri) is used instead.
  require 'open-uri'
  remote_url = 'http://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=statistics&unclassified=hide&uncultured=hide&unspecified=hide&period=&from=&to='
  e 'We will now obtain how many species are registered. This '\
    'may take a while.'
  e
  e 'In 26.09.2011, the Taxonomy database included exactly'
  e '234_991 species with a formal name.'
  e
  # ======================================================================= #
  # See: http://rubular.com/r/awsCk2nF4D
  # ======================================================================= #
  regex_to_use = /gov\/taxonomy\/\?term.+">(\d+)<\/A><\/TD>\s+<\/TR>$/
  # The block form closes the remote handle again after reading.
  dataset = URI.open(remote_url) { |remote| remote.read }.split(N)
  # .dup so that appending also works when string literals are frozen.
  table_row = ''.dup
  is_open = false
  dataset.each {|line|
    table_row << line if is_open
    # Only the lines between the "All taxa" row header and the next
    # closing </TR> tag are collected.
    if line.include? '<TR><TD><A HREF="/Taxonomy/Browser/wwwtax.cgi?id=1">All taxa</A></TD>'
      is_open = true
    elsif line.include? '</TR>'
      is_open = false
    end
  }
  # String#[] with a capture index yields nil if there was no match,
  # which .to_s turns into an empty String.
  n_species = table_row[regex_to_use, 1].to_s
  e 'We found `'+sfancy(n_species)+::Bioroebe.rev+'` registered species.'
end
.save_into_which_file? ⇒ Boolean
#
Bioroebe::Taxonomy.save_into_which_file?
#
20 21 22 |
# File 'lib/bioroebe/toplevel_methods/taxonomy.rb', line 20
#
# Query the file into which the time of the last database update is
# logged.
#
# @return whatever Bioroebe.taxonomy_ncbi_database_last_update_log_file
#         returns
def self.save_into_which_file?
  return Bioroebe.taxonomy_ncbi_database_last_update_log_file
end
.save_when_we_last_updated_the_database(be_verbose = ::Bioroebe::BE_VERBOSE) ⇒ Object
#
Taxonomy.save_when_we_last_updated_the_database
Save into a file when we last updated the database. This method will make use of the constant SAVE_FILE, which tells us where to keep the save file, and can be found at: lib/taxonomy/shared/shared.rb
#
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/bioroebe/toplevel_methods/taxonomy.rb', line 31
#
# Log, into the file returned by save_into_which_file?, when the
# Taxonomy NCBI database was last updated.
#
# Nothing is written unless the constant SHALL_WE_LOG_LAST_UPDATE is
# true and the directory of the target file is writable.
#
# @param be_verbose if true then the target file is announced first
# @return the result of ::Bioroebe.write_what_into, or nil if nothing
#         was written
def self.save_when_we_last_updated_the_database(
    be_verbose = ::Bioroebe::BE_VERBOSE
  )
  if be_verbose
    e "Trying to log last update into file "\
      "#{::Bioroebe.sfile(save_into_which_file?)} next."
  end
  # ======================================================================= #
  # Only store into a file if the constant SHALL_WE_LOG_LAST_UPDATE
  # is set to true.
  # ======================================================================= #
  return unless SHALL_WE_LOG_LAST_UPDATE
  into = save_into_which_file?
  # ======================================================================= #
  # Verify that we can actually write into the directory.
  # ======================================================================= #
  return unless File.stat(File.dirname(into)).writable?
  log_entry = 'Last Update of the Taxonomy NCBI Database: '+
              ::Bioroebe.return_current_day_month_year+', '+
              ::Bioroebe.return_current_hours_minutes_seconds+N
  ::Bioroebe.write_what_into(log_entry, into)
end
.show_current_time_and_date ⇒ Object
#
Taxonomy.show_current_time_and_date
#
490 491 492 493 494 495 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 490
#
# Display the current time and the current date, but only when
# be_verbose? is true.
#
# @return the result of e(), or nil when not in verbose mode
def self.show_current_time_and_date
  return unless be_verbose?
  time_now = sfancy(return_current_hours_minutes_seconds)
  today    = simp(return_current_day_month_year)
  e 'The current time is '+time_now+', and today is the '+today+'.'
end
.show_remote_urls_to_the_NCBI_taxonomy_webpage(optional_id = nil) ⇒ Object
#
Bioroebe.show_remote_urls_to_the_NCBI_taxonomy_webpage
This method will show the remote URLs to different tax-IDs.
If no argument is given (thus no ID) then the NCBI taxonomy parts are displayed.
#
87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/bioroebe/toplevel_methods/taxonomy.rb', line 87
#
# Show remote URLs of the NCBI taxonomy webpage.
#
# @param optional_id a taxonomy id; if given then only the URL for
#        this id is shown, otherwise three general NCBI taxonomy URLs
#        are listed
# @return the result of the last output call
def self.show_remote_urls_to_the_NCBI_taxonomy_webpage(optional_id = nil)
  if optional_id
    return erev('http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id='+optional_id.to_s)
  end
  erev 'The remote URLs towards the NCBI taxonomy part are:'
  e
  %w(
    http://www.ncbi.nlm.nih.gov/taxonomy
    http://www.ncbi.nlm.nih.gov/taxonomy?term=1
    http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi
  ).each { |remote_url|
    erev ' NCBI: '+sfancy(remote_url)
  }
  e
end
.status ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/bioroebe/taxonomy/class_methods.rb', line 30
#
# Report which of the Taxonomy-related datafiles exist in the project
# base directory, biggest file first, together with their file size.
#
# @return the Array of file names that were examined
def self.status
  e "The status for the Taxonomy-related datafiles is as follows:#{N}"
  array = %w(
    names.dmp
    merged.dmp
    delnodes.dmp
    citations.dmp
    nodes.dmp
    taxdump.tar
  )
  # project_base_dir? is used because it is the documented class-level
  # method; a class-level base_dir? is not documented.
  base = project_base_dir?
  # File.size? returns nil for missing or empty files. Mixing nil with
  # Integer sort keys would raise, hence .to_i. The size is taken from
  # the very same path that is checked for existence below.
  sorted_array = array.sort_by {|entry| File.size?(base+entry).to_i }
  sorted_array.reverse_each {|entry|
    file = base+entry
    if File.exist? file
      filesize = File.size?(file)
      _ = ('The file `'+file+'` exists.').ljust(65, ' ')
      _ << (' (Filesize: '+filesize.to_s+')').ljust(30, ' ')
      e _
      if entry.include? 'dump.tar'
        e 'Consider extracting this .tar-file if it was not yet '+
          'extracted - it contains the various .dmp files.'
      end
    else
      e 'We could not find any file at '+sfile(file)
    end
  }
end
.status? ⇒ Boolean
#
Bioroebe::Taxonomy.status? (status tag)
Feedback the status to the user.
#
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/bioroebe/taxonomy/class_methods.rb', line 72
#
# Feedback the status to the user: show the localomics URL and where
# the various data directories can be found.
#
# @return the result of the final e() call
def self.status?
  # Helper that prints one indented entry.
  show_indented = lambda { |entry| e ' '+entry }
  # Helper for the directories that are announced via their basename.
  show_directory = lambda { |directory|
    e 'The '+File.basename(directory)+'/ directory can be found at: '
    show_indented.call(sdir(directory))
  }
  e
  e 'The localomics URL should be at:'
  show_indented.call(sfancy('http://localomics.imp.univie.ac.at:8666/'))
  e 'The shared data of the local mirror can be found at: '
  show_indented.call(sdir(LOCAL_MIRROR))
  e 'The shared folder (where we generate our .sql files '+
    'to) can be found at: '
  show_indented.call(sdir(AUTOGENERATED_SQL_FILES_DIR))
  show_directory.call(LOCALOME_DIR)
  show_directory.call(AA_DIR)
  show_directory.call(NT_DIR)
  show_directory.call(INFO_DIR)
  show_directory.call(INCOMING_DIR)
  e
end
.taxonomy_download_directory? ⇒ Boolean
#
Bioroebe::Taxonomy.taxonomy_download_directory?
This method will return the download directory for use in the Taxonomy subcomponent.
#
162 163 164 |
# File 'lib/bioroebe/toplevel_methods/taxonomy.rb', line 162
#
# Return the download directory for use in the Taxonomy subcomponent.
#
# @return [String] the value of ::Bioroebe.log_dir? followed by
#         'taxonomy/'
def self.taxonomy_download_directory?
  log_directory = ::Bioroebe.log_dir?
  return "#{log_directory}taxonomy/"
end
.update(this_dir = ::Bioroebe.taxonomy_download_directory?, be_verbose = true) ⇒ Object
#
Bioroebe::Taxonomy.update (download tag)
This class method will obtain the file taxdump.tar.gz.
By default we will download into TEMP_DIR, which at the moment of documenting this method defaults to /tmp/robert/ on the I.M.P. cluster, or rather the value that is stored in the very constant DOWNLOAD_DIR.
#
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/bioroebe/toplevel_methods/taxonomy.rb', line 110
#
# Download the archive found at URL_TO_TAXONOMY_ARCHIVE into a local
# directory, extract it there and log the time of the update.
#
# @param this_dir the directory to download into; it is created if it
#        does not exist yet
# @param be_verbose true for additional output; the Symbol :be_silent
#        is treated like false
# @return whatever save_when_we_last_updated_the_database returns
def self.update(
    this_dir   = ::Bioroebe.taxonomy_download_directory?,
    be_verbose = true
  )
  be_verbose = false if be_verbose == :be_silent
  ::Bioroebe.show_time_now # Display the current start-time.
  ::Bioroebe.ensure_that_the_base_directories_exist
  ::Bioroebe.mkdir(this_dir) unless File.directory? this_dir
  unless Dir.exist? this_dir # Safeguard: creating the directory failed.
    if at_home?
      # Fall back to the temp directory no matter whether we are
      # verbose. The original name is remembered so that the message
      # can mention both directories.
      missing_dir = this_dir
      this_dir = temp_dir?
      if be_verbose
        e "The directory at #{sdir(missing_dir)} does not "\
          "exist, thus trying to"
        e 'use '+sdir(this_dir)+' instead.'
      end
    else # Then we try to create that directory once more.
      ::Bioroebe.mkdir(this_dir)
    end
  end
  if be_verbose
    e "Now trying to change to the base directory at "\
      "#{::Bioroebe.sdir(this_dir)}."
  end
  cd this_dir
  e N+'We will next attempt to download the file `'+
    ::Bioroebe.sfancy(URL_TO_TAXONOMY_ARCHIVE)+'`'
  e 'into the local directory `'+
    ::Bioroebe.sdir(this_dir)+'` via wget.'
  if be_verbose
    e '(This file will usually have a file size of about '\
      '~36 MB. [Last verification of this size: Dec 2016])'
  end
  local_file = File.basename(URL_TO_TAXONOMY_ARCHIVE)
  # Get rid of an older archive first, so that the download is fresh.
  ::Bioroebe.remove_file(local_file) if File.exist? local_file
  ::Bioroebe.wget_download(URL_TO_TAXONOMY_ARCHIVE)
  ::Bioroebe.extract(local_file)
  save_when_we_last_updated_the_database
end
.use_colours? ⇒ Boolean
#
Bioroebe::Taxonomy.use_colours?
#
14 15 16 |
# File 'lib/bioroebe/taxonomy/colours.rb', line 14
#
# Query whether colours are in use, by asking ::Bioroebe.use_colours?.
#
# @return whatever ::Bioroebe.use_colours? returns
def self.use_colours?
  return ::Bioroebe.use_colours?
end
Instance Method Details
#array_size?(array_input = citations?) ⇒ Boolean
#
array_size?
Throwaway method that returns, for every entry of a citation dataset, the size of its text field (the sixth column of the split entry).
#
264 265 266 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 264
#
# Throwaway method: determine, for every entry, the size of the sixth
# field (index 5) that split() yields.
#
# @param array_input the entries to examine; defaults to citations?
# @return [Array] one size per entry
def array_size?(array_input = citations?)
  array_input.map { |citation|
    fields = split(citation)
    fields[5].size
  }
end
#citations? ⇒ Boolean
#
citations?
Obtain the citations.dmp dataset through this method.
This dataset includes the following keys:
cit_id, cit_key, pubmed_id,
medline_id, url,
text, taxid_list
#
255 256 257 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 255
#
# Obtain the citations dataset, by loading the file that the constant
# CITATIONS refers to.
#
# @return whatever Taxonomy.load returns for that file
def citations?
  return Taxonomy.load(CITATIONS)
end
#clean(i) ⇒ Object Also known as: remove_delimiters
#
clean
Clean the input string from delimiters. Input can be Array or String.
#
122 123 124 125 126 127 128 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 122
#
# Clean the input from delimiters: every occurrence of MAIN_DELIMITER
# is replaced by a single space, then the trailing newline and the
# surrounding whitespace are removed.
#
# @param i [String, Array] the input; Arrays are cleaned element-wise
# @return [String, Array] the cleaned input
def clean(i)
  return i.map {|entry| clean(entry) } if i.is_a? Array
  # MAIN_DELIMITER is handed to gsub directly rather than being
  # interpolated into a regex: a String pattern is matched literally,
  # so a '|' inside the delimiter is not treated as regex alternation.
  i.gsub(MAIN_DELIMITER, ' ').chomp.strip
end
#delnodes? ⇒ Boolean
#
delnodes?
Obtain the delnodes.dmp dataset. This one has deleted nodes - nodes that existed but were deleted.
#
224 225 226 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 224
#
# Obtain the dataset of deleted nodes, by loading the file that the
# constant DELNODES refers to.
#
# @return whatever Taxonomy.load returns for that file
def delnodes?
  return Taxonomy.load(DELNODES)
end
#division? ⇒ Boolean
#
division?
Obtain the division.dmp dataset.
The Divisions file has these fields:
division id -- taxonomy database division id
division cde -- GenBank division code (three characters)
division name -- e.g. BCT, PLN, VRT, MAM, PRI...
Comments.
#
198 199 200 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 198
#
# Obtain the division dataset, by loading the file that the constant
# DIVISION refers to.
#
# @return whatever Taxonomy.load returns for that file
def division?
  return Taxonomy.load(DIVISION)
end
#edit(i = '') ⇒ Object
#
edit (edit tag, ed tag)
This method allows us to quickly open the internal files.
We can typically use vim for this.
#
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/bioroebe/taxonomy/edit.rb', line 27
#
# Quickly open one of the internal files, by dispatching to the
# matching edit_* method.
#
# @param i name of what to edit; a leading 'edit_' is optional, and
#        the empty String (the default) means the instructions
# @return the result of the edit_* method that was called, or nil if
#         the input is not known
def edit(i = '')
  # The non-destructive gsub is used on purpose: String#to_s returns
  # the receiver itself, so gsub! would modify the String of the caller
  # and would raise a FrozenError for frozen input.
  target = i.to_s.gsub('edit_', '')
  case target
  # ======================================================================= #
  # === taxonomy
  # ======================================================================= #
  when /taxonomy/ # Edit the taxonomy "binary".
    edit_taxonomy
  # ======================================================================= #
  # === shared
  # ======================================================================= #
  when 'shared', 'shared_code'
    edit_shared_code_file
  # ======================================================================= #
  # === gem
  # ======================================================================= #
  when 'gem', 'gemspec'
    edit_gemspec
  # ======================================================================= #
  # === password
  # ======================================================================= #
  when /^password$/, 'passwd'
    edit_password
  # ======================================================================= #
  # === login
  # ======================================================================= #
  when 'login', 'main'
    edit_login_file
  # ======================================================================= #
  # === instructions
  # ======================================================================= #
  when '', 'instructions' # '' is default.
    edit_instructions
  end
end
#edit_gemspec ⇒ Object
#
edit_gemspec
#
71 72 73 |
# File 'lib/bioroebe/taxonomy/edit.rb', line 71
#
# Open the file returned by return_gemspec_file in the editor that
# Bioroebe.editor? names.
#
# @return whatever esystem returns
def edit_gemspec
  command = Bioroebe.editor?+' '+return_gemspec_file
  esystem(command)
end
#edit_instructions ⇒ Object
#
edit_instructions
#
16 17 18 |
# File 'lib/bioroebe/taxonomy/edit.rb', line 16
#
# Open the file returned by return_instructions in the editor that
# Bioroebe.editor? names.
#
# @return whatever esystem returns
def edit_instructions
  command = "#{Bioroebe.editor?} #{return_instructions}"
  esystem(command)
end
#edit_password ⇒ Object
#
edit_password
We can use this method to edit the login-file.
#
94 95 96 |
# File 'lib/bioroebe/taxonomy/edit.rb', line 94
#
# Edit the login-file, by calling edit_login_file with ' +27'.
#
# NOTE(review): ' +27' looks like an editor argument that jumps to
# line 27 - confirm against edit_login_file.
#
# @return whatever edit_login_file returns
def edit_password
  return edit_login_file(' +27')
end
#edit_shared_code_file ⇒ Object
#
edit_shared_code_file
#
85 86 87 |
# File 'lib/bioroebe/taxonomy/edit.rb', line 85
#
# Open the file returned by return_shared_code in the editor that
# Bioroebe.editor? names.
#
# @return whatever esystem returns
def edit_shared_code_file
  command = Bioroebe.editor?+' '+return_shared_code
  esystem(command)
end
#edit_taxonomy ⇒ Object
#
edit_taxonomy
#
78 79 80 |
# File 'lib/bioroebe/taxonomy/edit.rb', line 78
#
# Open the file returned by return_taxonomy_file in the editor that
# Bioroebe.editor? names.
#
# @return whatever esystem returns
def edit_taxonomy
  command = Bioroebe.editor?+' '+return_taxonomy_file
  esystem(command)
end
#find_highest_entries_in_sql ⇒ Object
#
find_highest_entries_in_sql
This method will find the highest entries in the sql database. This may be useful if we wish to optimize the database (i.e. find the best varchar attribute in question).
#
380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 380
#
# Report the length of the longest value per column, first for all
# four columns of names.dmp and then for the first three columns of
# nodes.dmp (both files are expected in TAXONOMY_DIR). This may be
# useful for choosing varchar sizes when optimizing the database.
#
# @return the result of the final e() call
def find_highest_entries_in_sql
  e 'We will find the various highest entries in the sql files.'
  names_file = TAXONOMY_DIR+'names.dmp'
  e 'Starting with '+sfile(names_file)+' first:'
  e 'Names has 4 entries - we are interested in all of them.'
  e names_file
  # One slot per column: taxid, name_txt, unique_name, name_class.
  widest = [0, 0, 0, 0]
  e 'Please be patient, this may take a while ...'
  readlines(names_file).each {|line|
    fields = split_at_tabulator(line)
    widest.each_index {|column|
      widest[column] = [widest[column], fields[column].to_s.size].max
    }
  }
  max_taxid, max_name_txt, max_unique_name, max_name_class = widest
  e ' max_taxid is '+sfancy('%05s' % max_taxid)+' characters long (should be an int anyway).'
  e ' max_name_txt is '+sfancy('%05s' % max_name_txt.to_s)+' characters long.'
  e ' max_unique_name is '+sfancy('%05s' % max_unique_name.to_s)+' characters long.'
  e ' max_name_class is '+sfancy('%05s' % max_name_class.to_s)+' characters long.'
  # Next, we will work on nodes.dmp:
  nodes_file = TAXONOMY_DIR+'nodes.dmp'
  e 'Now working on '+sfile(nodes_file)+' first:'
  e 'Nodes has 3 relevant entries - taxid, parent_taxid and rank (but in total it has 13 entries)'
  e nodes_file
  # One slot per relevant column: taxid, parent_taxid, rank.
  widest = [0, 0, 0]
  e 'Please be patient, this may take a while ...'
  readlines(nodes_file).each {|line|
    fields = split_at_tabulator(line)
    widest.each_index {|column|
      widest[column] = [widest[column], fields[column].to_s.size].max
    }
  }
  max_taxid, max_parent_taxid, max_rank = widest
  e ' max_taxid is '+sfancy('%05s' % max_taxid.to_s)+' characters long.'
  e ' max_parent_taxid is '+sfancy('%05s' % max_parent_taxid.to_s)+' characters long.'
  e ' max_rank is '+sfancy('%05s' % max_rank.to_s)+' characters long.'
end
#gencode? ⇒ Boolean
#
gencode?
Obtain gencode.dmp dataset, “genetic codes” file.
genetic code id -- GenBank genetic code id
abbreviation -- genetic code name abbreviation
name -- genetic code name
cde -- translation table for this genetic code
starts -- start codons for this genetic code
#
214 215 216 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 214
#
# Obtain the "genetic codes" dataset, by loading the file that the
# constant GENCODE refers to.
#
# @return whatever Taxonomy.load returns for that file
def gencode?
  return Taxonomy.load(GENCODE)
end
#generate_html_links_for(i) ⇒ Object
#
generate_html_links_for
Input to this method should be an array of taxonomic IDs.
#
108 109 110 111 112 113 114 115 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 108
#
# Output the URL of the NCBI Taxonomy browser page of every given
# taxonomic ID.
#
# @param i a taxonomic ID, or a (possibly nested) Array of such IDs
# @return the input Array, or the result of e() for a single ID
def generate_html_links_for(i)
  # Arrays are processed recursively, one entry at a time.
  return i.each {|taxonomic_id| generate_html_links_for(taxonomic_id) } if i.is_a? Array
  base_url = 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id='
  e base_url+i.to_s
end
#get_id_of(id = 9606, be_verbose = true) ⇒ Object
#
get_id_of
Use this method to query the database for a specific ID. For this to work, the database must have the ids.
#
357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 357
#
# Query the names table of the database for a specific taxonomy ID.
# For this to work, the database must contain the IDs.
#
# Two queries are run: the first one selects the full row and is run
# with the given verbosity, the second one fetches just the taxid.
#
# @param id the taxonomy ID to look for
# @param be_verbose handed to run_sql_query for the first query
# @return [String] the stripped result of the second query
def get_id_of(
    id         = 9606,
    be_verbose = true
  )
  id = id.to_s
  # Both queries share this clause. It ends with the ';' - the stray
  # trailing '"' that used to follow it was dropped, in line with the
  # queries of get_parent_id_of and get_scientific_name_of.
  # NOTE(review): id is interpolated unescaped - only pass trusted IDs.
  from_clause = ' from names WHERE taxid='+id+' AND name_class=\'scientific name\' LIMIT 3;'
  run_sql_query('SELECT *'+from_clause, be_verbose)
  result = run_sql_query('SELECT taxid'+from_clause, :silent, :tuples).strip
  return result
end
#get_parent_id_of(i) ⇒ Object
#
get_parent_id_of
This method is similar to the method above, but we will fetch the parent id instead.
#
322 323 324 325 326 327 328 329 330 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 322
#
# Fetch the parent taxonomy ID of the given ID from the nodes table.
#
# @param i the taxonomy ID whose parent is wanted
# @return [String, nil] the stripped query result, or nil if the input
#         was empty
def get_parent_id_of(i)
  if i.to_s.empty?
    e 'No valid input was given to us (in method '+__method__.to_s+').'
    # Without an ID the query below would be malformed ("taxid= limit"),
    # so we stop here.
    return nil
  end
  # NOTE(review): i is interpolated unescaped - only pass trusted IDs.
  cmd = 'SELECT parent_taxid FROM nodes WHERE taxid='+i.to_s+' limit 3;'
  run_sql_query(cmd) # More verbose here; the result is not needed.
  result = run_sql_query(cmd, true, :tuples).strip
  return result
end
#get_scientific_name_of(taxid) ⇒ Object Also known as: get_scientific_name
#
get_scientific_name_of
Get the scientific name here. The input should be a taxid.
Usage example:
get_scientific_name_of 333
#
342 343 344 345 346 347 348 349 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 342
#
# Get the scientific name that belongs to a taxid.
#
# Usage example:
#
#   get_scientific_name_of 333
#
# @param taxid the taxonomy ID to look up
# @return [String] the stripped result of the second query
def get_scientific_name_of(taxid)
  query = 'SELECT name_txt FROM names where taxid='+taxid.to_s+
          ' AND name_class=\'scientific name\' LIMIT 3;'
  # The query is run twice: first with the default arguments of
  # run_sql_query (that result is not used), then with the
  # ' --tuples-only' argument, whose result is returned.
  run_sql_query(query)
  return run_sql_query(query, true, ' --tuples-only').strip
end
#merged? ⇒ Boolean
#
merged?
Obtain information from merged.dmp.
Merged nodes file fields has these ids:
old_taxid -- id of nodes which has been merged
new_taxid -- id of nodes which is result of merging
#
239 240 241 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 239
#
# Obtain the dataset of merged nodes, by loading the file that the
# constant MERGED refers to.
#
# @return whatever Taxonomy.load returns for that file
def merged?
  return Taxonomy.load(MERGED)
end
#names?(i = NAMES) ⇒ Boolean
#
names?
Obtain the names.dmp dataset, which are “Taxonomy names”.
Four IDs are known for this set:
taxid
name_txt
unique name
name class
#
181 182 183 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 181
#
# Obtain the "Taxonomy names" dataset.
#
# @param i the file to load; defaults to the constant NAMES
# @return whatever Taxonomy.load returns for that file
def names?(i = NAMES)
  return Taxonomy.load(i)
end
#nodes?(i = NODES) ⇒ Boolean
#
nodes?
Use this method in order to obtain the nodes.dmp dataset.
Nodes are “taxonomic nodes”.
The description for each node includes the following fields:
taxid - node id in GenBank taxonomy database
parent taxid - parent node id in GenBank taxonomy database
rank - rank of this node (superkingdom, kingdom, ...)
embl code - locus-name prefix; not unique
division id - see division.dmp file
inherited div flag (1 or 0) - 1 if node inherits division from parent
genetic code id - see gencode.dmp file
inherited GC flag (1 or 0) - 1 if node inherits genetic code from parent
mitochondrial genetic code id - see gencode.dmp file
inherited MGC flag (1 or 0) - 1 if node inherits mitochondrial gencode from parent
GenBank hidden flag (1 or 0) - 1 if name is suppressed in GenBank entry lineage
hidden subtree root flag(1 or 0) - 1 if this subtree has no sequence data yet
comments - free-text comments and citations
#
154 155 156 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 154
#
# Obtain the "taxonomic nodes" dataset.
#
# @param i the file to load; defaults to the constant NODES
# @return whatever Taxonomy.load returns for that file
def nodes?(i = NODES)
  return Taxonomy.load(i)
end
#pad(i) ⇒ Object
#
pad
The input is something like:
1457406 | Bionia Mart. ex Benth., 1837 | | authority |
but it could also be an array.
#
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 86
#
# Turn one line of a .dmp file into the inner part of a quoted SQL
# value list. The input is something like:
#
#   1457406 | Bionia Mart. ex Benth., 1837 | | authority |
#
# but it could also be an Array.
#
# @param i [String, Array] Strings lose their trailing newline and all
#        "'" characters, and are tokenized if they contain TOKEN;
#        Arrays are joined as they are
# @return the fields joined via "','", or the (cleaned) input if there
#         was nothing to join
def pad(i)
  if i.is_a? String
    i = i.chomp # Newlines aren't really useful - let's eliminate them.
    # Get rid of "'" characters. This has to happen before tokenizing:
    # afterwards i is an Array, and Array#delete would only remove
    # whole elements rather than the quotes inside of the fields.
    i = i.delete("'")
    i = tokenize(i) if i.include?(TOKEN) # Now splitted input.
  end
  i = i.join("','") if i.is_a? Array
  return i
end
#pad_properly(i = "5\t|\tThe domestic cat: perspective on the nature and diversity of cats.\t|\t0\t|\t8603894\t|\t \t|\t\t|\t9685 \t|\n") ⇒ Object
#
pad_properly
Input is an Array. We will pad it for a proper SQL query.
#
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/bioroebe/taxonomy/node.rb', line 102
#
# Pad one line of a .dmp file for use in a SQL query: the line is
# sanitized and split, then every field is stripped and has its "'"
# characters doubled (the standard SQL way of escaping a quote inside
# a quoted value).
#
# @param i [String] one raw line of a .dmp file
# @return [String] all fields joined via "', '"
def pad_properly(i = "5\t|\tThe domestic cat: perspective on the nature and diversity of cats.\t|\t0\t|\t8603894\t|\t \t|\t\t|\t9685 \t|\n")
  splitted = split(
    sanitize(i.chomp)
  )
  # The block must yield its value: a "return" in here would leave the
  # whole method after the very first field. The block variable is not
  # called "e" so that it does not shadow the e() output method.
  splitted = splitted.map {|field|
    begin
      sanitize(field).strip.gsub("'", "''")
    rescue StandardError
      '' # Best effort: a field that can not be processed becomes empty.
    end
  }
  return splitted.join("', '")
end
#pad_sql(i) ⇒ Object
#
pad_sql
Escape ‘ characters here.
#
92 93 94 95 |
# File 'lib/bioroebe/taxonomy/node.rb', line 92
#
# Escape ' characters for use inside a quoted SQL value, by doubling
# them (the standard SQL way).
#
# The replacement used before, "\'", is the very same String as "'" in
# Ruby, so nothing was escaped at all.
#
# @param i [String] the raw value
# @return [String] the value with every ' replaced by ''
def pad_sql(i)
  return i.gsub("'", "''")
end
#project_base_dir? ⇒ Boolean Also known as: base_dir?
#
project_base_dir?
#
128 129 130 |
# File 'lib/bioroebe/taxonomy/class_methods.rb', line 128
#
# Instance-level access to the base directory of the Taxonomy project.
#
# This delegates to the class method Taxonomy.project_base_dir?. The
# name base_dir? is only documented as an instance-level alias of this
# very method, so calling Taxonomy.base_dir? from in here would either
# fail or end up in this method again.
#
# @return whatever Taxonomy.project_base_dir? returns
def project_base_dir?
  Taxonomy.project_base_dir?
end
#remove_delimiter(i) ⇒ Object
#
remove_delimiter
Get rid of the ‘|’ token.
#
502 503 504 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 502
#
# Get rid of the TOKEN character(s) in the given input.
#
# @param i the input, which has to respond to delete() - presumably a
#        String
# @return the input without any characters listed in TOKEN
def remove_delimiter(i)
  without_token = i.delete(TOKEN)
  without_token
end
#return_current_hours_minutes_seconds ⇒ Object
#
return_current_hours_minutes_seconds
#
483 484 485 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 483
#
# return_current_hours_minutes_seconds
#
# Delegates to the toplevel method of the same name on ::Bioroebe and
# returns its result.
def return_current_hours_minutes_seconds
  return ::Bioroebe.return_current_hours_minutes_seconds
end
#return_full_lineage_of(i = nil) ⇒ Object
#
return_full_lineage_of
This method will return an array (or nil) with the full lineage of the given input (which should be a Taxonomic id).
The logic for this method is as follows:
-
Given an arbitrary tax ID as input, we will keep on searching the postgresql database for parent_id entries, until we eventually reach input number 1, which is the mother of all IDs. So when we have 1 finally, we can stop, and return the result (the array in question).
The full lineage is thus given as part of the returned array. The format is to not only return the ID but to also return the scientific name. In other words, our returned array will have this format:
[parent_id, scientific_name]
#
289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 289
#
# return_full_lineage_of
#
# Returns an Array with the full lineage of the given taxonomic id, or
# nil when no id was given.
#
# Starting at the input id, the parent id is looked up (through
# get_parent_id_of) again and again until id 1 - the root of all ids -
# is reached. Each entry of the result has the format
#
#   [id, scientific_name]
#
# with the input id first and the root last.
#
# Safeguards:
#
# - If the input already is the root (1), only that single entry is
#   returned, rather than the root followed by itself.
# - If a parent lookup yields nil, the walk stops and the partial
#   lineage is returned, rather than appending [nil, ...] entries.
# - After more than 50 steps without reaching the root a warning is
#   shown and the partial lineage is returned.
#
# @param i [Integer, String, nil] the taxonomic id to start from
# @return [Array<Array>, nil] the lineage, or nil on missing input
def return_full_lineage_of(i = nil)
  unless i
    e 'Missing input. Please provide an ID (a number, like 6).'
    return nil
  end
  lineage = [[i, get_scientific_name_of(i)]]
  # The root has no ancestors that are worth listing.
  return lineage if i.to_s.to_i == 1
  n_steps = 0
  loop do
    current_id = lineage.last.first
    parent_id = get_parent_id_of(current_id)
    if parent_id.nil?
      e 'No parent id could be found for '+current_id.to_s+
        ' - the lineage ends here.'
      break
    end
    lineage << [parent_id, get_scientific_name_of(parent_id)]
    n_steps += 1
    break if parent_id.to_i == 1
    if n_steps > 50
      e 'It seems as if something is not working properly here. We reached'
      e 'a count of 50 now, without finding a parent id (which should be 1).'
      e 'It is quite unlikely that a lineage will have more than 50 subentries'
      e 'so we will now break out of the loop.'
      break
    end
  end
  lineage
end
#search_id(i = '7460') ⇒ Object Also known as: search, id?
#
search_id
Search the Taxonomic ID here.
#
163 164 165 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 163
#
# search_id
#
# Builds the lookup URL for a taxonomic id - BASE_URL followed by
# "id=" and the id - and passes it to e(). (Also known as: search, id?)
def search_id(i = '7460')
  e "#{BASE_URL}id=#{i}"
end
#show_current_time_and_date ⇒ Object
#
show_current_time_and_date
#
509 510 511 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 509
#
# show_current_time_and_date
#
# Instance-level wrapper that forwards to
# Taxonomy.show_current_time_and_date and returns its result.
def show_current_time_and_date
  return Taxonomy.show_current_time_and_date
end
#show_help ⇒ Object
#
show_help (help tag)
Shows the options available in the interactive taxonomy ‘shell’.
#
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/bioroebe/taxonomy/help/help.rb', line 19 def show_help cliner { e N+'The following options are available (pwd: '+sdir(return_pwd)+'):'+ N+N Helpline[:sql,'# Use this to generate the SQL that can be '+ 'used to populate a SQL database with INSERT statements.'] e ' '*(Helpline::LJUST+3)+'# You can pass values such as 1 '+ 'or 2 (node or name) to this method.' Helpline[:info, '# Show some info about where we store data.'] Helpline[:instructions?, '# Show instructions.'] Helpline[:fasta, '# Use this to populate the fasta table.'] Helpline[:names, '# Use this to populate the names table.'] Helpline[:nodes, '# Use this to populate the nodes table.'] # ===================================================================== # # Only display the following line when colours are enabled still. # ===================================================================== # if use_colours? Helpline[:nocolours,'# Use this to disable the '+ 'colours. (Use "yescolours" to enable them again)'] end Helpline[:taxid,'# Find out the name of the organism through '\ 'the input ID from the NCBI dataset. 
For example: "taxid 33"'] Helpline[:table_names?,'# Use this to show the SQL command '\ 'that was used to generate the SQL Tables.'] Helpline[:verify,'# Use this to verify that the '\ '.sql files (nodes and names) are valid.'] Helpline[:verbose,'# be verbose, in other words provide '\ 'extra information to us whenever feasible'] Helpline[:ll, '# Show the content of the current working '\ 'directory.'] Helpline[:id,'# Query the postgre database to get the ID of '\ 'a given species.'] Helpline[:download,'# Download the remote NCBI database '\ '(at '+simp(URL_TO_TAXONOMY_ARCHIVE)+')'] if SHALL_WE_LOG_LAST_UPDATE Helpline[:last_update?,'# When did we last update the database'] end Helpline[:update_database, '# download the remote NCBI database, '+ 'extract it, generate nodes.sql and names.sql,'] _ = ' ' * Helpline::LJUST e _+Helpline::PADDING+' # and then populate the '\ 'postgresql-database with this information' e # This here to keep a trailing newline. } end |
#split(i = citations? ) ⇒ Object
#
split (split tag)
Split up the input on the default delimiter.
#
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 55
#
# split (split tag)
#
# Split up the input on the default delimiter.
#
# - An Array is handled per entry: every entry is split on
#   MAIN_DELIMITER, so an Array of Arrays is returned.
# - Any other input is sanitized first. If it contains TOKEN but no tab
#   character at all, it is split on TOKEN; otherwise it is split on
#   MAIN_DELIMITER.
#
# Should sanitizing or splitting fail, the error is shown, the
# offending input is stored in $error (kept for callers that may
# inspect it), and nil is returned.
#
# @param i [String, Array<String>] what to split; defaults to citations?
# @return [Array, nil] the parts, or nil on failure
def split(
    i = citations?
  )
  return i.map { |entry| entry.split(MAIN_DELIMITER) } if i.is_a? Array
  begin
    i = sanitize(i)
    if i.include?(TOKEN) && !i.include?("\t")
      i.split(TOKEN)
    else
      i.split(MAIN_DELIMITER)
    end
  rescue StandardError => error
    # StandardError rather than Exception, so that Interrupt, SystemExit
    # and friends are not swallowed here.
    pp error
    # Interpolation instead of '+': the failing input is often not a
    # String (nil, for instance), and String#+ would raise again here.
    e "The string that failed was #{i}"
    $error = i
    nil
  end
end
#test(use_this_url = URL1) ⇒ Object
#
test
#
44 45 46 47 48 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 44
#
# test
#
# Announces the given URL via e() and then returns the lines of the
# local DIVISION file as an Array. Note that the URL is only shown in
# the message; the data itself is read from DIVISION.
def test(use_this_url = URL1)
  e 'Opening URL at "'+sfancy(use_this_url)+'"'
  File.readlines(DIVISION)
end
#to_utf(i) ⇒ Object Also known as: sanitize
#
to_utf
#
37 38 39 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 37
#
# to_utf
#
# Hands the input over to ::Bioroebe.to_utf and returns the result.
# (Also known as: sanitize)
def to_utf(i)
  return ::Bioroebe.to_utf(i)
end
#verify_proper_sql_structures ⇒ Object
#
verify_proper_sql_structures (debug tag)
We will try to verify that the SQL commands are accurate.
Can also be called by issuing this:
Taxonomy.verify_proper_sql_structures
#
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 |
# File 'lib/bioroebe/taxonomy/taxonomy.rb', line 459
#
# verify_proper_sql_structures (debug tag)
#
# We will try to verify that the SQL commands are accurate, by showing
# the size and the first characters of the NODES_SQL and NAMES_SQL
# files, so that they can be inspected by eye. A file that does not
# exist is reported instead.
#
# Can also be called by issuing this:
#
#   Taxonomy.verify_proper_sql_structures
#
# Implementation notes:
#
# - File.size reports bytes, so the value is converted before it is
#   shown with the "KB" label.
# - Only the excerpt is read from disk, not the whole file.
# - At most n_chars_to_show characters are shown, matching the message.
def verify_proper_sql_structures
  n_chars_to_show = 975
  files = [NODES_SQL, NAMES_SQL]
  e 'We will now attempt to verify that the SQL structure is proper.'
  e 'We have these '+sfancy(files.size.to_s)+' .sql files.'
  pp files
  files.each { |entry|
    if File.exist? entry
      size = (File.size(entry) / 1024.0).round(2).to_s
      # each_char is lazy here: reading stops after the requested
      # number of characters.
      chunk = File.open(entry) { |io|
        io.each_char.first(n_chars_to_show).join
      }
      e 'File '+sfile(entry)+' (Filesize: '+sfancy(size)+
        ' KB) has this content '+
        '(showing up to '+simp(n_chars_to_show.to_s)+' chars):'
      cliner { e chunk }; e
    else
      e 'We can not verify the sql structure because the file'
      e 'at `'+sfile(entry)+'` does not exist.'
    end
  }
end