Class: SequenceServer::MAKEBLASTDB
- Inherits:
-
Object
- Object
- SequenceServer::MAKEBLASTDB
- Extended by:
- Forwardable
- Defined in:
- lib/sequenceserver/makeblastdb.rb,
lib/sequenceserver/makeblastdb-modified-with-cache.rb
Overview
Smart makeblastdb wrapper: recursively scans database directory determining which files need to be formatted or re-formatted.
Example usage:
makeblastdb = MAKEBLASTDB.new(database_dir)
makeblastdb.scan && makeblastdb.run
Instance Attribute Summary collapse
-
#database_dir ⇒ Object
readonly
Returns the value of attribute database_dir.
-
#fastas_to_format ⇒ Object
readonly
Determines which FASTA files in the database directory are unformatted.
-
#fastas_to_reformat ⇒ Object
readonly
Determines which FASTA files in the database directory require reformatting.
-
#formatted_fastas ⇒ Object
readonly
Determines which FASTA files in the database directory are already formatted.
Instance Method Summary collapse
-
#any_formatted? ⇒ Boolean
Returns true if at least one database in the database directory is formatted.
-
#any_incompatible? ⇒ Boolean
Returns true if the database directory contains one or more incompatible databases.
- #any_to_format? ⇒ Boolean
- #any_to_format_or_reformat? ⇒ Boolean
-
#any_unformatted? ⇒ Boolean
Returns true if there is at least one unformatted FASTA in the databases directory.
-
#format ⇒ Object
Format any unformatted FASTA files in database directory.
-
#initialize(database_dir) ⇒ MAKEBLASTDB
constructor
A new instance of MAKEBLASTDB.
- #no_fastas? ⇒ Boolean
-
#reformat ⇒ Object
Re-format databases that require reformatting.
-
#run ⇒ Object
Runs makeblastdb on each file in `@fastas_to_format` and `@fastas_to_reformat`.
-
#scan ⇒ Object
Scans the database directory to determine which FASTA files require formatting or re-formatting.
Constructor Details
#initialize(database_dir) ⇒ MAKEBLASTDB
Returns a new instance of MAKEBLASTDB.
19 20 21 |
# File 'lib/sequenceserver/makeblastdb.rb', line 19 def initialize(database_dir) @database_dir = database_dir end |
Instance Attribute Details
#database_dir ⇒ Object (readonly)
Returns the value of attribute database_dir.
23 24 25 |
# File 'lib/sequenceserver/makeblastdb.rb', line 23 def database_dir @database_dir end |
#fastas_to_format ⇒ Object (readonly)
Determines which FASTA files in the database directory are unformatted.
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/sequenceserver/makeblastdb.rb', line 111 def fastas_to_format return @fastas_to_format if defined?(@fastas_to_format) formatted_fasta_paths = formatted_fastas.map { |f| f[0] } fasta_paths_to_format = probably_fastas - formatted_fasta_paths @fastas_to_format = fasta_paths_to_format.map do |path| [ path, make_db_title(path), guess_sequence_type_in_fasta(path) ] end @fastas_to_format end |
#fastas_to_reformat ⇒ Object (readonly)
Determines which FASTA files in the database directory require reformatting.
98 99 100 101 102 103 104 105 106 107 |
# File 'lib/sequenceserver/makeblastdb.rb', line 98 def fastas_to_reformat return @fastas_to_reformat if defined?(@fastas_to_reformat) @fastas_to_reformat = [] formatted_fastas.each do |ff| @fastas_to_reformat << [ff.path, ff.title, ff.type, ff.non_parse_seqids?] if ff.v4? || ff.non_parse_seqids? end @fastas_to_reformat end |
#formatted_fastas ⇒ Object (readonly)
Determines which FASTA files in the database directory are already formatted.
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/sequenceserver/makeblastdb.rb', line 70 def formatted_fastas return @formatted_fastas if defined?(@formatted_fastas) @formatted_fastas = [] blastdbcmd.each_line do |line| path, *rest = line.chomp.split("\t") next if multipart_database_name?(path) rest << get_categories(path) @formatted_fastas << Database.new(path, *rest) end @formatted_fastas end |
Instance Method Details
#any_formatted? ⇒ Boolean
Returns true if at least one database in the database directory is formatted.
49 50 51 |
# File 'lib/sequenceserver/makeblastdb-modified-with-cache.rb', line 49 def any_formatted? formatted_fastas.any? end |
#any_incompatible? ⇒ Boolean
Returns true if the database directory contains one or more incompatible databases.
Note that it is okay to only use V4 databases or only V5 databases. Incompatibility arises when they are mixed.
64 65 66 67 68 |
# File 'lib/sequenceserver/makeblastdb-modified-with-cache.rb', line 64 def any_incompatible? return false if @formatted_fastas.all? { |ff| ff.v4? || ff.alias? } return false if @formatted_fastas.all? { |ff| ff.v5? || ff.alias? } true end |
#any_to_format? ⇒ Boolean
86 87 88 |
# File 'lib/sequenceserver/makeblastdb.rb', line 86 def any_to_format? fastas_to_format.any? end |
#any_to_format_or_reformat? ⇒ Boolean
29 30 31 |
# File 'lib/sequenceserver/makeblastdb.rb', line 29 def any_to_format_or_reformat? any_to_format? || any_to_reformat? end |
#any_unformatted? ⇒ Boolean
Returns true if there is at least one unformatted FASTA in the databases directory.
55 56 57 |
# File 'lib/sequenceserver/makeblastdb-modified-with-cache.rb', line 55 def any_unformatted? !@fastas_to_format.empty? end |
#format ⇒ Object
Format any unformatted FASTA files in the database directory. Returns an Array of the files that were formatted, or nil when there is nothing to format.
46 47 48 49 50 51 52 53 54 |
# File 'lib/sequenceserver/makeblastdb.rb', line 46 def format # Make the intent clear as well as ensure the program won't crash if we # accidentally call format before calling scan. return unless any_to_format? fastas_to_format.select do |path, title, type| make_blast_database('format', path, title, type) end end |
#no_fastas? ⇒ Boolean
33 34 35 |
# File 'lib/sequenceserver/makeblastdb.rb', line 33 def no_fastas? probably_fastas.empty? end |
#reformat ⇒ Object
Re-format databases that require reformatting. Returns an Array of the files that were reformatted, or nil when there is nothing to reformat.
58 59 60 61 62 63 64 65 66 |
# File 'lib/sequenceserver/makeblastdb.rb', line 58 def reformat # Make the intent clear as well as ensure the program won't crash if # we accidentally call reformat before calling scan. return unless any_to_reformat? fastas_to_reformat.select do |path, title, type, non_parse_seqids| make_blast_database('reformat', path, title, type, non_parse_seqids) end end |
#run ⇒ Object
Runs makeblastdb on each file in `@fastas_to_format` and `@fastas_to_reformat`. Will do nothing unless #scan has been run before.
39 40 41 42 |
# File 'lib/sequenceserver/makeblastdb.rb', line 39 def run format reformat end |
#scan ⇒ Object
Scans the database directory to determine which FASTA files require formatting or re-formatting.
Returns true if there are files to (re-)format, false otherwise.
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/sequenceserver/makeblastdb-modified-with-cache.rb', line 31 def scan # We need to know the list of formatted FASTAs as reported by blastdbcmd # first. This is required to determine both unformatted FASTAs and those # that require reformatting. @formatted_fastas = [] determine_formatted_fastas # Now determine FASTA files that are unformatted or require reformatting. @fastas_to_format = [] determine_unformatted_fastas @fastas_to_reformat = [] determine_fastas_to_reformat # Return true if there are files to be (re-)formatted or false otherwise. !@fastas_to_format.empty? || !@fastas_to_reformat.empty? end |