Class: SequenceServer::MAKEBLASTDB

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/sequenceserver/makeblastdb.rb,
lib/sequenceserver/makeblastdb-modified-with-cache.rb

Overview

Smart makeblastdb wrapper: recursively scans database directory determining which files need to be formatted or re-formatted.

Example usage:

makeblastdb = MAKEBLASTDB.new(database_dir)
makeblastdb.scan && makeblastdb.run

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(database_dir) ⇒ MAKEBLASTDB

Returns a new instance of MAKEBLASTDB.



19
20
21
# File 'lib/sequenceserver/makeblastdb.rb', line 19

# Builds a wrapper around a database directory.
#
# @param database_dir the database directory to work on; it is stored as-is
#   in @database_dir (no validation or path expansion happens here).
def initialize(database_dir)
  @database_dir = database_dir
end

Instance Attribute Details

#database_dir ⇒ Object (readonly)

Returns the value of attribute database_dir.



23
24
25
# File 'lib/sequenceserver/makeblastdb.rb', line 23

# Reader for the database directory.
#
# @return the value held in @database_dir, unchanged.
def database_dir
  @database_dir
end

#fastas_to_format ⇒ Object (readonly)

Determines which FASTA files in the database directory are unformatted.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/sequenceserver/makeblastdb.rb', line 111

# Lists the FASTA files that still need to be formatted.
#
# The list is computed on first use and cached in @fastas_to_format: paths
# from probably_fastas that are not among the paths (element 0) of
# #formatted_fastas are each paired with a title and a sequence type.
#
# @return [Array<Array>] one [path, title, type] triple per pending file.
def fastas_to_format
  unless defined?(@fastas_to_format)
    already_formatted = formatted_fastas.map { |db| db[0] }
    pending_paths = probably_fastas - already_formatted

    @fastas_to_format = pending_paths.map do |fasta|
      [fasta, make_db_title(fasta), guess_sequence_type_in_fasta(fasta)]
    end
  end

  @fastas_to_format
end

#fastas_to_reformat ⇒ Object (readonly)

Determines which FASTA files in the database directory require reformatting.



98
99
100
101
102
103
104
105
106
107
# File 'lib/sequenceserver/makeblastdb.rb', line 98

# Lists the formatted databases that have to be formatted again.
#
# A database qualifies when it answers true to v4? or non_parse_seqids?.
# The result is computed on first use and cached in @fastas_to_reformat.
#
# @return [Array<Array>] one [path, title, type, non_parse_seqids?] entry
#   per database that needs reformatting.
def fastas_to_reformat
  unless defined?(@fastas_to_reformat)
    @fastas_to_reformat = []

    formatted_fastas.each do |db|
      next unless db.v4? || db.non_parse_seqids?

      @fastas_to_reformat << [db.path, db.title, db.type, db.non_parse_seqids?]
    end
  end

  @fastas_to_reformat
end

#formatted_fastas ⇒ Object (readonly)

Determines which FASTA files in the database directory are already formatted.



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/sequenceserver/makeblastdb.rb', line 70

# Lists the databases reported by blastdbcmd.
#
# Each line of the blastdbcmd output is split on tabs: the first field is
# the database path and the remaining fields, followed by the categories of
# that path, are handed to Database.new. Paths recognised by
# multipart_database_name? are skipped. The list is built on first use and
# cached in @formatted_fastas.
#
# @return [Array<Database>] one Database per retained line.
def formatted_fastas
  unless defined?(@formatted_fastas)
    @formatted_fastas = []

    blastdbcmd.each_line do |record|
      fields = record.chomp.split("\t")
      db_path = fields.shift
      next if multipart_database_name?(db_path)

      fields.push(get_categories(db_path))
      @formatted_fastas.push(Database.new(db_path, *fields))
    end
  end

  @formatted_fastas
end

Instance Method Details

#any_formatted? ⇒ Boolean

Returns true if at least one database in database directory is formatted.

Returns:

  • (Boolean)


49
50
51
# File 'lib/sequenceserver/makeblastdb-modified-with-cache.rb', line 49

# Tells whether #formatted_fastas reports at least one database.
#
# @return [Boolean] the result of calling any? on #formatted_fastas.
def any_formatted?
  formatted_fastas.any?
end

#any_incompatible? ⇒ Boolean

Returns true if the databases directory contains one or more incompatible databases.

Note that it is okay to only use V4 databases or only V5 databases. Incompatibility arises when they are mixed.

Returns:

  • (Boolean)


64
65
66
67
68
# File 'lib/sequenceserver/makeblastdb-modified-with-cache.rb', line 64

# Tells whether the formatted databases mix incompatible versions.
#
# A set made only of V4 databases, or only of V5 databases, is compatible;
# alias databases are accepted in either set. An empty list is compatible.
#
# The list is read through the #formatted_fastas reader rather than the raw
# instance variable, so the check does not fail with NoMethodError on nil
# when the variable has not been populated yet.
#
# @return [Boolean] true when V4 and V5 databases are mixed, false otherwise.
def any_incompatible?
  databases = formatted_fastas
  return false if databases.all? { |ff| ff.v4? || ff.alias? }
  return false if databases.all? { |ff| ff.v5? || ff.alias? }

  true
end

#any_to_format? ⇒ Boolean

Returns:

  • (Boolean)


86
87
88
# File 'lib/sequenceserver/makeblastdb.rb', line 86

# Tells whether #fastas_to_format lists at least one file.
#
# @return [Boolean] the result of calling any? on #fastas_to_format.
def any_to_format?
  fastas_to_format.any?
end

#any_to_format_or_reformat? ⇒ Boolean

Returns:

  • (Boolean)


29
30
31
# File 'lib/sequenceserver/makeblastdb.rb', line 29

# Tells whether there is any formatting or reformatting work pending.
#
# any_to_reformat? is only consulted when any_to_format? is falsy.
#
# @return [Boolean] truthy when either predicate is truthy.
def any_to_format_or_reformat?
  any_to_format? || any_to_reformat?
end

#any_unformatted? ⇒ Boolean

Returns true if there is at least one unformatted FASTA in the databases directory.

Returns:

  • (Boolean)


55
56
57
# File 'lib/sequenceserver/makeblastdb-modified-with-cache.rb', line 55

# Tells whether at least one FASTA file is still waiting to be formatted.
#
# The list is read through the #fastas_to_format reader rather than the raw
# instance variable, so the check does not fail with NoMethodError on nil
# when the variable has not been populated yet.
#
# @return [Boolean] true when #fastas_to_format is non-empty.
def any_unformatted?
  !fastas_to_format.empty?
end

#format ⇒ Object

Format any unformatted FASTA files in database directory. Returns Array of files that were formatted.



46
47
48
49
50
51
52
53
54
# File 'lib/sequenceserver/makeblastdb.rb', line 46

# Runs make_blast_database in 'format' mode for every entry of
# #fastas_to_format.
#
# @return [Array, nil] the [path, title, type] entries for which
#   make_blast_database returned a truthy value, or nil when any_to_format?
#   is false.
def format
  # Only walk the list when something is pending; otherwise fall through
  # and return nil, which keeps a call made before any scan harmless.
  if any_to_format?
    fastas_to_format.select do |fasta, db_title, seq_type|
      make_blast_database('format', fasta, db_title, seq_type)
    end
  end
end

#no_fastas? ⇒ Boolean

Returns:

  • (Boolean)


33
34
35
# File 'lib/sequenceserver/makeblastdb.rb', line 33

# Tells whether probably_fastas came back empty.
#
# NOTE(review): probably_fastas is defined elsewhere; presumably it lists the
# candidate FASTA files found in the database directory -- confirm.
#
# @return [Boolean] the result of calling empty? on probably_fastas.
def no_fastas?
  probably_fastas.empty?
end

#reformat ⇒ Object

Re-format databases that require reformatting. Returns Array of files that were reformatted.



58
59
60
61
62
63
64
65
66
# File 'lib/sequenceserver/makeblastdb.rb', line 58

# Runs make_blast_database in 'reformat' mode for every entry of
# #fastas_to_reformat.
#
# @return [Array, nil] the [path, title, type, non_parse_seqids] entries for
#   which make_blast_database returned a truthy value, or nil when
#   any_to_reformat? is false.
def reformat
  # Only walk the list when something is pending; otherwise fall through
  # and return nil, which keeps a call made before any scan harmless.
  if any_to_reformat?
    fastas_to_reformat.select do |fasta, db_title, seq_type, no_seqids|
      make_blast_database('reformat', fasta, db_title, seq_type, no_seqids)
    end
  end
end

#run ⇒ Object

Runs makeblastdb on each file in `@fastas_to_format` and `@fastas_to_reformat`. Will do nothing unless #scan has been run before.



39
40
41
42
# File 'lib/sequenceserver/makeblastdb.rb', line 39

# Formats pending FASTA files, then reformats the databases that need it.
#
# @return whatever #reformat returns; the result of #format is discarded.
def run
  format
  reformat
end

#scan ⇒ Object

Scans the database directory to determine which FASTA files require formatting or re-formatting.

Returns true if there are files to (re-)format, false otherwise.



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/sequenceserver/makeblastdb-modified-with-cache.rb', line 31

# Rebuilds the three work lists from scratch and reports whether any
# formatting or reformatting is pending.
#
# @return [Boolean] true when @fastas_to_format or @fastas_to_reformat is
#   non-empty after the scan, false otherwise.
def scan
  # Formatted FASTAs (as reported by blastdbcmd) come first: both later
  # steps need that list to work out what is unformatted and what has to
  # be reformatted.
  @formatted_fastas = []
  determine_formatted_fastas

  # Unformatted FASTAs next, then the ones that must be reformatted.
  @fastas_to_format = []
  determine_unformatted_fastas
  @fastas_to_reformat = []
  determine_fastas_to_reformat

  # Work is pending unless both lists came back empty.
  nothing_pending = @fastas_to_format.empty? && @fastas_to_reformat.empty?
  !nothing_pending
end