Class: SequenceServer::Database

Inherits:
Struct
  • Object
show all
Extended by:
Enumerable, Forwardable
Defined in:
lib/sequenceserver/database.rb,
lib/sequenceserver/database.rb

Overview

Model Database’s eigenclass as a collection of Database objects.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Database

Returns a new instance of Database.



28
29
30
31
32
33
34
# File 'lib/sequenceserver/database.rb', line 28

# Builds a Database record from positional Struct arguments.
#
# The third argument (the database type) is lower-cased, every argument is
# frozen, and the arguments are then handed to the superclass constructor.
# Finally @id is set to the MD5 hex digest of the first argument.
#
# The type is lower-cased into a *new* String instead of with +downcase!+:
# the destructive form modified the caller's String in place and raised
# FrozenError when the caller passed an already-frozen String.
def initialize(*args)
  args[2] = args[2].downcase # type
  args.each(&:freeze)
  # Bare +super+ forwards the current contents of +args+, i.e. including the
  # lower-cased type assigned above.
  super

  @id = Digest::MD5.hexdigest args.first
end

Instance Attribute Details

#categoriesObject

Returns the value of attribute categories

Returns:

  • (Object)

    the current value of categories



21
22
23
# File 'lib/sequenceserver/database.rb', line 21

# Reader for the +categories+ attribute; returns the current value of
# @categories.
# NOTE(review): this looks like a YARD-rendered Struct member reader - confirm
# against lib/sequenceserver/database.rb before relying on the ivar.
def categories
  @categories
end

#formatObject

Returns the value of attribute format

Returns:

  • (Object)

    the current value of format



21
22
23
# File 'lib/sequenceserver/database.rb', line 21

# Reader for the +format+ attribute; returns the current value of @format.
# v4? and v5? compare this value against the Strings '4' and '5'.
def format
  @format
end

#idObject (readonly)

Returns the value of attribute id.



36
37
38
# File 'lib/sequenceserver/database.rb', line 36

# Read-only identifier of this database; returns @id, which the constructor
# sets to the MD5 hex digest of its first argument.
def id
  @id
end

#nameObject Also known as: path

Returns the value of attribute name

Returns:

  • (Object)

    the current value of name



21
22
23
# File 'lib/sequenceserver/database.rb', line 21

# Reader for the +name+ attribute (also exposed as +path+); returns the
# current value of @name. The value is interpolated after `-db` in the
# blastdbcmd command lines built by #retrieve and #include?.
def name
  @name
end

#ncharactersObject

Returns the value of attribute ncharacters

Returns:

  • (Object)

    the current value of ncharacters



21
22
23
# File 'lib/sequenceserver/database.rb', line 21

# Reader for the +ncharacters+ attribute; returns the current value of
# @ncharacters.
# NOTE(review): presumably the total residue/base count of the database -
# confirm against the code that populates it.
def ncharacters
  @ncharacters
end

#nsequencesObject

Returns the value of attribute nsequences

Returns:

  • (Object)

    the current value of nsequences



21
22
23
# File 'lib/sequenceserver/database.rb', line 21

# Reader for the +nsequences+ attribute; returns the current value of
# @nsequences.
# NOTE(review): presumably the number of sequences in the database - confirm
# against the code that populates it.
def nsequences
  @nsequences
end

#titleObject

Returns the value of attribute title

Returns:

  • (Object)

    the current value of title



21
22
23
# File 'lib/sequenceserver/database.rb', line 21

# Reader for the +title+ attribute; returns the current value of @title.
# Used as the display text of a database in .tree and in #to_s.
def title
  @title
end

#typeObject

Returns the value of attribute type

Returns:

  • (Object)

    the current value of type



21
22
23
# File 'lib/sequenceserver/database.rb', line 21

# Reader for the +type+ attribute; returns the current value of @type.
# The constructor lower-cases this value, and .tree groups databases by it.
def type
  @type
end

#updated_onObject

Returns the value of attribute updated_on

Returns:

  • (Object)

    the current value of updated_on



21
22
23
# File 'lib/sequenceserver/database.rb', line 21

# Reader for the +updated_on+ attribute; returns the current value of
# @updated_on.
# NOTE(review): presumably the database's last-update timestamp - confirm
# type and format against the code that populates it.
def updated_on
  @updated_on
end

Class Method Details

.[](ids) ⇒ Object



141
142
143
144
# File 'lib/sequenceserver/database.rb', line 141

# Looks up databases by id.
#
# +ids+ may be a single id or a list of ids (it is normalised with
# Kernel#Array). Returns an Array with one entry per requested id, in the
# order requested; an id that is not in the collection yields nil.
def [](ids)
  collection.values_at(*Array(ids))
end

.allObject



150
151
152
# File 'lib/sequenceserver/database.rb', line 150

# Returns an Array of every Database currently held in the collection
# (the values of the id => Database Hash).
def all
  collection.values
end

.clearObject

Intended to be used only for testing.



258
259
260
# File 'lib/sequenceserver/database.rb', line 258

# Removes every entry from the collection. Intended to be used only for
# testing.
def clear
  collection.clear
end

.collectionObject



129
130
131
# File 'lib/sequenceserver/database.rb', line 129

# Returns the Hash backing this class-level collection, creating an empty
# one on first use. collection= stores entries keyed by Database#id.
def collection
  @collection ||= {}
end

.collection=(databases) ⇒ Object



133
134
135
136
137
# File 'lib/sequenceserver/database.rb', line 133

# Registers each of the given databases under its id.
#
# Note that this *adds to* the existing collection rather than replacing it:
# entries already present are kept unless a new database has the same id, in
# which case the new one wins.
def collection=(databases)
  databases.each { |database| collection[database.id] = database }
end

.each(&block) ⇒ Object



178
179
180
# File 'lib/sequenceserver/database.rb', line 178

# Yields every Database in the collection to the given block by delegating
# to Array#each on the result of .all.
def each(&block)
  all.each(&block)
end

.firstObject

Intended to be used only for testing.



253
254
255
# File 'lib/sequenceserver/database.rb', line 253

# Returns the first Database in .all, or nil when the collection is empty.
# Intended to be used only for testing.
def first
  all.first
end

.group_by(&block) ⇒ Object



186
187
188
# File 'lib/sequenceserver/database.rb', line 186

# Groups the databases in .all by the block's return value; returns the Hash
# produced by Array#group_by.
def group_by(&block)
  all.group_by(&block)
end

.idsObject



146
147
148
# File 'lib/sequenceserver/database.rb', line 146

# Returns an Array of the ids (Hash keys) of every Database in the
# collection.
def ids
  collection.keys
end

.include?(path) ⇒ Boolean

Returns:

  • (Boolean)


182
183
184
# File 'lib/sequenceserver/database.rb', line 182

# Returns true if a database with the given path has been registered.
#
# Collection keys are MD5 hex digests (the constructor derives a database's id
# from its first argument that way), so the path is digested before lookup.
def include?(path)
  digest = Digest::MD5.hexdigest(path)
  collection.key?(digest)
end

.retrieve(loci) ⇒ Object

Retrieve given loci from the databases we have.

loci to retrieve are specified as a String:

"accession_1,accession_2:start-stop,accession_3"

Return value is a FASTA format String containing sequences in the same order in which they were requested. If an accession could not be found, a commented out error message is included in place of the sequence. Sequences are retrieved from the first database in which the accession is found. The returned sequences can, thus, be incorrect if accessions are not unique across all databases (admins should make sure of that).



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/sequenceserver/database.rb', line 206

# Retrieves the requested loci from the registered databases.
#
# +loci+ is a comma separated String such as
# "accession_1,accession_2:start-stop,accession_3". Returns nil when +loci+
# is nil; otherwise returns one String with the results joined by newlines,
# in the order requested.
#
# Each locus is looked up in every database in turn and the first database
# that returns a sequence wins, so the result can be wrong if sequence ids are
# not unique across databases.
def retrieve(loci)
  return unless loci

  # loci is external input: a stray double comma produces empty entries,
  # which are dropped here.
  requested = loci.split(',').reject(&:empty?)

  sequences = requested.map do |locus|
    # coords is nil when the locus carries no ":start-stop" part.
    accession, coords = locus.split(':')

    hit = nil
    each do |database|
      # Database#retrieve gives a String on a match and nil otherwise; stop at
      # the first database that knows the accession.
      hit = database.retrieve(accession, coords)
      break if hit
    end

    # A '#'-prefixed line stands in for a locus no database could supply; it
    # should be ignored by BLAST (not tested).
    hit || "# ERROR: #{locus} not found in any database"
  end

  sequences.join("\n")
end

.to_jsonObject



190
191
192
# File 'lib/sequenceserver/database.rb', line 190

# Serialises every Database in the collection as a JSON array.
def to_json
  all.to_json
end

.treeObject



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/sequenceserver/database.rb', line 154

# Builds, per database type, a flat list of tree nodes.
#
# Returns a Hash mapping each database type to an Array of node Hashes:
#
# * one { id:, parent:, text: } node for every category level, where the id
#   is the '-'-joined category path up to that level and the parent of a
#   top-level category is '#'. A category node shared by several databases
#   is added only once.
# * one { id:, parent:, text:, icon: } node per database, parented to its
#   deepest category (or to '#' if it has no categories).
#
# If a block is given, it is called with each database and the node just
# created for it.
def tree
  all.each_with_object({}) do |db, data|
    nodes = (data[db.type] ||= [])
    parent_id = '#'

    db.categories.each_with_index do |label, depth|
      # The path of the previous level is exactly this level's parent, so a
      # running parent_id is all that is needed.
      node_id = db.categories[0..depth].join('-')
      node = { id: node_id, parent: parent_id, text: label }
      nodes << node unless nodes.include?(node)
      parent_id = node_id
    end

    nodes << {
      id: db.id,
      parent: parent_id,
      text: db.title,
      icon: 'glyphicon glyphicon-file'
    }

    yield(db, nodes.last) if block_given?
  end
end

Instance Method Details

#==(other) ⇒ Object



97
98
99
# File 'lib/sequenceserver/database.rb', line 97

# Two databases are equal when the MD5 hex digest of the other object's
# +name+ matches this database's id.
#
# Returns false, instead of raising, when +other+ cannot be compared: nil,
# an object without a +name+ method, or one whose +name+ is not a String.
# This keeps calls such as Array#include? or comparison against nil safe.
def ==(other)
  return false unless other.respond_to?(:name)

  other_name = other.name
  other_name.is_a?(String) && @id == Digest::MD5.hexdigest(other_name)
end

#alias?Boolean

Returns true if the database was created using blastdb_aliastool.

Returns:

  • (Boolean)


93
94
95
# File 'lib/sequenceserver/database.rb', line 93

# Returns true if the database was created using blastdb_aliastool, i.e. its
# only file extension is 'nal' or 'pal'.
def alias?
  found = extensions
  found.count == 1 && %w[nal pal].include?(found.first)
end

#include?(id) ⇒ Boolean

Returns true if the database contains the given sequence id. Returns false otherwise.

Returns:

  • (Boolean)


65
66
67
68
69
70
# File 'lib/sequenceserver/database.rb', line 65

# Checks whether the database contains the given sequence id by running
# blastdbcmd for that entry.
#
# Returns the result of +sys+ when the command succeeds and false when it
# raises any StandardError.
# NOTE(review): the success value is whatever +sys+ returns, not a literal
# true - confirm callers only rely on truthiness.
#
# Raises ArgumentError if +id+ does not match
# SequenceServer::BLAST::VALID_SEQUENCE_ID.
def include?(id)
  unless id =~ SequenceServer::BLAST::VALID_SEQUENCE_ID
    fail ArgumentError, "Invalid sequence id: #{id}"
  end

  cmd = "blastdbcmd -entry '#{id}' -db #{name}"
  begin
    sys(cmd, path: config[:bin])
  rescue StandardError
    false
  end
end

#non_parse_seqids?Boolean

Return true if the database was not created using the -parse_seqids option of makeblastdb.

Returns:

  • (Boolean)


82
83
84
85
86
87
88
89
90
# File 'lib/sequenceserver/database.rb', line 82

# Returns true if the database was not created using the -parse_seqids
# option of makeblastdb, judged by which index files exist.
#
# * format '5': exactly two of nog/nos/pog/pos must be present.
# * format '4': exactly three of nog/nsd/nsi/pog/psd/psi must be present.
#
# Returns false when the expected files are there, and nil for alias
# databases and for any other format.
#
# The format '4' list previously contained 'pod', which breaks the n*/p*
# symmetry of both lists and is not a file makeblastdb is known to write. A
# protein database could therefore never reach three matches and was always
# reported as lacking -parse_seqids. It is 'pog', mirroring 'nog'.
def non_parse_seqids?
  return if alias?
  case format
  when '5'
    (%w[nog nos pog pos] & extensions).length != 2
  when '4'
    (%w[nog nsd nsi pog psd psi] & extensions).length != 3
  end
end

#retrieve(accession, coords = nil) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/sequenceserver/database.rb', line 39

# Fetches a sequence from this database by running blastdbcmd.
#
# accession:: sequence id; must match SequenceServer::BLAST::VALID_SEQUENCE_ID.
# coords::    optional range of the form "start-stop" or "start-" (digits
#             only); passed to blastdbcmd's -range option.
#
# Returns the command's standard output with the trailing newline removed,
# or nil if the command failed (accession not present in this database).
#
# Raises InvalidSequenceIdError for a malformed accession and
# InvalidParameterError for malformed coords.
def retrieve(accession, coords = nil)
  unless accession =~ SequenceServer::BLAST::VALID_SEQUENCE_ID
    fail InvalidSequenceIdError, "Invalid sequence id: #{accession}"
  end

  cmd = "blastdbcmd -db #{name} -entry '#{accession}'"

  if coords
    # coords is interpolated into a shell command, so the whole String must be
    # a range. The pattern is anchored with \A and \z: unanchored, a value
    # such as "1-2;some-command" passed validation and reached the shell.
    unless coords =~ /\A[0-9]+-[0-9]*\z/
      fail InvalidParameterError, "Invalid range coordinates: #{coords}"
    end

    cmd << " -range #{coords}"
  end
  out, = sys(cmd, path: config[:bin])
  out.chomp
rescue CommandFailed
  # Command failed because stdout was empty, meaning accession not
  # present in this database.
  nil
end

#to_json(*args) ⇒ Object



105
106
107
# File 'lib/sequenceserver/database.rb', line 105

# Serialises this database as a JSON object: the attribute Hash from +to_h+
# plus an :id entry holding the database id. +args+ are passed straight
# through to Hash#to_json.
def to_json(*args)
  payload = to_h.merge(id: id)
  payload.to_json(*args)
end

#to_sObject



101
102
103
# File 'lib/sequenceserver/database.rb', line 101

# Human-readable one-line description in the form "<type>: <title> <name>".
def to_s
  "#{type}: #{title} #{name}"
end

#v4?Boolean

Returns:

  • (Boolean)


72
73
74
# File 'lib/sequenceserver/database.rb', line 72

# Returns true if +format+ is the String '4'.
# NOTE(review): presumably this is the BLAST database format version -
# confirm.
def v4?
  format == '4'
end

#v5?Boolean

Returns:

  • (Boolean)


76
77
78
# File 'lib/sequenceserver/database.rb', line 76

# Returns true if +format+ is the String '5'.
# NOTE(review): presumably this is the BLAST database format version -
# confirm.
def v5?
  format == '5'
end