Class: Bio::FlatFileIndex

Inherits:
Object show all
Defined in:
lib/bio/io/flatfile/index.rb,
lib/bio/io/flatfile/bdb.rb,
lib/bio/io/flatfile/indexer.rb

Overview

Bio::FlatFileIndex is a class for OBDA flatfile index.

Defined Under Namespace

Modules: BDB_1, BDBdefault, DEBUG, Flat_1, Indexer, Template Classes: BDBwrapper, DataBank, FileID, FileIDs, NameSpaces, Results

Constant Summary collapse

MAGIC_FLAT =

magic string for flat/1 index

'flat/1'
MAGIC_BDB =

magic string for BerkeleyDB/1 index

'BerkeleyDB/1'

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(name) ⇒ FlatFileIndex

Opens an existing databank. A databank is a directory which contains indexed files and configuration files. The type of the databank (flat or BerkeleyDB) is determined automatically.

Unlike FlatFileIndex.open, block is not allowed.



113
114
115
# File 'lib/bio/io/flatfile/index.rb', line 113

# Opens the existing databank called +name+ through DataBank.open and
# stores the resulting handle in @db, which the other instance methods
# delegate to.
def initialize(name)
  databank = DataBank.open(name)
  @db = databank
end

Class Method Details

.formatstring2class(format_string) ⇒ Object



734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
# File 'lib/bio/io/flatfile/indexer.rb', line 734

# Maps a format name to the class used to parse entries of that format.
#
# +format_string+ is matched case-insensitively as a substring, in the
# order genbank, genpept, embl, sptr, fasta; the first match wins.
#
# Returns the matching class (Bio::GenBank, Bio::GenPept, Bio::EMBL,
# Bio::SPTR or Bio::FastaFormat).
#
# Raises RuntimeError when no pattern matches.
def self.formatstring2class(format_string)
  case format_string
  when /genbank/i
    Bio::GenBank
  when /genpept/i
    Bio::GenPept
  when /embl/i
    Bio::EMBL
  when /sptr/i
    Bio::SPTR
  when /fasta/i
    Bio::FastaFormat
  else
    # Interpolate the parameter itself. A bare `format` here would call
    # Kernel#format with no arguments and raise ArgumentError instead of
    # reporting the offending format name.
    raise "Unsupported format : #{format_string}"
  end
end

.makeindex(is_bdb, dbname, format, options, *files) ⇒ Object



751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
# File 'lib/bio/io/flatfile/indexer.rb', line 751

# Builds a new index named +dbname+ from +files+.
#
# is_bdb::  truthy selects Indexer.makeindexBDB, otherwise
#           Indexer.makeindexFlat is used.
# format::  format name handed to formatstring2class; when nil/false the
#           format is auto-detected from the first file.
# options:: hash (or nil) that may contain 'primary_namespace',
#           'secondary_namespaces' and 'additional_secondary_namespaces'.
#
# Raises RuntimeError ("Cannot determine format") when auto-detection
# yields nothing. Returns whatever the selected Indexer method returns.
def self.makeindex(is_bdb, dbname, format, options, *files)
  dbclass =
    if format
      formatstring2class(format)
    else
      detected = Bio::FlatFile.autodetect_file(files[0])
      raise "Cannot determine format" unless detected
      DEBUG.print "file format is #{detected}\n"
      detected
    end

  options ||= {}
  primary   = options['primary_namespace']
  secondary = options['secondary_namespaces']
  parser = Indexer::Parser.new(dbclass, primary, secondary)

  # Disabled in the original source:
  #if /(EMBL|SPTR)/ =~ dbclass.to_s then
    #a = [ 'DR' ]
    #parser.add_secondary_namespaces(*a)
  #end
  additional = options['additional_secondary_namespaces']
  parser.add_secondary_namespaces(*additional) if additional

  if is_bdb
    Indexer.makeindexBDB(dbname, parser, options, *files)
  else
    Indexer.makeindexFlat(dbname, parser, options, *files)
  end
end

.open(name) ⇒ Object

Opens an existing databank. A databank is a directory which contains indexed files and configuration files. The type of the databank (flat or BerkeleyDB) is determined automatically.

If block is given, the databank object is passed to the block. The databank will be automatically closed when the block terminates.



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/bio/io/flatfile/index.rb', line 88

# Opens the databank +name+.
#
# Without a block, returns the newly created index object.
#
# With a block, yields the index object, closes it when the block
# finishes (normally or through an exception) and returns the block's
# value. An IOError raised by that closing call is ignored.
def self.open(name)
  return self.new(name) unless block_given?

  index = nil
  begin
    index = self.new(name)
    yield index
  ensure
    # index stays nil when the constructor itself failed.
    if index
      begin
        index.close
      rescue IOError
      end
    end
  end
end

.update_index(dbname, format, options, *files) ⇒ Object

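Updates the index of the existing databank dbname using the given files. When format is given, a parser for that format is passed to Indexer.update_index; otherwise nil is passed as the parser.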



781
782
783
784
785
786
787
788
# File 'lib/bio/io/flatfile/indexer.rb', line 781

# Updates the databank +dbname+ by delegating to Indexer.update_index.
#
# format::  format name; when given, it is resolved to a class with
#           formatstring2class and wrapped in an Indexer::Parser. When
#           nil/false, nil is passed as the parser.
# options:: passed through unchanged.
# files::   passed through unchanged.
#
# Returns whatever Indexer.update_index returns.
def self.update_index(dbname, format, options, *files)
  if format then
    # Resolve the class from the format name. The previous code referenced
    # an undefined local `dbclass` and raised NameError on this path.
    dbclass = formatstring2class(format)
    parser = Indexer::Parser.new(dbclass)
  else
    parser = nil
  end
  Indexer::update_index(dbname, parser, options, *files)
end

Instance Method Details

#always_check_consistency(bool) ⇒ Object

Returns the current setting. If true, consistency checks are performed every time flatfiles are accessed. If nil/false, no checks are performed. The bool argument is not used.

By default, always_check_consistency is true.



297
298
299
# File 'lib/bio/io/flatfile/index.rb', line 297

# Returns the databank's current always_check setting.
#
# +bool+ is never read. It is kept, now optional, so existing callers that
# pass an argument keep working, while the method can also be called as a
# plain getter with no argument.
def always_check_consistency(bool = nil)
  @db.always_check
end

#always_check_consistency=(bool) ⇒ Object

If true is given, consistency checks will be performed every time accessing flatfiles. If nil/false, no checks are performed.

By default, always_check_consistency is true.



288
289
290
# File 'lib/bio/io/flatfile/index.rb', line 288

# Stores +bool+ as the databank's always_check setting.
def always_check_consistency=(bool)
  @db.always_check = bool
end

#check_consistency ⇒ Object

Check consistency between the databank(index) and original flat files.

If the original flat files are changed after creating the databank, raises RuntimeError.

Note that this check only compares file sizes as described in the OBDA specification.



278
279
280
281
# File 'lib/bio/io/flatfile/index.rb', line 278

# Runs the databank's consistency check and returns its result.
#
# check_closed? (defined elsewhere in the class) is called first;
# presumably it raises when the databank is already closed - confirm.
def check_consistency
  check_closed?
  databank = @db
  databank.check_consistency
end

#close ⇒ Object

Closes the databank. Returns nil.



132
133
134
135
136
# File 'lib/bio/io/flatfile/index.rb', line 132

# Closes the underlying databank and drops the reference to it, so that
# closed? reports true afterwards. Returns nil.
#
# check_closed? (defined elsewhere in the class) is called first;
# presumably it raises when the databank is already closed - confirm.
def close
  check_closed?
  databank = @db
  databank.close
  # Cleared only after a successful close, as in the original ordering.
  @db = nil
end

#closed? ⇒ Boolean

Returns true if already closed. Otherwise, returns false.

Returns:

  • (Boolean)


139
140
141
142
143
144
145
# File 'lib/bio/io/flatfile/index.rb', line 139

# Reports whether the databank handle has been released: true when @db is
# nil/false, false otherwise.
def closed?
  @db ? false : true
end

#default_namespaces ⇒ Object

Returns default namespaces. Returns an array of strings or nil. nil means all namespaces.



172
173
174
# File 'lib/bio/io/flatfile/index.rb', line 172

# Returns the currently configured default namespaces: the value held in
# @names, which is nil until default_namespaces= has stored a list.
def default_namespaces
  @names
end

#default_namespaces=(names) ⇒ Object

Set default namespaces. default_namespaces = nil means all namespaces in the databank.

default_namespaces = [ str1, str2, ... ] sets the default namespaces to str1, str2, …

Default namespaces specified in this method only affect #get_by_id, #search, and #include? methods.

Default of default namespaces is nil (that is, all namespaces are search destinations by default).



160
161
162
163
164
165
166
167
# File 'lib/bio/io/flatfile/index.rb', line 160

# Sets the default namespaces.
#
# A nil/false +names+ clears the setting (@names becomes nil). Otherwise
# @names is rebuilt as a new array holding a duplicate of every element
# yielded by names.each, so later changes to the caller's strings do not
# affect the stored list.
def default_namespaces=(names)
  unless names
    @names = nil
    return
  end

  @names = []
  names.each { |ns| @names << ns.dup }
end

#get_by_id(key) ⇒ Object

Common interface defined in registry.rb. Searches the databank and returns the entry (or entries) as a string. Multiple entries (concatenated into one string) may be returned. Returns an empty string if nothing is found.



122
123
124
# File 'lib/bio/io/flatfile/index.rb', line 122

# Looks up +key+ with #search and returns the result converted to a
# string via to_s.
def get_by_id(key)
  results = search(key)
  results.to_s
end

#include?(key) ⇒ Boolean

Searches the databank. If some entries are found, returns an array of unique IDs (primary identifiers). If nothing is found, returns nil.

This method is useful when search result is very large and #search method is very slow.

Returns:

  • (Boolean)


210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'lib/bio/io/flatfile/index.rb', line 210

# Looks up the unique IDs matching +key+.
#
# When default namespaces are set (@names), only those namespaces are
# searched; otherwise all namespaces are searched.
#
# Returns the databank's result, or nil when that result is empty.
def include?(key)
  check_closed?
  ids =
    if @names
      @db.search_namespaces_get_unique_id(key, *@names)
    else
      @db.search_all_get_unique_id(key)
    end
  ids.empty? ? nil : ids
end

#include_in_namespaces?(key, *names) ⇒ Boolean

Same as #include?, but searches only the specified namespaces.

Returns:

  • (Boolean)


226
227
228
229
230
231
232
233
234
# File 'lib/bio/io/flatfile/index.rb', line 226

# Looks up the unique IDs matching +key+ in the namespaces +names+ only.
#
# Returns the databank's result, or nil when that result is empty.
def include_in_namespaces?(key, *names)
  check_closed?
  ids = @db.search_namespaces_get_unique_id(key, *names)
  ids.empty? ? nil : ids
end

#include_in_primary?(key) ⇒ Boolean

Same as #include?, but searches only the primary namespace.

Returns:

  • (Boolean)


238
239
240
241
242
243
244
245
246
# File 'lib/bio/io/flatfile/index.rb', line 238

# Looks up the unique IDs matching +key+ in the primary namespace only.
#
# Returns the databank's result, or nil when that result is empty.
def include_in_primary?(key)
  check_closed?
  ids = @db.search_primary_get_unique_id(key)
  ids.empty? ? nil : ids
end

#namespaces ⇒ Object

Returns names of namespaces defined in the databank. (example: [ ‘LOCUS’, ‘ACCESSION’, ‘VERSION’ ] )



251
252
253
254
255
256
# File 'lib/bio/io/flatfile/index.rb', line 251

# Returns the names of all namespaces: the primary namespace first,
# followed by the secondary namespaces.
#
# The list from secondary_namespaces is duplicated before the primary
# name is prepended, so the array handed out by secondary_namespaces is
# never modified. Without the copy, a shared array would gain another
# primary entry on every call.
def namespaces
  check_closed?
  secondary_namespaces.dup.unshift(primary_namespace)
end

#primary_namespace ⇒ Object

Returns name of primary namespace as a string.



259
260
261
262
# File 'lib/bio/io/flatfile/index.rb', line 259

# Returns the name of the databank's primary namespace
# (@db.primary.name).
def primary_namespace
  check_closed?
  primary = @db.primary
  primary.name
end

#search(key) ⇒ Object

Searching databank and returns a Bio::FlatFileIndex::Results object.



177
178
179
180
181
182
183
184
# File 'lib/bio/io/flatfile/index.rb', line 177

# Searches the databank for +key+ and returns the databank's result.
#
# When default namespaces are set (@names), only those namespaces are
# searched; otherwise all namespaces are searched.
def search(key)
  check_closed?
  return @db.search_all(key) unless @names

  @db.search_namespaces(key, *@names)
end

#search_namespaces(key, *names) ⇒ Object

Searches only the specified namespaces. Returns a Bio::FlatFileIndex::Results object.



189
190
191
192
# File 'lib/bio/io/flatfile/index.rb', line 189

# Searches for +key+ in the namespaces +names+ only and returns the
# databank's result.
def search_namespaces(key, *names)
  check_closed?
  databank = @db
  databank.search_namespaces(key, *names)
end

#search_primary(key) ⇒ Object

Searches only the primary namespace. Returns a Bio::FlatFileIndex::Results object.



197
198
199
200
# File 'lib/bio/io/flatfile/index.rb', line 197

# Searches for +key+ in the primary namespace only and returns the
# databank's result.
def search_primary(key)
  check_closed?
  databank = @db
  databank.search_primary(key)
end

#secondary_namespaces ⇒ Object

Returns names of secondary namespaces as an array of strings.



265
266
267
268
# File 'lib/bio/io/flatfile/index.rb', line 265

# Returns the names of the databank's secondary namespaces
# (@db.secondary.names).
def secondary_namespaces
  check_closed?
  secondary = @db.secondary
  secondary.names
end