Class: PEROBS::BTreeBlob

Inherits:
Object
  • Object
show all
Defined in:
lib/perobs/BTreeBlob.rb

Overview

This class manages the usage of the data blobs in the corresponding HashedBlobsDB object.

Constant Summary collapse

PEROBS_MAGIC =

Magic number used for index files.

0xB78EEDB
ID =

For performance reasons we use an Array for the entries instead of a Hash. These constants specify the Array index for the corresponding value.

0
BYTES =

Number of bytes

1
START =

Start Address

2
MARKED =

Mark/Unmarked flag

3
CRC =

CRC Checksum of the data blob

4

Instance Method Summary collapse

Constructor Details

#initialize(dir, btreedb) ⇒ BTreeBlob

Create a new BTreeBlob object.

Parameters:

  • dir (String)

    Fully qualified directory name

  • btreedb (BTreeDB)

    Reference to the DB that owns this blob



57
58
59
60
61
62
63
64
# File 'lib/perobs/BTreeBlob.rb', line 57

# Create a new BTreeBlob object.
#
# Stores the directory and the owning DB, derives the names of the 'index'
# and 'data' files inside that directory and then calls read_index.
#
# @param dir [String] Fully qualified directory name
# @param btreedb [BTreeDB] Reference to the DB that owns this blob
def initialize(dir, btreedb)
  @dir, @btreedb = dir, btreedb
  @index_file_name, @blobs_file_name =
    %w[index data].map { |basename| File.join(@dir, basename) }

  # Both file names must be set before read_index runs.
  read_index
end

Instance Method Details

#check(repair = false) ⇒ TrueClass/FalseClass

Run a basic consistency check.

Parameters:

  • repair (TrueClass/FalseClass) (defaults to: false)

    Not used right now

Returns:

  • (TrueClass/FalseClass)

    Always true right now



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/perobs/BTreeBlob.rb', line 167

# Run a basic consistency check over the index entries.
#
# Walks @entries in their stored order and reports, via PEROBS.log.fatal,
# every entry that starts before the end of its predecessor and every entry
# that extends past the end of the blobs file. A missing blobs file is
# treated as a file of size 0.
#
# @param repair [TrueClass/FalseClass] Not used right now
# @return [TrueClass/FalseClass] Always true right now
#   NOTE(review): presumably PEROBS.log.fatal raises, in which case this
#   method does not return on a failed check - confirm against the logger.
def check(repair = false)
  # Determine size of the data blobs file.
  data_file_size =
    File.exist?(@blobs_file_name) ? File.size(@blobs_file_name) : 0

  next_start = 0
  prev_entry = nil
  @entries.each do |entry|
    # Entries should never overlap
    if prev_entry && next_start > entry[START]
      # The message has to be part of the call expression. A string that
      # starts on the line after a bare `fatal` is a separate statement and
      # never reaches the logger.
      PEROBS.log.fatal(
        "#{@dir}: Index entries are overlapping\n" +
        "ID: #{'%016X' % prev_entry[ID]}  " +
        "Start: #{prev_entry[START]}  " +
        "Bytes: #{prev_entry[BYTES]}\n" +
        "ID: #{'%016X' % entry[ID]}  Start: #{entry[START]}  " +
        "Bytes: #{entry[BYTES]}"
      )
    end
    next_start = entry[START] + entry[BYTES]

    # Entries must fit within the data file
    if next_start > data_file_size
      PEROBS.log.fatal(
        "#{@dir}: Entry for ID #{'%016X' % entry[ID]} " +
        "goes beyond 'data' file " +
        "size (#{data_file_size})\n" +
        "ID: #{'%016X' % entry[ID]}  Start: #{entry[START]}  " +
        "Bytes: #{entry[BYTES]}"
      )
    end

    prev_entry = entry
  end

  true
end

#clear_marks ⇒ Object

Clear the mark on all entries in the index.



105
106
107
108
# File 'lib/perobs/BTreeBlob.rb', line 105

# Clear the mark on all entries in the index.
#
# Every entry in @entries gets its MARKED slot reset to 0, after which
# write_index is called once.
def clear_marks
  @entries.each do |entry|
    entry[MARKED] = 0
  end
  write_index
end

#delete_unmarked_entries ⇒ Array

Remove all entries from the index that have not been marked.

Returns:

  • (Array)

    List of deleted object IDs.



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/perobs/BTreeBlob.rb', line 146

# Remove all entries from the index that have not been marked.
#
# An entry counts as unmarked when its MARKED slot equals 0. Both the
# by-ID hash and the entry list are pruned in place and write_index is
# called afterwards.
#
# @return [Array] List of deleted object IDs.
def delete_unmarked_entries
  # Collect the IDs of the unmarked entries from the hash table ...
  deleted_ids = @entries_by_id.each_with_object([]) do |(id, entry), ids|
    ids << id if entry[MARKED] == 0
  end
  # ... and drop exactly those keys from it.
  deleted_ids.each { |id| @entries_by_id.delete(id) }

  # Then remove the entries themselves from the list.
  @entries.reject! { |entry| entry[MARKED] == 0 }
  write_index

  deleted_ids
end

#find(id) ⇒ Array

Find the data for the object with given id.

Parameters:

  • id (Fixnum or Bignum)

    Object ID

Returns:

  • (Array)

    Returns an Array that represents the index entry for the given object.



100
101
102
# File 'lib/perobs/BTreeBlob.rb', line 100

# Find the index entry for the object with the given id.
#
# This is a plain lookup in the @entries_by_id hash.
#
# @param id [Fixnum or Bignum] Object ID
# @return [Array] the Array that represents the index entry for the given
#   object. NOTE(review): presumably nil when the ID is unknown (read_object
#   relies on a falsy result) - confirm @entries_by_id has no default value.
def find(id)
  @entries_by_id[id]
end

#is_marked?(id, ignore_errors = false) ⇒ TrueClass or FalseClass

Check if the entry for a given ID is marked.

Parameters:

  • id (Fixnum or Bignum)

    ID of the entry

  • ignore_errors (Boolean) (defaults to: false)

    If set to true no errors will be raised for non-existing objects.

Returns:

  • (TrueClass or FalseClass)

    true if marked, false otherwise



135
136
137
138
139
140
141
142
# File 'lib/perobs/BTreeBlob.rb', line 135

# Check if the entry for a given ID is marked.
#
# Scans @entries for the first entry whose ID slot equals id. When there is
# no such entry, either false is returned (ignore_errors) or the problem is
# reported through PEROBS.log.fatal.
#
# @param id [Fixnum or Bignum] ID of the entry
# @param ignore_errors [Boolean] If set to true no errors will be raised for
#   non-existing objects.
# @return [TrueClass or FalseClass] true if marked, false otherwise
def is_marked?(id, ignore_errors = false)
  match = @entries.find { |entry| entry[ID] == id }
  # Any non-zero MARKED value counts as marked.
  return match[MARKED] != 0 if match
  return false if ignore_errors

  PEROBS.log.fatal "Cannot find an entry for ID #{'%016X' % id} to check"
end

#mark(id) ⇒ Object

Set a mark on the entry with the given ID.

Parameters:

  • id (Fixnum or Bignum)

    ID of the entry



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/perobs/BTreeBlob.rb', line 112

# Set a mark on the entry with the given ID.
#
# The first entry in @entries whose ID slot equals id gets its MARKED slot
# set to 1. A missing entry is reported through PEROBS.log.fatal.
# write_index is called at the end of the method in either case.
#
# @param id [Fixnum or Bignum] ID of the entry
def mark(id)
  target = @entries.find { |entry| entry[ID] == id }

  if target
    target[MARKED] = 1
  else
    PEROBS.log.fatal "Cannot find an entry for ID #{'%016X' % id} " +
      "#{id} to mark"
  end

  write_index
end

#read_object(id) ⇒ String

Read the entry for the given ID and return it as bytes.

Parameters:

  • id (Fixnum or Bignum)

    ID

Returns:

  • (String)

    sequence of bytes or nil if ID is unknown



91
92
93
94
# File 'lib/perobs/BTreeBlob.rb', line 91

# Read the entry for the given ID and return it as bytes.
#
# Looks the ID up with find and, when an index entry exists, hands that
# entry to read_from_blobs_file.
#
# @param id [Fixnum or Bignum] ID
# @return [String] sequence of bytes or nil if ID is unknown
def read_object(id)
  index_entry = find(id)
  index_entry ? read_from_blobs_file(index_entry) : nil
end

#write_object(id, raw) ⇒ Object

Write the given bytes with the given ID into the DB.

Parameters:

  • id (Fixnum or Bignum)

    ID

  • raw (String)

    sequence of bytes



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/perobs/BTreeBlob.rb', line 69

# Write the given bytes with the given ID into the DB.
#
# When the index holds more entries than @btreedb.max_blob_size allows, the
# blob is split first and the object is handed back to the DB. Otherwise
# space is reserved in the index, the bytes are written to the blobs file
# and the index is written.
#
# @param id [Fixnum or Bignum] ID
# @param raw [String] sequence of bytes
def write_object(id, raw)
  if @entries.length > @btreedb.max_blob_size
    # The blob has reached the maximum size. Replace the blob with a BTree
    # node directory and distribute the blob entries into the sub-blobs of
    # the new BTree node.
    split_blob
    # Insert the passed object into the newly created BTree node.
    return @btreedb.put_raw_object(raw, id)
  end

  expected_bytes = raw.bytesize
  checksum = Zlib.crc32(raw, 0)
  address = reserve_bytes(id, expected_bytes, checksum)
  written_bytes = write_to_blobs_file(raw, address)
  unless written_bytes == expected_bytes
    PEROBS.log.fatal 'Object length does not match written bytes'
  end
  write_index
end