Class: Moab::FileInventory

Inherits:
Serializer::Manifest show all
Includes:
HappyMapper
Defined in:
lib/moab/file_inventory.rb

Overview

Note:

Copyright © 2012 by The Board of Trustees of the Leland Stanford Junior University. All rights reserved. See LICENSE for details.

A structured container for recording information about a collection of related files.

The scope of the file collection depends on inventory type:

  • version = full set of data files comprising a digital object’s version

  • additions = subset of data files that were newly added in the specified version

  • manifests = the fixity data for manifest files in the version’s root folder

  • directory = set of files that were harvested from a filesystem directory

The inventory contains one or more FileGroup subsets, which are most commonly used to provide segregation of a digital object version’s content and metadata files. Each group contains one or more FileManifestation entities, each of which represents a point-in-time snapshot of a given file’s filesystem characteristics. The fixity data for a file is stored in a FileSignature element, while the filename and modification data are stored in one or more FileInstance elements. (Copies of a given file may be present in multiple locations in a collection.)

Data Model

  • FileInventory = container for recording information about a collection of related files

    • FileGroup [1..*] = subset allowing segregation of content and metadata files

      • FileManifestation [1..*] = snapshot of a file’s filesystem characteristics

        • FileSignature [1] = file fixity information

        • FileInstance [1..*] = filepath and timestamp of any physical file having that signature

Examples:


Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Serializer::Manifest

read_xml_file, write_xml_file, xml_pathname, xml_pathname_exist?

Methods inherited from Serializer::Serializable

#array_to_hash, deep_diff, #diff, #key, #key_name, #summary, #to_hash, #to_json, #to_yaml, #variable_names, #variables

Constructor Details

#initialize(opts = {}) ⇒ FileInventory



39
40
41
42
43
# File 'lib/moab/file_inventory.rb', line 39

# Create a new inventory with an empty group list and a timestamp of "now".
# Both defaults are assigned before delegating to the superclass initializer,
# which receives the options hash unchanged.
#
# @param opts [Hash] options passed straight through to +super+
def initialize(opts={})
  @inventory_datetime = Time.now
  @groups = []
  super(opts)
end

Instance Attribute Details

#block_countInteger



92
# File 'lib/moab/file_inventory.rb', line 92

# Integer attribute +block_count+, declared with the tag name 'blockCount'.
# The :on_save proc turns the value into a String
# (presumably applied when the object is serialized -- confirm against HappyMapper).
attribute :block_count, Integer, :tag => 'blockCount', :on_save => Proc.new {|t| t.to_s}

#digital_object_idString



51
# File 'lib/moab/file_inventory.rb', line 51

# String attribute +digital_object_id+, declared with the tag name 'objectId'.
attribute :digital_object_id, String, :tag => 'objectId'

#file_countInteger



76
# File 'lib/moab/file_inventory.rb', line 76

# Integer attribute +file_count+, declared with the tag name 'fileCount'.
# The :on_save proc turns the value into a String
# (presumably applied when the object is serialized -- confirm against HappyMapper).
attribute :file_count, Integer, :tag => 'fileCount', :on_save => Proc.new {|t| t.to_s}

#groupsArray<FileGroup>



100
# File 'lib/moab/file_inventory.rb', line 100

# Collection +groups+ of FileGroup objects, declared with the tag name 'fileGroup'.
has_many :groups, FileGroup, :tag => 'fileGroup'

#inventory_datetimeString



64
# File 'lib/moab/file_inventory.rb', line 64

# String attribute +inventory_datetime+, declared with the tag name 'inventoryDatetime'.
# NOTE(review): the constructor assigns a Time to @inventory_datetime; any
# Time-to-String conversion presumably happens in an accessor not shown here -- confirm.
attribute :inventory_datetime, String, :tag => 'inventoryDatetime'

#typeString



47
# File 'lib/moab/file_inventory.rb', line 47

# String attribute +type+; no explicit tag name is given, so the mapper's default naming applies.
attribute :type, String

#version_idInteger



55
# File 'lib/moab/file_inventory.rb', line 55

# Integer attribute +version_id+, declared with the tag name 'versionId' and flagged
# with :key => true (presumably marking it as the object's key field -- confirm against
# the Serializable helpers). The :on_save proc turns the value into a String.
attribute :version_id, Integer, :tag => 'versionId', :key => true, :on_save => Proc.new {|n| n.to_s}

Class Method Details

.xml_filename(type = nil) ⇒ String

Returns The standard name for the serialized inventory file of the given type.



256
257
258
259
260
261
262
263
264
265
266
267
268
269
# File 'lib/moab/file_inventory.rb', line 256

# Map an inventory type to the standard filename of its serialized XML file.
#
# @param type [String] one of "version", "additions", "manifests" or "directory"
# @return [String] the filename used for an inventory of the given type
# @raise [ArgumentError] if +type+ is not one of the recognized type strings
def self.xml_filename(type=nil)
  case type
  when "version"   then 'versionInventory.xml'
  when "additions" then 'versionAdditions.xml'
  when "manifests" then 'manifestInventory.xml'
  when "directory" then 'directoryInventory.xml'
  else raise ArgumentError, "unknown inventory type: #{type}"
  end
end

Instance Method Details

#byte_countInteger



84
# File 'lib/moab/file_inventory.rb', line 84

# Integer attribute +byte_count+, declared with the tag name 'byteCount'.
# The :on_save proc turns the value into a String
# (presumably applied when the object is serialized -- confirm against HappyMapper).
attribute :byte_count, Integer, :tag => 'byteCount', :on_save => Proc.new {|t| t.to_s}

#composite_keyString



58
59
60
# File 'lib/moab/file_inventory.rb', line 58

# Build a key by joining the object id and the version directory name with a hyphen.
#
# @return [String] "<digital_object_id>-<StorageObject.version_dirname(version_id)>"
def composite_key
  key_prefix = @digital_object_id + '-'
  key_prefix + StorageObject.version_dirname(@version_id)
end

#copy_ids(other) ⇒ void

This method returns an undefined value.

Copies the objectId, versionId, and inventory datetime values from another instance into this instance.



148
149
150
151
152
# File 'lib/moab/file_inventory.rb', line 148

# Copy the identifying fields of another object into this inventory:
# the digital object id, the version id and the inventory datetime.
#
# @param other [#digital_object_id, #version_id, #inventory_datetime] the object to copy from
# @return [void]
def copy_ids(other)
  @digital_object_id, @version_id = other.digital_object_id, other.version_id
  @inventory_datetime = other.inventory_datetime
end

#data_sourceString

Returns either the version ID (if inventory is a version manifest) or the name of the directory that was harvested to create the inventory



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/moab/file_inventory.rb', line 163

# Describe where the inventory's data came from.
#
# The data_source values of all groups are joined with '|'. When that joined
# string starts with 'contentMetadata' it is kept and prefixed with
# "v<version_id>-" (or "new-" when there is no version id). Otherwise the
# result is "v<version_id>" when a version id exists, or the joined string itself.
#
# @return [String] a label derived from the version id and/or the groups' data sources
def data_source
  joined_sources = groups.map { |g| g.data_source.to_s }.join('|')
  if joined_sources.start_with?('contentMetadata')
    prefix = version_id ? "v#{version_id}" : "new"
    "#{prefix}-#{joined_sources}"
  else
    version_id ? "v#{version_id}" : joined_sources
  end
end

#file_signature(group_id, file_id) ⇒ FileSignature

Returns The signature of the specified file.



135
136
137
138
139
140
141
142
143
# File 'lib/moab/file_inventory.rb', line 135

# Look up the signature recorded for a file within one of the inventory's groups.
#
# @param group_id [String] identifier of the group to search
# @param file_id [String] key of the file in the group's path_hash
# @return [FileSignature] the entry stored in the group's path_hash for +file_id+
# @raise [FileNotFoundException] if the group does not exist, or has no entry for +file_id+
def file_signature(group_id, file_id)
  file_group = group(group_id)
  if file_group.nil?
    raise FileNotFoundException, "group #{group_id} not found for #{@digital_object_id} - #{@version_id}"
  end

  signature = file_group.path_hash[file_id]
  return signature unless signature.nil?

  raise FileNotFoundException, "#{group_id} file #{file_id} not found for #{@digital_object_id} - #{@version_id}"
end

#group(group_id) ⇒ FileGroup



116
117
118
# File 'lib/moab/file_inventory.rb', line 116

# Find the first group whose group_id equals the given identifier.
#
# @param group_id [String] identifier of the wanted group
# @return [FileGroup, nil] the matching group, or nil when none matches
def group(group_id)
  @groups.detect { |candidate| candidate.group_id == group_id }
end

#group_empty?(group_id) ⇒ Boolean



122
123
124
125
# File 'lib/moab/file_inventory.rb', line 122

# Report whether a group is missing or has no files.
#
# @param group_id [String] identifier of the group to check
# @return [Boolean] true if no such group exists or its file list is empty
def group_empty?(group_id)
  found = group(group_id)
  found.nil? || found.files.empty?
end

#group_ids(non_empty = nil) ⇒ Array<String>



109
110
111
112
# File 'lib/moab/file_inventory.rb', line 109

# List the identifiers of the inventory's groups.
#
# @param non_empty [Boolean, nil] when truthy, only groups returned by non_empty_groups are listed
# @return [Array<String>] the group_id of each selected group, in group order
def group_ids(non_empty=nil)
  selected = non_empty ? non_empty_groups : @groups
  selected.map(&:group_id)
end

#human_sizeString

Returns the total size of the inventory expressed in B, KB, MB, GB or TB, depending on the magnitude of the value.



239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/moab/file_inventory.rb', line 239

# Render byte_count as a human-readable size.
#
# The value is divided by 1024 until it drops below 1024 or the largest unit
# (TB) is reached. Sizes under 1024 bytes are shown as a whole number of bytes;
# everything else is shown with two decimal places.
#
# @return [String] e.g. "512 B", "1.50 KB", "3.00 GB"
def human_size
  units = %w[B KB MB GB TB]
  scaled = byte_count
  unit_index = 0
  while scaled >= 1024 && unit_index < 4
    scaled /= 1024.0
    unit_index += 1
  end
  return sprintf("%d B", scaled) if unit_index == 0

  sprintf("%.2f %s", scaled, units[unit_index])
end

#inventory_from_bagit_bag(bag_dir) ⇒ FileInventory



201
202
203
204
205
206
207
208
209
# File 'lib/moab/file_inventory.rb', line 201

# Populate this inventory from a BagIt bag directory.
#
# Signatures are read once from the bag's manifests, then every child of the
# bag's 'data' directory becomes a FileGroup named after that child.
#
# @param bag_dir [Pathname, String] location of the bag
# @return [FileInventory] self, with one group appended per child of 'data'
def inventory_from_bagit_bag(bag_dir)
  bag_root = Pathname(bag_dir)
  bag_signatures = signatures_from_bagit_manifests(bag_root)
  bag_root.join('data').children.each do |group_dir|
    file_group = FileGroup.new(:group_id => group_dir.basename.to_s)
    @groups << file_group.group_from_bagit_subdir(group_dir, bag_signatures)
  end
  self
end

#inventory_from_directory(data_dir, group_id = nil) ⇒ FileInventory

Traverses a directory and returns an inventory of the files it contains.

Examples:




187
188
189
190
191
192
193
194
195
196
# File 'lib/moab/file_inventory.rb', line 187

# Populate this inventory by harvesting files from a directory.
#
# With a +group_id+, the whole of +data_dir+ becomes a single group of that
# name. Without one, the 'content' and 'metadata' subdirectories of +data_dir+
# are harvested as two groups named after those subdirectories.
#
# @param data_dir [Pathname, String] the directory to harvest
# @param group_id [String, nil] optional name for a single group covering +data_dir+
# @return [FileInventory] self, with the harvested group(s) appended
def inventory_from_directory(data_dir, group_id=nil)
  targets =
    if group_id
      [[group_id, data_dir]]
    else
      ['content', 'metadata'].map { |gid| [gid, Pathname(data_dir).join(gid)] }
    end
  targets.each do |gid, dir|
    @groups << FileGroup.new(group_id: gid).group_from_directory(dir)
  end
  self
end

#non_empty_groups ⇒ Array<FileGroup>

Returns the set of data groups that contain files.



103
104
105
# File 'lib/moab/file_inventory.rb', line 103

# Select the groups that have at least one file.
#
# @return [Array<FileGroup>] the groups whose file list is not empty
def non_empty_groups
  @groups.reject { |candidate| candidate.files.empty? }
end

#package_idString

Returns Concatenation of the objectId and versionId values.



156
157
158
# File 'lib/moab/file_inventory.rb', line 156

# Combine the object id and version id into a single identifier.
#
# @return [String] "<digital_object_id>-v<version_id>"
def package_id
  sprintf('%s-v%s', @digital_object_id, @version_id)
end

#signatures_from_bagit_manifests(bag_pathname) ⇒ Hash<Pathname,FileSignature>



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# File 'lib/moab/file_inventory.rb', line 213

# Collect file signatures from the checksum manifests found in a bag.
#
# For each of md5, sha1 and sha256, the file "manifest-<type>.txt" is read if
# it exists. Every line is split into a checksum and a data path; the checksum
# is recorded on the signature keyed by the file's pathname under the bag.
# Finally each signature's size is set from the size of the file on disk.
#
# @param bag_pathname [Pathname] location of the bag
# @return [Hash<Pathname,FileSignature>] signatures keyed by file pathname
def signatures_from_bagit_manifests(bag_pathname)
  # Signatures are created lazily the first time a pathname is referenced.
  signatures = Hash.new { |hash,path| hash[path] = FileSignature.new }
  [:md5, :sha1, :sha256].each do |type|
    manifest = bag_pathname.join("manifest-#{type}.txt")
    next unless manifest.exist?

    manifest.each_line do |line|
      # Separator is whitespace optionally followed by '*' characters.
      checksum, data_path = line.chomp.split(/\s+\**/,2)
      next unless checksum && data_path

      signatures[bag_pathname.join(data_path)].set_checksum(type, checksum)
    end
  end
  signatures.each { |file_pathname, signature| signature.size = file_pathname.size }
  signatures
end

#summary_fieldsArray<String>



128
129
130
# File 'lib/moab/file_inventory.rb', line 128

# Names of the fields listed for this inventory's summary.
#
# @return [Array<String>] field names, in order
def summary_fields
  [
    'type',
    'digital_object_id',
    'version_id',
    'inventory_datetime',
    'file_count',
    'byte_count',
    'block_count',
    'groups'
  ]
end

#write_xml_file(parent_dir, type = nil) ⇒ void

This method returns an undefined value.

Writes the Moab::FileInventory instance to a file.

Examples:




276
277
278
279
# File 'lib/moab/file_inventory.rb', line 276

# Write this inventory to a file by delegating to the class-level writer.
#
# @param parent_dir [Pathname, String] passed through to the class-level write_xml_file
# @param type [String, nil] inventory type; the instance's own @type is used when nil
# @return [void]
def write_xml_file(parent_dir, type=nil)
  effective_type = type.nil? ? @type : type
  self.class.write_xml_file(self, parent_dir, effective_type)
end