Class: Dor::SdrIngestService

Inherits:
Object
  • Object
show all
Defined in:
lib/dor/services/sdr_ingest_service.rb

Overview

Note: This should probably live in the common-accessioning robot sdr-ingest-transfer,
as that is the only robot that uses it. See also the Preservable concern.

Class Method Summary collapse

Class Method Details

.extract_datastreams(dor_item, workspace) ⇒ Pathname



57
58
59
60
61
62
63
64
65
66
# File 'lib/dor/services/sdr_ingest_service.rb', line 57

# Writes the configured datastreams of an item to "<name>.xml" files inside the
# workspace's metadata directory.
#
# NOTE(review): the local variable names were missing from this listing and
# have been reconstructed; `metadata_dir` follows the parameter name used in
# the documented signatures of the methods that consume this return value.
#
# @param dor_item [Object] item handed unchanged to get_datastream_content
# @param workspace [#path] object whose `path('metadata', true)` yields the
#   metadata directory (presumably `true` asks for the directory to be created
#   -- confirm against DruidTools)
# @return [Pathname] the metadata directory the files were written to
def self.extract_datastreams(dor_item, workspace)
  metadata_dir = Pathname.new(workspace.path('metadata', true))
  Config.sdr.datastreams.to_hash.each_pair do |ds_name, required|
    ds_name = ds_name.to_s
    metadata_file = metadata_dir.join("#{ds_name}.xml")
    metadata_string = get_datastream_content(dor_item, ds_name, required)
    # A nil/false result means there is nothing to write for this datastream.
    metadata_file.open('w') { |f| f << metadata_string } if metadata_string
  end
  metadata_dir
end

.get_content_inventory(metadata_dir, druid, version_id) ⇒ Moab::FileInventory



126
127
128
129
130
131
132
133
# File 'lib/dor/services/sdr_ingest_service.rb', line 126

# Builds the file inventory for a version from contentMetadata, when present.
#
# NOTE(review): the first parameter and the local name were missing from this
# listing; they are restored from the documented signature
# `.get_content_inventory(metadata_dir, druid, version_id)`.
#
# @param metadata_dir [Pathname] directory searched for contentMetadata.xml
# @param druid [String] object identifier recorded in the inventory
# @param version_id [Integer] version recorded in the inventory
# @return [Moab::FileInventory] inventory derived from the content metadata,
#   or a new 'version' inventory when there is no content metadata
def self.get_content_inventory(metadata_dir, druid, version_id)
  content_metadata = get_content_metadata(metadata_dir)
  if content_metadata
    Stanford::ContentInventory.new.inventory_from_cm(content_metadata, druid, 'preserve', version_id)
  else
    Moab::FileInventory.new(type: 'version', digital_object_id: druid, version_id: version_id)
  end
end

.get_content_metadata(metadata_dir) ⇒ String



137
138
139
140
# File 'lib/dor/services/sdr_ingest_service.rb', line 137

# Reads contentMetadata.xml from the metadata directory, if it exists.
#
# NOTE(review): the method name, parameter and local name were missing from
# this listing; they are restored from the documented signature
# `.get_content_metadata(metadata_dir)`.
#
# @param metadata_dir [Pathname] directory expected to hold contentMetadata.xml
# @return [String, nil] the file's contents, or nil when the file is absent
def self.get_content_metadata(metadata_dir)
  content_metadata_pathname = metadata_dir.join('contentMetadata.xml')
  content_metadata_pathname.read if content_metadata_pathname.exist?
end

.get_datastream_content(dor_item, ds_name, required) ⇒ String



73
74
75
76
77
78
79
80
81
82
# File 'lib/dor/services/sdr_ingest_service.rb', line 73

# Looks up the content of one datastream on the item.
#
# The name 'relationshipMetadata' is looked up under the key 'RELS-EXT'; every
# other name is used as the key unchanged.
#
# @param dor_item [#datastreams] item exposing a hash-like datastream collection
# @param ds_name [String] datastream name as configured
# @param required [String] 'optional' allows a missing datastream; any other
#   value makes a missing datastream an error
# @return [String, nil] the datastream content, or nil for a missing optional one
# @raise [RuntimeError] when a non-optional datastream is absent or still new
def self.get_datastream_content(dor_item, ds_name, required)
  lookup_key = ds_name == 'relationshipMetadata' ? 'RELS-EXT' : ds_name
  streams = dor_item.datastreams
  # Present and already persisted (not new): hand back its content.
  return streams[lookup_key].content if streams.key?(lookup_key) && !streams[lookup_key].new?
  return nil if required == 'optional'

  raise "required datastream #{ds_name} not found in DOR"
end

.get_metadata_file_group(metadata_dir) ⇒ Moab::FileGroup



144
145
146
147
# File 'lib/dor/services/sdr_ingest_service.rb', line 144

# Builds the 'metadata' file group from the files in the metadata directory.
#
# NOTE(review): the method name and parameter were missing from this listing;
# they are restored from the documented signature
# `.get_metadata_file_group(metadata_dir)`.
#
# @param metadata_dir [Pathname] directory handed to Moab's group_from_directory
# @return [Moab::FileGroup] per this method's documented signature
def self.get_metadata_file_group(metadata_dir)
  Moab::FileGroup.new(group_id: 'metadata').group_from_directory(metadata_dir)
end

.get_signature_catalog(druid) ⇒ Moab::SignatureCatalog



49
50
51
# File 'lib/dor/services/sdr_ingest_service.rb', line 49

# Fetches the signature catalog for an object by delegating to
# Sdr::Client.get_signature_catalog.
#
# @param druid [String] object identifier, passed through unchanged
#   (presumably of the form "druid:..." -- confirm against callers)
# @return [Moab::SignatureCatalog] per this method's documented signature
def self.get_signature_catalog(druid)
  Sdr::Client.get_signature_catalog(druid)
end

.get_version_inventory(metadata_dir, druid, version_id) ⇒ Moab::FileInventory



115
116
117
118
119
# File 'lib/dor/services/sdr_ingest_service.rb', line 115

# Builds the inventory for a version: the content inventory with the metadata
# file group appended to its groups.
#
# NOTE(review): the first parameter and the helper call on the second line
# were missing from this listing; they are restored from the documented
# signatures `.get_version_inventory(metadata_dir, druid, version_id)` and
# `.get_metadata_file_group(metadata_dir)`.
#
# @param metadata_dir [Pathname] directory holding the extracted metadata files
# @param druid [String] object identifier
# @param version_id [Integer] version the inventory describes
# @return [Moab::FileInventory] content inventory plus the metadata group
def self.get_version_inventory(metadata_dir, druid, version_id)
  version_inventory = get_content_inventory(metadata_dir, druid, version_id)
  version_inventory.groups << get_metadata_file_group(metadata_dir)
  version_inventory
end

.transfer(dor_item, _agreement_id = nil) ⇒ void



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/dor/services/sdr_ingest_service.rb', line 12

# Exports a new version of the item as a bag and starts the preservation
# ingest workflow.
#
# Steps, in order: extract the item's datastreams into the workspace metadata
# directory, check that versionMetadata.xml carries the next version id, build
# the version inventory, work out which content files are additions relative
# to the signature catalog, write the bag, verify its structure, and create
# the 'preservationIngestWF' workflow.
#
# NOTE(review): `metadata_dir` and the `verify_version_metadata` call were
# missing from this listing; they are restored from the documented signature
# `.verify_version_metadata(metadata_dir, expected)`.
#
# @param dor_item [Object] item to transfer; must respond to `pid`
# @param _agreement_id [Object, nil] unused
# @return [void]
# @raise [Dor::Exception] wrapping any StandardError raised along the way
def self.transfer(dor_item, _agreement_id = nil)
  druid = dor_item.pid
  workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
  signature_catalog = get_signature_catalog(druid)
  new_version_id = signature_catalog.version_id + 1
  metadata_dir = extract_datastreams(dor_item, workspace)
  verify_version_metadata(metadata_dir, new_version_id)
  version_inventory = get_version_inventory(metadata_dir, druid, new_version_id)
  version_additions = signature_catalog.version_additions(version_inventory)
  content_additions = version_additions.group('content')
  # Only look for a content directory when this version adds content files.
  content_dir =
    if content_additions.nil? || content_additions.files.empty?
      nil
    else
      workspace.find_filelist_parent('content', content_additions.path_list)
    end
  content_group = version_inventory.group('content')
  signature_catalog.normalize_group_signatures(content_group, content_dir) unless content_group.nil? || content_group.files.empty?
  # export the bag (in tar format)
  bag_dir = Pathname(Dor::Config.sdr.local_export_home).join(druid.sub('druid:', ''))
  bagger = Moab::Bagger.new(version_inventory, signature_catalog, bag_dir)
  bagger.reset_bag
  bagger.create_bag_inventory(:depositor)
  bagger.deposit_group('content', content_dir)
  bagger.deposit_group('metadata', metadata_dir)
  bagger.create_tagfiles
  verify_bag_structure(bag_dir)
  # start SDR preservation workflow (but do not create the workflows datastream)
  CreateWorkflowService.create_workflow(dor_item, name: 'preservationIngestWF', create_ds: false)
rescue StandardError => e
  # StandardError rather than Exception: interrupts, exit requests and
  # out-of-memory conditions propagate untouched instead of being re-labelled
  # as export errors.
  raise Dor::Exception, "Error exporting new object version to bag: #{e.message}"
end

.verify_bag_structure(bag_dir) ⇒ Boolean



151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/dor/services/sdr_ingest_service.rb', line 151

# Checks that the bag directory and each expected entry beneath it exist.
#
# Entries are checked in a fixed order and the first missing one raises (via
# verify_pathname), so the error always names the earliest gap.
#
# @param bag_dir [Pathname] root directory of the bag
# @return [Boolean] true when every expected path exists
# @raise [RuntimeError] from verify_pathname for the first missing path
def self.verify_bag_structure(bag_dir)
  verify_pathname(bag_dir)
  [
    %w[data],
    %w[bagit.txt],
    %w[bag-info.txt],
    %w[manifest-sha256.txt],
    %w[tagmanifest-sha256.txt],
    %w[versionAdditions.xml],
    %w[versionInventory.xml],
    %w[data metadata versionMetadata.xml]
  ].each { |segments| verify_pathname(bag_dir.join(*segments)) }
  true
end

.verify_pathname(pathname) ⇒ Boolean



166
167
168
169
170
# File 'lib/dor/services/sdr_ingest_service.rb', line 166

# Confirms that a path exists on disk.
#
# @param pathname [Pathname] path to check
# @return [Boolean] true when the path exists
# @raise [RuntimeError] naming the basename and full path when it does not
def self.verify_pathname(pathname)
  return true if pathname.exist?

  raise "#{pathname.basename} not found at #{pathname}"
end

.verify_version_id(pathname, expected, found) ⇒ Object



95
96
97
98
99
# File 'lib/dor/services/sdr_ingest_service.rb', line 95

# Confirms that the version id found in a file matches the expected one.
#
# @param pathname [Pathname, String] file the id was read from; used only in
#   the error message
# @param expected [Object] the version id that should be present
# @param found [Object] the version id actually read
# @return [Boolean] true when the two ids are equal (`==`)
# @raise [RuntimeError] describing the mismatch otherwise
def self.verify_version_id(pathname, expected, found)
  return true if expected == found

  raise "Version mismatch in #{pathname}, expected #{expected}, found #{found}"
end

.verify_version_metadata(metadata_dir, expected) ⇒ Object



86
87
88
89
90
# File 'lib/dor/services/sdr_ingest_service.rb', line 86

# Checks that versionMetadata.xml in the metadata directory carries the
# expected version id.
#
# NOTE(review): the method name and first parameter were missing from this
# listing; they are restored from the documented signature
# `.verify_version_metadata(metadata_dir, expected)`.
#
# @param metadata_dir [Pathname] directory holding versionMetadata.xml
# @param expected [Integer] version id the file should declare
# @return [Boolean] true when the ids match
# @raise [RuntimeError] from the helpers when the file is missing or the id differs
def self.verify_version_metadata(metadata_dir, expected)
  vmfile = metadata_dir.join('versionMetadata.xml')
  verify_version_id(vmfile, expected, vmfile_version_id(vmfile))
  true
end

.vmfile_version_id(pathname) ⇒ Integer



103
104
105
106
107
108
109
# File 'lib/dor/services/sdr_ingest_service.rb', line 103

# Reads the version id declared by the last <version> element of a
# versionMetadata file.
#
# @param pathname [Pathname] location of the versionMetadata.xml file
# @return [Integer, nil] the `versionId` attribute of the last
#   /versionMetadata/version element as an integer, or nil when the document
#   has no such element or the attribute is absent
# @raise [RuntimeError] from verify_pathname when the file does not exist
def self.vmfile_version_id(pathname)
  verify_pathname(pathname)
  # Block form closes the file handle once parsing is done.
  doc = pathname.open { |file| Nokogiri::XML(file) }
  last_version = doc.xpath('/versionMetadata/version').last
  # No <version> element at all: report "no id" instead of failing on nil.
  version_id = last_version && last_version['versionId']
  version_id && version_id.to_i
end