Class: Dor::SdrIngestService
- Inherits:
-
Object
- Object
- Dor::SdrIngestService
- Defined in:
- lib/dor/services/sdr_ingest_service.rb
Class Method Summary
-
.extract_datastreams(dor_item, workspace) ⇒ Pathname
Pull all the datastreams specified in the configuration file into the workspace’s metadata directory, overwriting any existing files.
-
.get_content_inventory(metadata_dir, druid, version_id) ⇒ Moab::FileInventory
Parse the contentMetadata and generate a new version inventory object containing a content group.
-
.get_content_metadata(metadata_dir) ⇒ String
Return the contents of the contentMetadata.xml file from the content directory.
-
.get_datastream_content(dor_item, ds_name, required) ⇒ String
Return the xml text of the specified datastream if it exists.
-
.get_metadata_file_group(metadata_dir) ⇒ Moab::FileGroup
Traverse the metadata directory and generate a metadata group.
-
.get_signature_catalog(druid) ⇒ Moab::SignatureCatalog
The catalog of all files previously ingested.
-
.get_version_inventory(metadata_dir, druid, version_id) ⇒ Moab::FileInventory
Generate and return a version inventory for the object.
-
.transfer(dor_item, agreement_id = nil) ⇒ void
Create the Moab manifests, export the data to a BagIt bag, and kick off the SDR ingest workflow.
-
.verify_bag_structure(bag_dir) ⇒ Boolean
True if all required files exist, raises exception if not.
-
.verify_pathname(pathname) ⇒ Boolean
True if file exists, raises exception if not.
- .verify_version_id(pathname, expected, found) ⇒ Object
- .verify_version_metadata(metadata_dir, expected) ⇒ Object
-
.vmfile_version_id(pathname) ⇒ Integer
The versionId found in the last version element, or nil if missing.
Class Method Details
.extract_datastreams(dor_item, workspace) ⇒ Pathname
59 60 61 62 63 64 65 66 67 68 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 59
#
# Write each datastream named in the SDR configuration to
# "<datastream name>.xml" inside the workspace's metadata directory. Files are
# opened in 'w' mode, so an existing file of the same name is overwritten.
#
# NOTE(review): the local variable names and the trailing return value were
# missing from this listing; they have been restored from the documented
# Pathname return type and from how .transfer uses the result. Confirm against
# the real source file.
#
# @param dor_item [Object] item whose datastreams are read
# @param workspace [Object] object responding to #path; presumably the `true`
#   argument asks it to create the directory -- TODO confirm
# @return [Pathname] the metadata directory the files were written into
def self.extract_datastreams(dor_item, workspace)
  metadata_dir = Pathname.new(workspace.path('metadata', true))
  Config.sdr.datastreams.to_hash.each_pair do |ds_name, required|
    ds_name = ds_name.to_s
    metadata_file = metadata_dir.join("#{ds_name}.xml")
    metadata_string = get_datastream_content(dor_item, ds_name, required)
    # No content (nil) means nothing is written for this datastream.
    metadata_file.open('w') { |f| f << metadata_string } if metadata_string
  end
  metadata_dir
end
.get_content_inventory(metadata_dir, druid, version_id) ⇒ Moab::FileInventory
127 128 129 130 131 132 133 134 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 127
#
# Build the version inventory for the object from its contentMetadata. When
# the metadata directory has no contentMetadata, an inventory of type
# 'version' is created directly from the druid and version id instead.
#
# NOTE(review): the parameter name, the local holding the contentMetadata text
# and the call that fetches it were missing from this listing; they have been
# restored from the signatures of .get_content_inventory and
# .get_content_metadata. Confirm against the real source file.
#
# @param metadata_dir [Pathname] directory that may hold contentMetadata.xml
# @param druid [String] the object identifier
# @param version_id [Integer] the version being ingested
# @return [Moab::FileInventory] the inventory for this version
def self.get_content_inventory(metadata_dir, druid, version_id)
  content_metadata = get_content_metadata(metadata_dir)
  if content_metadata
    Stanford::ContentInventory.new.inventory_from_cm(content_metadata, druid, 'preserve', version_id)
  else
    FileInventory.new(:type => 'version', :digital_object_id => druid, :version_id => version_id)
  end
end
.get_content_metadata(metadata_dir) ⇒ String
138 139 140 141 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 138
#
# Read contentMetadata.xml from the given directory.
#
# NOTE(review): the method name, parameter name and local variable were
# missing from this listing; the first two were restored from the documented
# signature `.get_content_metadata(metadata_dir)`.
#
# @param metadata_dir [Pathname] directory expected to hold contentMetadata.xml
# @return [String, nil] the file's contents, or nil when the file does not exist
def self.get_content_metadata(metadata_dir)
  content_file = metadata_dir.join('contentMetadata.xml')
  content_file.read if content_file.exist?
end
.get_datastream_content(dor_item, ds_name, required) ⇒ String
75 76 77 78 79 80 81 82 83 84 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 75 def self.get_datastream_content(dor_item, ds_name, required) ds = (ds_name == 'relationshipMetadata' ? 'RELS-EXT' : ds_name) if dor_item.datastreams.keys.include?(ds) && !dor_item.datastreams[ds].new? return dor_item.datastreams[ds].content elsif required == 'optional' return nil else raise "required datastream #{ds_name} not found in DOR" end end |
.get_metadata_file_group(metadata_dir) ⇒ Moab::FileGroup
145 146 147 148 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 145
#
# Build the file group with id 'metadata' from the contents of the metadata
# directory.
#
# NOTE(review): the method name and parameter name were missing from this
# listing; they have been restored from the documented signature
# `.get_metadata_file_group(metadata_dir)`.
#
# @param metadata_dir [Pathname] directory handed to #group_from_directory
# @return [Moab::FileGroup] whatever #group_from_directory returns
def self.get_metadata_file_group(metadata_dir)
  FileGroup.new(:group_id => 'metadata').group_from_directory(metadata_dir)
end
.get_signature_catalog(druid) ⇒ Moab::SignatureCatalog
46 47 48 49 50 51 52 53 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 46
#
# Fetch the object's signature catalog from SDR over the configured REST
# client and parse it.
#
# A "resource not found" response is not an error here: it yields a new
# catalog for the druid with version_id 0.
#
# @param druid [String] the object identifier
# @return [Moab::SignatureCatalog] the parsed catalog, or a new version-0 one
def self.get_signature_catalog(druid)
  catalog_path = "objects/#{druid}/manifest/signatureCatalog.xml"
  begin
    catalog_xml = Dor::Config.sdr.rest_client[catalog_path].get
    Moab::SignatureCatalog.parse(catalog_xml)
  rescue RestClient::ResourceNotFound
    Moab::SignatureCatalog.new(:digital_object_id => druid, :version_id => 0)
  end
end
.get_version_inventory(metadata_dir, druid, version_id) ⇒ Moab::FileInventory
116 117 118 119 120 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 116
#
# Build the full inventory for a version: the content inventory with the
# metadata file group appended to its groups.
#
# NOTE(review): the parameter name and the call producing the metadata group
# were missing from this listing; they have been restored from the documented
# signatures of .get_version_inventory and .get_metadata_file_group.
#
# @param metadata_dir [Pathname] the object's metadata directory
# @param druid [String] the object identifier
# @param version_id [Integer] the version being ingested
# @return [Moab::FileInventory] the content inventory with the metadata group added
def self.get_version_inventory(metadata_dir, druid, version_id)
  version_inventory = get_content_inventory(metadata_dir, druid, version_id)
  version_inventory.groups << get_metadata_file_group(metadata_dir)
  version_inventory
end
.transfer(dor_item, agreement_id = nil) ⇒ void
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 9
#
# Export a new version of the item as a bag and start the SDR ingest workflow.
#
# Steps, in order: fetch the signature catalog, take its version id plus one
# as the new version, extract the datastreams into the workspace, check that
# versionMetadata.xml agrees with the new version, build the version
# inventory, work out which content files are new, write the bag, verify its
# structure, and initialize 'sdrIngestWF'.
#
# NOTE(review): the `metadata_dir` identifiers and the call to
# .verify_version_metadata were missing from this listing; they have been
# restored from the method signatures. Confirm against the real source file.
#
# @param dor_item [Object] the item to transfer
# @param agreement_id [Object, nil] not used by this method
# @return [void]
# @raise [LyberCore::Exceptions::ItemError] wrapping any StandardError raised
#   during the export; non-StandardError exceptions such as interrupts and
#   SystemExit now propagate unwrapped
def self.transfer(dor_item, agreement_id = nil)
  druid = dor_item.pid
  workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
  signature_catalog = get_signature_catalog(druid)
  new_version_id = signature_catalog.version_id + 1
  metadata_dir = extract_datastreams(dor_item, workspace)
  verify_version_metadata(metadata_dir, new_version_id)
  version_inventory = get_version_inventory(metadata_dir, druid, new_version_id)

  # Only files the catalog reports as additions need a source directory;
  # with no new content files, content_dir stays nil.
  version_additions = signature_catalog.version_additions(version_inventory)
  content_additions = version_additions.group('content')
  if content_additions.nil? || content_additions.files.empty?
    content_dir = nil
  else
    new_file_list = content_additions.path_list
    content_dir = workspace.find_filelist_parent('content', new_file_list)
  end

  content_group = version_inventory.group('content')
  unless content_group.nil? || content_group.files.empty?
    signature_catalog.normalize_group_signatures(content_group, content_dir)
  end

  # export the bag (in tar format)
  bag_dir = Pathname(Dor::Config.sdr.local_export_home).join(druid.sub('druid:', ''))
  bagger = Moab::Bagger.new(version_inventory, signature_catalog, bag_dir)
  bagger.reset_bag
  bagger.create_bag_inventory(:depositor)
  bagger.deposit_group('content', content_dir)
  bagger.deposit_group('metadata', metadata_dir)
  bagger.create_tagfiles
  verify_bag_structure(bag_dir)

  # Now bootstrap SDR workflow. but do not create the workflows datastream
  dor_item.initialize_workflow('sdrIngestWF', false)
rescue StandardError => e
  raise LyberCore::Exceptions::ItemError.new(druid, 'Export failure', e)
end
.verify_bag_structure(bag_dir) ⇒ Boolean
152 153 154 155 156 157 158 159 160 161 162 163 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 152
#
# Check that the bag directory and each file or directory the bag must
# contain are present. The checks run in a fixed order and stop at the first
# failure, because verify_pathname raises.
#
# @param bag_dir [Pathname] root directory of the bag
# @return [Boolean] true when every required path exists
# @raise [RuntimeError] from verify_pathname for the first missing path
def self.verify_bag_structure(bag_dir)
  required_entries = [
    ['data'],
    ['bagit.txt'],
    ['bag-info.txt'],
    ['manifest-sha256.txt'],
    ['tagmanifest-sha256.txt'],
    ['versionAdditions.xml'],
    ['versionInventory.xml'],
    ['data', 'metadata', 'versionMetadata.xml']
  ]

  verify_pathname(bag_dir)
  required_entries.each { |segments| verify_pathname(bag_dir.join(*segments)) }
  true
end
.verify_pathname(pathname) ⇒ Boolean
167 168 169 170 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 167
#
# Confirm that a path exists on disk.
#
# The argument is coerced with Kernel#Pathname, so a plain String path is
# accepted as well as a Pathname; the error message is the same either way.
#
# @param pathname [Pathname, String] the path to check
# @return [Boolean] true when the path exists
# @raise [RuntimeError] "<basename> not found at <path>" when it does not
def self.verify_pathname(pathname)
  path = Pathname(pathname)
  raise "#{path.basename} not found at #{path}" unless path.exist?
  true
end
.verify_version_id(pathname, expected, found) ⇒ Object
97 98 99 100 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 97
#
# Compare the version id found in a file with the one that was expected.
#
# @param pathname [Pathname] file the id was read from; used only in the message
# @param expected [Integer] the version id the caller requires
# @param found [Integer, nil] the version id actually read
# @return [Boolean] true when the two ids are equal
# @raise [RuntimeError] when they differ
def self.verify_version_id(pathname, expected, found)
  return true if expected == found

  raise "Version mismatch in #{pathname}, expected #{expected}, found #{found}"
end
.verify_version_metadata(metadata_dir, expected) ⇒ Object
88 89 90 91 92 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 88
#
# Check that versionMetadata.xml in the metadata directory carries the
# expected version id.
#
# NOTE(review): the method name and parameter name were missing from this
# listing; they have been restored from the documented signature
# `.verify_version_metadata(metadata_dir, expected)`.
#
# @param metadata_dir [Pathname] directory holding versionMetadata.xml
# @param expected [Integer] the version id the file must declare
# @return [Boolean] true when the id matches
# @raise [RuntimeError] from the helpers when the file is missing or the id differs
def self.verify_version_metadata(metadata_dir, expected)
  vmfile = metadata_dir.join('versionMetadata.xml')
  verify_version_id(vmfile, expected, vmfile_version_id(vmfile))
  true
end
.vmfile_version_id(pathname) ⇒ Integer
104 105 106 107 108 109 110 |
# File 'lib/dor/services/sdr_ingest_service.rb', line 104
#
# Read the versionId attribute of the last /versionMetadata/version element
# in the given file.
#
# The file is opened in block form so the handle is closed once parsing is
# done. A document with no version element yields nil rather than failing on
# a nil node.
#
# @param pathname [Pathname] path to a versionMetadata.xml file
# @return [Integer, nil] the versionId as an integer, or nil when there is no
#   version element or it has no versionId attribute
# @raise [RuntimeError] from verify_pathname when the file does not exist
def self.vmfile_version_id(pathname)
  verify_pathname(pathname)
  doc = File.open(pathname.to_s) { |file| Nokogiri::XML(file) }
  last_version = doc.xpath('/versionMetadata/version').last
  return nil if last_version.nil?

  version_id = last_version['versionId']
  version_id.nil? ? nil : version_id.to_i
end