Class: Zizia::HyraxRecordImporter

Inherits:
RecordImporter show all
Defined in:
lib/zizia/hyrax/hyrax_record_importer.rb

Constant Summary collapse

DEFAULT_CREATOR_KEY =

TODO: Get this from Hyrax config

'[email protected]'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(attributes: {}) ⇒ HyraxRecordImporter

Returns a new instance of HyraxRecordImporter.

Examples:

attributes: { collection_id: '123',
              depositor_id: '456',
              batch_id: '789',
              deduplication_field: 'legacy_id'
            }

Parameters:

  • attributes (Hash) (defaults to: {})

    Attributes that come from the UI or importer rather than from the CSV/mapper. These are useful for logging and tracking the output of an import job for a given collection, user, or batch. If a deduplication_field is provided, the system will look for existing works with that field and matching value and will update the record instead of creating a new record.



48
49
50
51
52
53
54
55
56
57
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 48

# Build an importer whose settings are copied from a CsvImportDetail-like
# object supplied under the :csv_import_detail key.
#
# NOTE(review): attributes[:csv_import_detail] is dereferenced unconditionally,
# so calling this with the default `{}` raises NoMethodError on nil — confirm
# whether the default is ever meant to be used.
#
# @param attributes [Hash] must carry :csv_import_detail, an object responding
#   to deduplication_field, collection_id, batch_id, success_count,
#   failure_count and depositor_id
def initialize(attributes: {})
  detail = attributes[:csv_import_detail]

  # Keep the persisted detail object itself, then cache the fields read from it.
  @csv_import_detail   = detail
  @deduplication_field = detail.deduplication_field
  @collection_id       = detail.collection_id
  @batch_id            = detail.batch_id
  @success_count       = detail.success_count
  @failure_count       = detail.failure_count

  # Resolves and stores the depositor (see #find_depositor).
  find_depositor(detail.depositor_id)
end

Instance Attribute Details

#batch_id ⇒ String

Returns an id number associated with the process that kicked off this import run.

Returns:

  • (String)

    an id number associated with the process that kicked off this import run



19
20
21
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 19

# Reader for the value held in @batch_id.
#
# @return [String] an id number associated with the process that kicked off
#   this import run
def batch_id
  @batch_id
end

#collection_id ⇒ String

Returns the Fedora ID for a Collection.

Returns:

  • (String)

    The fedora ID for a Collection.



15
16
17
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 15

# Reader for the value held in @collection_id.
#
# @return [String] the Fedora ID for a Collection
def collection_id
  @collection_id
end

#csv_import_detail ⇒ Object

Returns the value of attribute csv_import_detail.



7
8
9
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 7

# Reader for the value held in @csv_import_detail, which the constructor
# takes from attributes[:csv_import_detail].
#
# @return [Object] the value of attribute csv_import_detail
def csv_import_detail
  @csv_import_detail
end

#deduplication_field ⇒ String

If this is set, look for records with a match in this field and update the metadata instead of creating a new record. This will NOT re-import file attachments.

Returns:

  • (String)

    if this is set, look for records with a match in this field



24
25
26
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 24

# Reader for the value held in @deduplication_field.
#
# @return [String] if this is set, look for records with a match in this
#   field and update the metadata instead of creating a new record. This will
#   NOT re-import file attachments.
def deduplication_field
  @deduplication_field
end

#depositor ⇒ User

Returns:

  • (User)


11
12
13
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 11

# Reader for the value held in @depositor.
#
# @return [User]
def depositor
  @depositor
end

#failure_count ⇒ String

Returns the number of records this importer has failed to create.

Returns:

  • (String)

    the number of records this importer has failed to create



32
33
34
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 32

# Reader for the value held in @failure_count.
#
# NOTE(review): the generated docs type this attribute as String even though
# it is described as a count — confirm the intended type.
#
# @return the number of records this importer has failed to create
def failure_count
  @failure_count
end

#success_count ⇒ String

Returns the number of records this importer has successfully created.

Returns:

  • (String)

    the number of records this importer has successfully created



28
29
30
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 28

# Reader for the value held in @success_count.
#
# NOTE(review): the generated docs type this attribute as String even though
# it is described as a count — confirm the intended type.
#
# @return the number of records this importer has successfully created
def success_count
  @success_count
end

Instance Method Details

#create_upload_files(record) ⇒ Array

Create a Hyrax::UploadedFile for each file attachment. TODO: What if we can’t find the file? TODO: How do we specify where the files can be found?

Parameters:

Returns:

  • (Array)

    an array of Hyrax::UploadedFile ids



116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 116

# Create a Hyrax::UploadedFile for each file attachment named by the record's
# mapper and collect the ids of the created uploads.
#
# Each file is opened with the block form of File.open so the handle is
# closed even when Hyrax::UploadedFile.create raises; previously a failure
# part-way through left the descriptor open.
#
# @param record [ImportRecord] its mapper may expose a #files list of filenames
# @return [Array, nil] an array of Hyrax::UploadedFile ids; an empty array
#   when the mapper lists no files; nil when the mapper does not respond to
#   #files at all
# @raise [RuntimeError] propagated from #find_file_path when a file is missing
def create_upload_files(record)
  return unless record.mapper.respond_to?(:files)
  files_to_attach = record.mapper.files
  return [] if files_to_attach.nil? || files_to_attach.empty?

  files_to_attach.map do |filename|
    File.open(find_file_path(filename)) do |file|
      Hyrax::UploadedFile.create(user: depositor, file: file).id
    end
  end
end

#file_attachments_path ⇒ Object

The path on disk where file attachments can be found



107
108
109
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 107

# The path on disk where file attachments can be found: the IMPORT_PATH
# environment variable when it is set, otherwise '/opt/data'.
#
# @return [String]
def file_attachments_path
  ENV.fetch('IMPORT_PATH', '/opt/data')
end

#find_depositor(user_key) ⇒ Object

“depositor” is a required field for Hyrax. If it hasn’t been set, set it to the Hyrax default batch user.



62
63
64
65
66
67
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 62

# Resolve the depositor and store it through the depositor= writer.
#
# When a user_key is given, the user is looked up with
# ::User.find_by_user_key and, failing that, ::User.find. When no user was
# obtained (including when user_key is nil) the system user for
# DEFAULT_CREATOR_KEY is found or created instead.
#
# NOTE(review): if ::User.find raises for an unknown key, the default-user
# fallback is never reached for a non-nil user_key — confirm that is intended.
#
# @param user_key [Object, nil] key or id identifying the depositor
# @return [Object] the user that was assigned as depositor
def find_depositor(user_key)
  located = nil
  if user_key
    located = ::User.find_by_user_key(user_key) || ::User.find(user_key)
  end
  self.depositor = located || ::User.find_or_create_system_user(DEFAULT_CREATOR_KEY)
end

#find_existing_record(record) ⇒ ActiveFedora::Base

Search for any existing records that match on the deduplication_field

Parameters:

  • record (ImportRecord)

Returns:

  • (ActiveFedora::Base)


73
74
75
76
77
78
79
80
81
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 73

# Search for an existing record that matches on the deduplication_field.
#
# The field value is read from the record's mapper once and reused for the
# emptiness checks, the query and the error message. It is read with
# public_send rather than send because the field name originates outside
# this class, so it must not be able to reach private methods.
#
# @param record [ImportRecord]
# @return [ActiveFedora::Base, nil] the single matching record, or nil when
#   no deduplication_field is configured, the record does not respond to it,
#   the value is nil/empty, or nothing matches
# @raise [RuntimeError] when more than one existing record matches
def find_existing_record(record)
  field = deduplication_field
  return unless field
  return unless record.respond_to?(field)

  value = record.mapper.public_send(field)
  return if value.nil? || value.empty?

  existing_records = import_type.where("#{field}": value.to_s)
  raise "More than one record matches deduplication_field #{field} with value #{value}" if existing_records.count > 1
  existing_records.first
end

#find_file_path(filename) ⇒ String

Within the directory specified by ENV['IMPORT_PATH'], find the first instance of a file matching the given filename, searching subdirectories as well. If there is no matching file, raise an exception.

Parameters:

  • filename (String)

Returns:

  • (String)

    a full pathname to the found file



137
138
139
140
141
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 137

# Within the import directory, find the first file matching the given
# filename at any depth.
#
# The search root comes from #file_attachments_path so that its '/opt/data'
# fallback applies when IMPORT_PATH is unset. Interpolating ENV['IMPORT_PATH']
# directly would yield the pattern "/**/<filename>" in that case and walk the
# entire filesystem.
#
# NOTE(review): filename is interpolated into a glob pattern, so glob
# metacharacters in it (*, ?, [], {}) are interpreted rather than matched
# literally.
#
# @param filename [String]
# @return [String] a full pathname to the found file
# @raise [RuntimeError] if no matching file exists
def find_file_path(filename)
  filepath = Dir.glob("#{file_attachments_path}/**/#{filename}").first
  raise "Cannot find file #{filename}... Are you sure it has been uploaded and that the filename matches?" if filepath.nil?
  filepath
end

#import(record:) ⇒ void

This method returns an undefined value.

Parameters:

  • record (ImportRecord)


87
88
89
90
91
92
93
94
95
96
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 87

# Import one record: update the matching existing record when
# #find_existing_record returns one, otherwise create a new one.
#
# Faraday::ConnectionFailed and Ldp::HttpError are logged and swallowed;
# a RuntimeError is logged and then re-raised to the caller.
#
# @param record [ImportRecord]
# @return [void]
def import(record:)
  match = find_existing_record(record)
  if match
    update_for(existing_record: match, update_record: record)
  else
    create_for(record: record)
    nil
  end
rescue Faraday::ConnectionFailed, Ldp::HttpError => connection_error
  Rails.logger.error "[zizia] #{connection_error}"
rescue RuntimeError => runtime_error
  Rails.logger.error "[zizia] #{runtime_error}"
  raise
end

#import_type ⇒ Object

TODO: You should be able to specify the import type in the import



99
100
101
102
103
104
# File 'lib/zizia/hyrax/hyrax_record_importer.rb', line 99

# The work type to import: the first entry of Hyrax.config.curation_concerns.
# TODO: You should be able to specify the import type in the import
#
# @return [Object] the first configured curation concern
# @raise [RuntimeError] when Hyrax is not defined or has no curation concerns
def import_type
  concerns = defined?(Hyrax) ? Hyrax&.config&.curation_concerns : nil
  raise 'No curation_concern found for import' unless concerns&.any?

  concerns.first
end