Class: Bulkrax::Importer
Overview
rubocop:disable Metrics/ClassLength
Constant Summary
collapse
- DEFAULT_OBJECT_TYPES =
%w[collection work file_set relationship].freeze
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
Methods included from StatusInfo
#current_status, #failed?, #last_error, #set_status_info, #skipped?, #status_at, #succeeded?
#file?, #increment_counters, #key_without_numbers, #keys_without_numbers, #last_imported_at, #next_import_at, #parser, #parser_class, #zip?
Instance Attribute Details
#current_run(skip_counts: false) ⇒ Object
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
# File 'app/models/bulkrax/importer.rb', line 112
# Returns the importer run for the current import, creating it on first use.
#
# An already-present @current_run is returned as is. Otherwise a new run is
# created through +importer_runs.create!+. That new run is returned without
# entry totals when both +file?+ and +zip?+ are true, or when +skip_counts+
# is truthy; in every other case the totals reported by the parser are
# written to the run before it is returned.
#
# @param skip_counts [Boolean] when truthy, skip computing and storing totals
# @return [Object] the run held in @current_run
def current_run(skip_counts: false)
  return @current_run if @current_run.present?

  @current_run = importer_runs.create!
  return @current_run if (file? && zip?) || skip_counts

  totals = {
    total_work_entries: limit || parser.works_total,
    total_collection_entries: parser.collections_total,
    total_file_set_entries: parser.file_sets_total
  }
  @current_run.tap { |run| run.update!(totals) }
end
|
#file ⇒ Object
Returns the value of attribute file.
29
30
31
|
# File 'app/models/bulkrax/importer.rb', line 29
# Plain reader for the +file+ attribute.
#
# @return [Object] the current value of @file (nil when it was never assigned)
def file
@file
end
|
#file_style ⇒ Object
Returns the value of attribute file_style.
29
30
31
|
# File 'app/models/bulkrax/importer.rb', line 29
# Plain reader for the +file_style+ attribute.
#
# @return [Object] the current value of @file_style (nil when it was never assigned)
def file_style
@file_style
end
|
#only_updates ⇒ Object
Returns the value of attribute only_updates.
29
30
31
|
# File 'app/models/bulkrax/importer.rb', line 29
# Plain reader for the +only_updates+ attribute.
#
# @return [Object] the current value of @only_updates (nil when it was never assigned)
def only_updates
@only_updates
end
|
Class Method Details
.frequency_enums ⇒ Object
92
93
94
95
96
97
|
# File 'app/models/bulkrax/importer.rb', line 92
# Label/value pairs describing the selectable import frequencies.
#
# @return [Array<Array(String, String)>] pairs of display label and duration
#   string, in the order Daily, Monthly, Yearly, Once (on save)
def self.frequency_enums
  labels = ['Daily', 'Monthly', 'Yearly', 'Once (on save)']
  durations = %w[P1D P1M P1Y PT0S]
  labels.zip(durations)
end
|
.safe_uri_filename(uri) ⇒ Object
32
33
34
35
36
37
38
39
|
# File 'app/models/bulkrax/importer.rb', line 32
def self.safe_uri_filename(uri)
r = Faraday.head(uri.to_s)
return CGI.parse(r.['content-disposition'])["filename"][0].delete("\"")
rescue
filename = File.basename(uri.to_s)
filename.delete!('/')
filename.presence || SecureRandom.uuid
end
|
Instance Method Details
#completed_statuses ⇒ Object
150
151
152
153
154
|
# File 'app/models/bulkrax/importer.rb', line 150
# Statuses with the message 'Complete' that belong to this importer's entries.
#
# Starts from +Bulkrax::Status.latest_by_statusable+ (with +:statusable+
# included) and narrows it to rows whose statusable is a 'Bulkrax::Entry'
# with an id among this importer's entries. Memoized in @completed_statuses.
#
# @return [Object] the filtered status collection
def completed_statuses
  @completed_statuses ||= begin
    latest = Bulkrax::Status.latest_by_statusable.includes(:statusable)
    entry_ids = entries.pluck(:id)
    latest.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Complete')
  end
end
|
#default_field_mapping ⇒ Object
78
79
80
81
82
83
84
85
86
|
# File 'app/models/bulkrax/importer.rb', line 78
# Builds a field mapping from the parser's import fields.
#
# When the parser reports no import fields (nil), the importer's own
# +field_mapping+ is returned unchanged. Otherwise every non-nil field is
# passed to +Bulkrax.default_field_mapping+, the results are merged, and the
# merged value is wrapped in a HashWithIndifferentAccess.
#
# @return [Object] +field_mapping+, or an ActiveSupport::HashWithIndifferentAccess
def default_field_mapping
  fields = parser.import_fields
  return field_mapping if fields.nil?

  per_field = fields.reject(&:nil?).map { |field| Bulkrax.default_field_mapping.call(field) }
  ActiveSupport::HashWithIndifferentAccess.new(per_field.inject(:merge))
end
|
#errored_entries_csv_path ⇒ Object
257
258
259
|
# File 'app/models/bulkrax/importer.rb', line 257
# Path of the errored-entries CSV for this import.
#
# The file name is "import_<path_string>_errored_entries.csv" under the
# parser's +base_path+. Memoized in @errored_entries_csv_path.
#
# @return [String] the joined path
def errored_entries_csv_path
  return @errored_entries_csv_path if @errored_entries_csv_path

  directory = parser.base_path
  @errored_entries_csv_path = File.join(directory, "import_#{path_string}_errored_entries.csv")
end
|
#existing_entries? ⇒ Boolean
188
189
190
|
# File 'app/models/bulkrax/importer.rb', line 188
# Reports whether the parser's +file_style+ field contains "Existing Entries".
#
# @return [Boolean] true when the +file_style+ value contains the text
#   "Existing Entries"; false when it does not, or when no value is set
def existing_entries?
  # match? yields a real boolean without building a MatchData; `|| false`
  # turns the nil produced by safe navigation into false.
  parser.parser_fields['file_style']&.match?(/Existing Entries/) || false
end
|
#failed_entries? ⇒ Boolean
133
134
135
|
# File 'app/models/bulkrax/importer.rb', line 133
# Reports whether +entries.failed+ contains anything.
#
# @return [Boolean] the result of +any?+ on the failed entries
def failed_entries?
  failed = entries.failed
  failed.any?
end
|
#failed_messages ⇒ Object
143
144
145
146
147
148
|
# File 'app/models/bulkrax/importer.rb', line 143
# Groups the ids of the failed statuses by their error message.
#
# @return [Hash{Object => Array}] each distinct +error_message+ from
#   +failed_statuses+ mapped to the ids of the statuses carrying it
def failed_messages
  failed_statuses
    .group_by(&:error_message)
    .transform_values { |statuses| statuses.map(&:id) }
end
|
#failed_statuses ⇒ Object
137
138
139
140
141
|
# File 'app/models/bulkrax/importer.rb', line 137
# Statuses with the message 'Failed' that belong to this importer's entries.
#
# Starts from +Bulkrax::Status.latest_by_statusable+ (with +:statusable+
# included) and narrows it to rows whose statusable is a 'Bulkrax::Entry'
# with an id among this importer's entries. Memoized in @failed_statuses.
#
# @return [Object] the filtered status collection
def failed_statuses
  @failed_statuses ||= begin
    latest = Bulkrax::Status.latest_by_statusable.includes(:statusable)
    entry_ids = entries.pluck(:id)
    latest.where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', entry_ids, 'Bulkrax::Entry', 'Failed')
  end
end
|
#frequency ⇒ Object
103
104
105
106
|
# File 'app/models/bulkrax/importer.rb', line 103
# The import frequency as a duration object.
#
# Reads the stored +:frequency+ attribute and falls back to "PT0S" when
# nothing is stored.
#
# @return [ISO8601::Duration] duration built from the stored or default value
def frequency
  ISO8601::Duration.new(self[:frequency] || "PT0S")
end
|
#frequency=(frequency) ⇒ Object
99
100
101
|
# File 'app/models/bulkrax/importer.rb', line 99
# Stores the import frequency.
#
# The given value is passed through ISO8601::Duration and its string form is
# written to the +:frequency+ attribute.
#
# @param frequency [Object] value accepted by ISO8601::Duration.new
def frequency=(frequency)
  duration = ISO8601::Duration.new(frequency)
  self[:frequency] = duration.to_s
end
|
#import_collections ⇒ Object
196
197
198
|
# File 'app/models/bulkrax/importer.rb', line 196
# Runs +import_objects+ restricted to the 'collection' type.
#
# @return [Object] whatever +import_objects+ returns
def import_collections
  import_objects(%w[collection])
end
|
#import_file_path ⇒ Object
160
161
162
|
# File 'app/models/bulkrax/importer.rb', line 160
# Value stored under 'import_file_path' in the parser fields.
#
# @return [Object, nil] the stored value, or nil when the key is absent
def import_file_path
  parser_fields['import_file_path']
end
|
#import_file_sets ⇒ Object
200
201
202
|
# File 'app/models/bulkrax/importer.rb', line 200
# Runs +import_objects+ restricted to the 'file_set' type.
#
# @return [Object] whatever +import_objects+ returns
def import_file_sets
  import_objects(%w[file_set])
end
|
#import_metadata_format ⇒ Object
The format of the metadata for the incoming import; corresponds to an Entry class
235
236
237
|
# File 'app/models/bulkrax/importer.rb', line 235
# Label/class-name pairs for the supported incoming metadata formats.
#
# @return [Array<Array(String, String)>] pairs of display label and entry
#   class name, in the order CSV, RDF (N-Triples)
def import_metadata_format
  labels = ['CSV', 'RDF (N-Triples)']
  entry_classes = ['Bulkrax::CsvEntry', 'Bulkrax::RdfEntry']
  labels.zip(entry_classes)
end
|
#import_objects(types_array = nil) ⇒ Object
210
211
212
213
214
215
216
217
218
|
# File 'app/models/bulkrax/importer.rb', line 210
# Imports objects of the requested types through the parser.
#
# Defaults +only_updates+ to false, saves the importer when it is a new
# record, then either rebuilds entries (when +existing_entries?+ is truthy)
# or creates objects, and finally marks untouched entries as skipped. Any
# StandardError raised along the way is recorded with +set_status_info+
# instead of propagating.
#
# @param types_array [Array<String>, nil] object types to import; nil means
#   DEFAULT_OBJECT_TYPES
# @return [Object] the result of +mark_unseen_as_skipped+, or of
#   +set_status_info+ when an error was rescued
def import_objects(types_array = nil)
  self.only_updates ||= false
  save if new_record?

  types = types_array || DEFAULT_OBJECT_TYPES
  if existing_entries?
    parser.rebuild_entries(types)
  else
    parser.create_objects(types)
  end
  mark_unseen_as_skipped
rescue StandardError => error
  set_status_info(error)
end
|
#import_relationships ⇒ Object
204
205
206
|
# File 'app/models/bulkrax/importer.rb', line 204
# Runs +import_objects+ restricted to the 'relationship' type.
#
# @return [Object] whatever +import_objects+ returns
def import_relationships
  import_objects(%w[relationship])
end
|
#import_works ⇒ Object
192
193
194
|
# File 'app/models/bulkrax/importer.rb', line 192
# Runs +import_objects+ restricted to the 'work' type.
#
# @return [Object] whatever +import_objects+ returns
def import_works
  import_objects(%w[work])
end
|
#importer_unzip_path(mkdir: false) ⇒ Object
If the import data is zipped, unzip it to this path
245
246
247
248
249
250
251
252
253
254
255
|
# File 'app/models/bulkrax/importer.rb', line 245
# Directory that zipped import data is unpacked into.
#
# The preferred location is "import_<path_string>" under the parser's
# +base_path+. It is returned when it already exists on disk or when
# +mkdir+ is exactly true. Otherwise the last "_"-separated segment of the
# path is dropped and, among the existing paths sharing the remaining
# prefix, the one whose trailing number is highest is chosen (nil when
# nothing matches).
#
# @param mkdir [Boolean] pass true to get the preferred path even though it
#   does not exist yet
# @return [String, nil] the chosen path, also stored in @importer_unzip_path
def importer_unzip_path(mkdir: false)
  @importer_unzip_path ||= File.join(parser.base_path, "import_#{path_string}")
  return @importer_unzip_path if Dir.exist?(@importer_unzip_path) || mkdir == true

  prefix = @importer_unzip_path.split('_')[0...-1].join('_')
  candidates = Dir.glob("#{prefix}*")
  # Order by the integer that follows the prefix and its separator character.
  ranked = candidates.sort_by { |candidate| candidate.split(prefix).last[1..-1].to_i }
  @importer_unzip_path = ranked.last
end
|
#last_run ⇒ Object
129
130
131
|
# File 'app/models/bulkrax/importer.rb', line 129
# The last of this importer's runs, memoized in @last_run.
#
# @return [Object, nil] +importer_runs.last+
def last_run
  @last_run ||= importer_runs.last
end
|
#mapping ⇒ Object
If field_mapping is empty, set up a default mapping based on the parser's import_fields
65
66
67
68
69
70
71
72
73
74
75
76
|
# File 'app/models/bulkrax/importer.rb', line 65
# The effective field mapping for this importer, memoized in @mapping.
#
# When +field_mapping+ is blank or equal to [{}], the default mapping is used,
# but only if the parser reports import fields or the mapping is exactly
# [{}]; otherwise the result is nil. A non-blank +field_mapping+ is merged
# over the default mapping.
#
# @return [Object, nil] the resolved mapping
def mapping
  @mapping ||= begin
    custom = field_mapping
    placeholder = custom == [{}]
    if custom.blank? || placeholder
      default_field_mapping if parser.import_fields.present? || placeholder
    else
      default_field_mapping.merge(custom)
    end
  end
end
|
#mark_unseen_as_skipped ⇒ Object
After an import any entries we did not touch are skipped. They are not really pending, complete for the last run, or failed
222
223
224
225
226
|
# File 'app/models/bulkrax/importer.rb', line 222
# Sets the status of every entry whose identifier is not a key of +seen+ to
# 'Skipped'.
def mark_unseen_as_skipped
  unseen = entries.where.not(identifier: seen.keys)
  unseen.find_each { |entry| entry.set_status_info('Skipped') }
end
|
#metadata_only? ⇒ Boolean
184
185
186
|
# File 'app/models/bulkrax/importer.rb', line 184
# Reports whether the parser's 'metadata_only' field is exactly true.
#
# @return [Boolean] true only when the stored value == true
def metadata_only?
  fields = parser.parser_fields
  fields['metadata_only'] == true
end
|
#original_file ⇒ Object
168
169
170
|
# File 'app/models/bulkrax/importer.rb', line 168
# The import file path, but only when +original_file?+ is truthy.
#
# @return [Object, nil] +import_file_path+, or nil otherwise
def original_file
  original_file? ? import_file_path : nil
end
|
#original_file? ⇒ Boolean
164
165
166
|
# File 'app/models/bulkrax/importer.rb', line 164
# Reports whether an import file path is set and exists on disk.
#
# @return [Boolean, nil] the result of File.exist? for the path, or the
#   falsy path value itself when no path is set
def original_file?
  path = import_file_path
  path && File.exist?(path)
end
|
#parser_fields ⇒ Object
88
89
90
|
# File 'app/models/bulkrax/importer.rb', line 88
# The stored +:parser_fields+ attribute, never nil.
#
# @return [Object] the stored value, or a new empty Hash when nothing is stored
def parser_fields
  stored = self[:parser_fields]
  stored || {}
end
|
#path_string ⇒ Object
261
262
263
264
265
|
# File 'app/models/bulkrax/importer.rb', line 261
# Identifier fragment used when building import file and directory names.
#
# Normally "<id>_<created_at as YYYYmmddHHMMSS>_<id of the last importer run>".
# If building that raises a StandardError (for instance when there is no
# last run), the run id part is left off.
#
# @return [String] the identifier fragment
def path_string
  format('%s_%s_%s', id, created_at.strftime('%Y%m%d%H%M%S'), importer_runs.last.id)
rescue StandardError
  format('%s_%s', id, created_at.strftime('%Y%m%d%H%M%S'))
end
|
#record_status ⇒ Object
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
# File 'app/models/bulkrax/importer.rb', line 49
# Records the final status of the current run on its importer.
#
# Reloads the run through +ImporterRun.find+ and does nothing while it still
# has enqueued records. Otherwise the importer's status becomes:
# * 'Complete' when the run has no failed records,
# * a Bulkrax::ImportFailed built from the run's invalid records (one per
#   line) when there are failures and invalid records are present,
# * 'Complete (with failures)' when there are failures but no invalid records.
#
# @return [Object, nil] the result of +set_status_info+, or nil when records
#   are still enqueued
def record_status
  importer_run = ImporterRun.find(current_run.id)
  return if importer_run.enqueued_records.positive?

  outcome =
    if !importer_run.failed_records.positive?
      'Complete'
    elsif importer_run.invalid_records.present?
      Bulkrax::ImportFailed.new('Failed with Invalid Records', importer_run.invalid_records.split("\n"))
    else
      'Complete (with failures)'
    end
  importer_run.importer.set_status_info(outcome)
end
|
#remove_and_rerun ⇒ Object
180
181
182
|
# File 'app/models/bulkrax/importer.rb', line 180
# Value stored under 'remove_and_rerun' in the parser fields.
#
# @return [Object, nil] the stored value, or nil when the key is absent
def remove_and_rerun
  parser_fields['remove_and_rerun']
end
|
#replace_files ⇒ Object
172
173
174
|
# File 'app/models/bulkrax/importer.rb', line 172
# Value stored under 'replace_files' in the parser fields.
#
# @return [Object, nil] the stored value, or nil when the key is absent
def replace_files
  parser_fields['replace_files']
end
|
#schedulable? ⇒ Boolean
108
109
110
|
# File 'app/models/bulkrax/importer.rb', line 108
# Reports whether the importer repeats, i.e. its frequency is not zero seconds.
#
# @return [Boolean] true when +frequency.to_seconds+ differs from 0
def schedulable?
  seconds = frequency.to_seconds
  seconds != 0
end
|
#seen ⇒ Object
156
157
158
|
# File 'app/models/bulkrax/importer.rb', line 156
# Hash memoized in @seen; created empty on first access.
# +mark_unseen_as_skipped+ reads its keys as entry identifiers.
#
# @return [Hash] the same hash object on every call
def seen
@seen ||= {}
end
|
#status ⇒ Object
41
42
43
44
45
46
47
|
# File 'app/models/bulkrax/importer.rb', line 41
# Status label for this importer.
#
# @return [Object] 'Validated' when +validate_only+ is truthy; otherwise the
#   value produced by the inherited +status+ implementation
def status
  return 'Validated' if validate_only

  super
end
|
#unique_collection_identifier(id) ⇒ Object
Prepend the base_url to ensure unique set identifiers
230
231
232
|
# File 'app/models/bulkrax/importer.rb', line 230
# Prefixes +id+ with part of the configured base URL so set identifiers stay
# unique across sources.
#
# The prefix is the third "/"-separated segment of parser_fields['base_url']
# (for "https://host/path" that is "host").
#
# @param id [Object] the identifier to prefix
# @return [String] "<segment>_<id>"
def unique_collection_identifier(id)
  segment = parser_fields['base_url'].split('/')[2]
  "#{segment}_#{id}"
end
|
#update_files ⇒ Object
176
177
178
|
# File 'app/models/bulkrax/importer.rb', line 176
# Value stored under 'update_files' in the parser fields.
#
# @return [Object, nil] the stored value, or nil when the key is absent
def update_files
  parser_fields['update_files']
end
|