Class: Bulkrax::CsvEntry
- Inherits:
-
Entry
show all
- Defined in:
- app/models/bulkrax/csv_entry.rb
Overview
TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense. We do too much in these entry classes. We need to extract the common logic from the various entry models into a module that can be shared between them.
Defined Under Namespace
Modules: AttributeBuilderMethod
Classes: CsvPathError, CsvWrapper, MissingMetadata, RecordNotFound
Instance Attribute Summary
Attributes inherited from Entry
#all_attrs
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Entry
#build, #exporter?, #fetch_field_mapping, #find_collection, #importer?, #last_run, parent_field, #source_identifier, #work_identifier
#add_local
Methods included from StatusInfo
#current_status, #failed?, #last_error, #set_status_info, #skipped?, #status, #status_at, #succeeded?
#build_for_exporter, #file_extension, #filename, #hyrax_record
#active_id_for_authority?, #add_admin_set_id, #add_collections, #add_rights_statement, #add_user_to_permission_templates!, #add_visibility, #build_for_importer, #child_jobs, #factory, #factory_class, #override_rights_statement, #parent_jobs, #rights_statement, #sanitize_controlled_uri_value, #sanitize_controlled_uri_values!, #validate_value
#add_metadata, #excluded?, #field_supported?, #field_to, #fields_that_are_always_multiple, #fields_that_are_always_singular, #get_object_name, #matched_metadata, #multiple?, #multiple_metadata, #schema_form_definitions, #set_parsed_data, #set_parsed_object_data, #single_metadata, #supported_bulkrax_fields
Class Method Details
.data_for_entry(data, _source_id, parser) ⇒ Object
80
81
82
83
84
85
86
87
88
89
90
|
# File 'app/models/bulkrax/csv_entry.rb', line 80
# Converts one parsed CSV row into the raw hash stored on an entry.
#
# @param data [CSV::Table, #to_h] a row, or a table whose first row is used
# @param _source_id [Object] unused
# @param parser [Object] passed through to +parent_field+
# @return [Hash] the row as a hash, with +:model+ and +:parents+ filled in when available
def self.data_for_entry(data, _source_id, parser)
  row = data.is_a?(CSV::Table) ? data.first : data
  raw_data = row.to_h
  raw_data[:model] = row[:model] if row[:model].present?

  # Mirror a custom parent column under the canonical :parents key.
  parent_key = parent_field(parser)
  if parent_key != 'parents' && raw_data.key?(parent_key.to_sym)
    raw_data[:parents] = raw_data[parent_key.to_sym]
  end

  raw_data
end
|
.fields_from_data(data) ⇒ Object
32
33
34
|
# File 'app/models/bulkrax/csv_entry.rb', line 32
# Lists the distinct, non-nil header names of the parsed CSV data.
#
# @param data [#headers] parsed CSV data (for example the wrapper built by +read_data+)
# @return [Array] unique header names with nils removed
def self.fields_from_data(data)
  # The listing read `data..flatten`, which builds a Range and calls
  # `flatten` on self; the headers accessor had been dropped.
  data.headers.flatten.compact.uniq
end
|
.matcher_class ⇒ Object
378
379
380
|
# File 'app/models/bulkrax/csv_entry.rb', line 378
# Matcher class associated with this entry type.
#
# @return [Class] the Bulkrax::CsvMatcher constant
def self.matcher_class
Bulkrax::CsvMatcher
end
|
.read_data(path) ⇒ Object
Note: there is a risk that this reads the whole file into memory, which could cause memory problems with large CSV files. We also strip any special characters out of the headers (looking at you, Excel).
40
41
42
43
44
45
46
47
48
49
50
|
# File 'app/models/bulkrax/csv_entry.rb', line 40
# Reads the CSV file at +path+ and wraps the parsed result.
#
# Headers are converted to symbols after removing every character that is
# not a word character, digit, dot, space or hyphen. The defaults below can
# be overridden by +csv_read_data_options+.
#
# @param path [String] location of the CSV file
# @return [Object] an instance of +csv_wrapper_class+ built from the parsed CSV
# @raise [CsvPathError] when +path+ is blank
def self.read_data(path)
  raise CsvPathError, 'CSV path empty' if path.blank?

  clean_header = ->(h) { h.to_s.gsub(/[^\w\d\. -]+/, '').strip.to_sym }
  defaults = { headers: true, header_converters: clean_header, encoding: 'utf-8' }
  table = CSV.read(path, **defaults.merge(csv_read_data_options))
  csv_wrapper_class.new(table)
end
|
Instance Method Details
#add_file ⇒ Object
159
160
161
162
163
164
165
166
167
168
169
170
171
|
# File 'app/models/bulkrax/csv_entry.rb', line 159
# Populates parsed_metadata['file'] with resolved paths for the record's files.
#
# A String value is split on Bulkrax.multi_value_element_split_on; an Array
# is used as given. Blank names are dropped, spaces in names are replaced
# with underscores, and each name is resolved through +path_to_file+.
#
# @return [Array] the resolved file paths
def add_file
  self.parsed_metadata['file'] ||= []

  listed = record['file']
  case listed
  when String
    self.parsed_metadata['file'] = listed.split(Bulkrax.multi_value_element_split_on)
  when Array
    self.parsed_metadata['file'] = listed
  end

  resolved = parsed_metadata['file'].map do |name|
    name.blank? ? nil : path_to_file(name.tr(' ', '_'))
  end
  self.parsed_metadata['file'] = resolved.compact
end
|
#add_identifier ⇒ Object
126
127
128
|
# File 'app/models/bulkrax/csv_entry.rb', line 126
# Stores the record's source identifier value, wrapped in a one-element
# array, under the work identifier key of parsed_metadata.
#
# @return [Array] the single-element array that was stored
def add_identifier
  identifier = record[source_identifier]
  parsed_metadata[work_identifier] = [identifier]
end
|
150
151
152
153
154
155
156
157
|
# File 'app/models/bulkrax/csv_entry.rb', line 150
# Feeds every key/value pair of the record to +add_metadata+.
#
# The first run of digits in a key (e.g. "title_2") is read as a 1-based
# position and passed on as a 0-based index; keys without a non-zero number
# pass nil as the index.
def add_ingested_metadata
  record.each do |key, value|
    position = key[/\d+/].to_i
    index = position.zero? ? nil : position - 1
    add_metadata(key_without_numbers(key), value, index)
  end
end
|
136
137
138
139
140
141
142
143
144
145
146
147
148
|
# File 'app/models/bulkrax/csv_entry.rb', line 136
# Adds the metadata that depends on which model the entry builds.
#
# * collection model: adds the collection type gid (only when ::Hyrax is defined)
# * file model: validates filename, adds the file path, validates the parent
# * anything else: adds files (unless the importer is metadata-only) and the admin set id
def add_metadata_for_model
  model_class = factory_class
  if model_class.present? && model_class == Bulkrax.collection_model_class
    add_collection_type_gid if defined?(::Hyrax)
  elsif model_class == Bulkrax.file_model_class
    validate_presence_of_filename!
    add_path_to_file
    validate_presence_of_parent!
  else
    add_file unless importerexporter.metadata_only?
    add_admin_set_id
  end
end
|
173
174
175
176
177
178
179
180
181
182
183
|
# File 'app/models/bulkrax/csv_entry.rb', line 173
# Rebuilds parsed_metadata for export and persists the entry.
#
# Files metadata is only built when a collection model class is configured
# and the record is not an instance of it.
#
# @return [Hash] the freshly built parsed_metadata
def build_export_metadata
  self.parsed_metadata = {}
  build_system_metadata

  collection_class = Bulkrax.collection_model_class
  build_files_metadata if collection_class.present? && !hyrax_record.is_a?(collection_class)

  build_relationship_metadata
  build_mapping_metadata
  save!
  parsed_metadata
end
|
197
198
199
200
201
202
203
204
205
206
207
208
|
# File 'app/models/bulkrax/csv_entry.rb', line 197
# Exports file information for the record.
#
# Works only export their thumbnail (see +build_thumbnail_files+). For other
# records the file set names are written under the export key for 'file',
# joined or split according to the 'join' setting of the 'file' mapping.
def build_files_metadata
  return build_thumbnail_files if hyrax_record.work?

  export_key = key_for_export('file')
  sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
  names = map_file_sets(sets)
  join_files = mapping['file']&.[]('join')&.present?
  handle_join_on_export(export_key, names, join_files)
end
|
266
267
268
269
270
271
272
273
274
|
# File 'app/models/bulkrax/csv_entry.rb', line 266
# Walks the field mapping and, for each entry, invokes the builder method
# chosen by AttributeBuilderMethod.for. Entries with no builder are skipped.
def build_mapping_metadata
  fetch_field_mapping.each do |field, config|
    builder = AttributeBuilderMethod.for(key: field, value: config, entry: self)
    send(builder, field, config) if builder
  end
end
|
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
# File 'app/models/bulkrax/csv_entry.rb', line 92
# Builds parsed_metadata for an import.
#
# Validates the record first (which may raise), resets parsed_metadata to an
# empty hash, then runs each step below in the order listed.
#
# @return [Hash] the populated parsed_metadata
def build_metadata
validate_record
# Start from a clean slate; every step below writes into this hash.
self.parsed_metadata = {}
add_identifier
establish_factory_class
add_ingested_metadata
add_collections
add_visibility
add_metadata_for_model
add_rights_statement
sanitize_controlled_uri_values!
add_local
self.parsed_metadata
end
|
limited metadata is needed for delete jobs
111
112
113
114
115
116
|
# File 'app/models/bulkrax/csv_entry.rb', line 111
# Builds the reduced parsed_metadata used for delete jobs: only the factory
# class and the ingested record values are added, with no validation.
#
# @return [Hash] the populated parsed_metadata
def build_metadata_for_delete
self.parsed_metadata = {}
establish_factory_class
add_ingested_metadata
self.parsed_metadata
end
|
#build_object(_key, value) ⇒ Object
276
277
278
279
280
281
282
283
284
|
# File 'app/models/bulkrax/csv_entry.rb', line 276
# Exports the property named by value['object'] through +object_metadata+.
#
# Does nothing when the record does not respond to that property, or when
# the property is nil or empty.
#
# @param _key [Object] unused
# @param value [Hash] mapping configuration; 'object' names the property to read
# @return [Object, nil] the result of +object_metadata+, or nil when skipped
def build_object(_key, value)
  return unless hyrax_record.respond_to?(value['object'])

  data = hyrax_record.send(value['object'])
  # Guard against nil as well as empty: an unset property would otherwise
  # raise NoMethodError on `nil.empty?`.
  return if data.nil? || data.empty?

  data = data.to_a if data.is_a?(ActiveTriples::Relation)
  object_metadata(Array.wrap(data))
end
|
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
|
# File 'app/models/bulkrax/csv_entry.rb', line 210
# Exports the ids of related parent and child records.
#
# For each relationship key, every listed method the record responds to is
# called and the results are collected (using the value's id when it has
# one). Blank keys and empty result sets are skipped.
def build_relationship_metadata
  relationship_methods = {
    related_parents_parsed_mapping => %i[member_of_collection_ids member_of_work_ids in_work_ids parent],
    related_children_parsed_mapping => %i[member_collection_ids member_work_ids file_set_ids member_ids]
  }

  relationship_methods.each do |relationship_key, methods|
    next if relationship_key.blank?

    values = []
    methods.each do |m|
      value = hyrax_record.public_send(m) if hyrax_record.respond_to?(m)
      value_id = value.try(:id)&.to_s || value
      values << value_id if value_id.present?
    end
    values = values.flatten.uniq
    next if values.blank?

    # Read the join setting from the mapping of the relationship being
    # exported (previously the parents mapping was used for both keys), and
    # tolerate a missing mapping entry.
    join = mapping[relationship_key]&.[]('join')&.present?
    handle_join_on_export(relationship_key, values, join)
  end
end
|
Metadata required by Bulkrax for round-tripping
186
187
188
189
190
191
192
193
194
195
|
# File 'app/models/bulkrax/csv_entry.rb', line 186
# Writes the id, source identifier and model name into parsed_metadata.
#
# The source identifier is read from the record via +work_identifier+; when
# several values come back only the first is kept.
def build_system_metadata
  parsed_metadata['id'] = hyrax_record.id

  identifier_value = hyrax_record.send(work_identifier)
  identifier_value = identifier_value.to_a if identifier_value.is_a?(ActiveTriples::Relation)
  parsed_metadata[source_identifier] = Array.wrap(identifier_value).first

  model_name = Bulkrax.object_factory.model_name(resource: hyrax_record)
  parsed_metadata[key_for_export('model')] = model_name
end
|
#build_thumbnail_files ⇒ Object
353
354
355
356
357
358
359
360
361
|
# File 'app/models/bulkrax/csv_entry.rb', line 353
# Exports the record's thumbnail under numbered 'thumbnail_file' keys.
#
# Skipped when the exporter does not include thumbnails or when the object
# factory returns no thumbnail for the record.
def build_thumbnail_files
  return unless importerexporter.include_thumbnails

  thumb = Bulkrax.object_factory.thumbnail_for(resource: hyrax_record)
  return unless thumb

  names = map_file_sets(Array.wrap(thumb))
  handle_join_on_export('thumbnail_file', names, false)
end
|
#build_value(property_name, mapping_config) ⇒ Object
286
287
288
289
290
291
292
293
294
295
296
297
298
|
# File 'app/models/bulkrax/csv_entry.rb', line 286
# Exports one property of the record into parsed_metadata.
#
# When the mapping asks for a join, or the value is not Enumerable, a single
# column is written. Otherwise each element gets its own numbered column
# ("<key>_1", "<key>_2", ...).
#
# @param property_name [String, Symbol] property to read from the record
# @param mapping_config [Hash] mapping configuration; only 'join' is read here
def build_value(property_name, mapping_config)
  property = property_name.to_s
  return unless hyrax_record.respond_to?(property)

  data = hyrax_record.send(property)
  one_column_per_value = !mapping_config['join'] && data.is_a?(Enumerable)

  if one_column_per_value
    data.each_with_index do |datum, position|
      parsed_metadata["#{key_for_export(property_name)}_#{position + 1}"] = prepare_export_data(datum)
    end
  else
    parsed_metadata[key_for_export(property_name)] = prepare_export_data_with_join(data)
  end
end
|
#collection_identifiers ⇒ Object
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
|
# File 'app/models/bulkrax/csv_entry.rb', line 382
# Resolves the record's parent references to identifiers of collection
# entries belonging to the same importer/exporter.
#
# A non-empty result is cached in @collection_identifiers; empty results
# are recomputed on the next call.
#
# @return [Array] identifiers of the matching collection entries (possibly empty)
# @raise [StandardError] when a reference matches more than one collection entry
def collection_identifiers
  return @collection_identifiers if @collection_identifiers.present?

  parent_field_mapping = self.class.parent_field(parser)
  return [] unless parent_field_mapping.present? && record[parent_field_mapping].present?

  references = record[parent_field_mapping].split(Bulkrax.multi_value_element_split_on)
  found = references.map do |reference|
    matches = importerexporter.entries.select do |entry|
      (entry.raw_metadata&.[](source_identifier) == reference) && entry.is_a?(CsvCollectionEntry)
    end
    raise ::StandardError, 'Only expected to find one matching entry' if matches.count > 1

    matches.first&.identifier
  end

  @collection_identifiers = found.compact.presence || []
end
|
#collections_created? ⇒ Boolean
401
402
403
404
|
# File 'app/models/bulkrax/csv_entry.rb', line 401
# Always true in this class. As a consequence #find_collection_ids returns
# collection_ids immediately without performing any lookups.
#
# @return [Boolean] true
def collections_created?
true
end
|
#establish_factory_class ⇒ Object
130
131
132
133
134
|
# File 'app/models/bulkrax/csv_entry.rb', line 130
# Adds the record's value as 'model' metadata for every model field mapping
# of the parser that is present as a key in the record.
def establish_factory_class
  parser.model_field_mappings.each do |field|
    next unless record.key?(field)

    add_metadata('model', record[field])
  end
end
|
#find_collection_ids ⇒ Object
406
407
408
409
410
411
412
413
414
415
416
417
|
# File 'app/models/bulkrax/csv_entry.rb', line 406
# Returns collection_ids, first extending it with the ids of the collections
# found for +collection_identifiers+ unless +collections_created?+ is true.
#
# Identifiers with no matching collection, and ids already present, are skipped.
#
# @return [Array] the collection ids
def find_collection_ids
  unless collections_created?
    if collection_identifiers.present?
      collection_identifiers.each do |identifier|
        collection = find_collection(identifier)
        next if collection.blank? || collection_ids.include?(collection.id)

        collection_ids << collection.id
      end
    end
  end
  collection_ids
end
|
#handle_join_on_export(key, values, join) ⇒ Object
363
364
365
366
367
368
369
370
371
372
|
# File 'app/models/bulkrax/csv_entry.rb', line 363
# Writes +values+ into parsed_metadata either joined or as numbered columns.
#
# @param key [String] base column name
# @param values [Array] values to export
# @param join [Boolean, nil] when truthy, store one value joined with
#   Bulkrax.multi_value_element_join_on under +key+; otherwise store each
#   value under "<key>_<n>" (1-based) and remove any plain +key+ entry
def handle_join_on_export(key, values, join)
  if join
    parsed_metadata[key] = values.join(Bulkrax.multi_value_element_join_on)
  else
    values.each_with_index { |item, position| parsed_metadata["#{key}_#{position + 1}"] = item }
    parsed_metadata.delete(key)
  end
end
|
#key_for_export(key) ⇒ Object
On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data in the opposite direction. Metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the CSV parser automatically.
302
303
304
305
306
307
|
# File 'app/models/bulkrax/csv_entry.rb', line 302
# Translates a property key into its export column name.
#
# The numberless form of the key is looked up in the mapping; when found,
# the first 'from' value replaces it. Whatever +key_without_numbers+ removed
# from the key is appended back onto the result.
#
# @param key [String] property key, possibly carrying a numeric suffix
# @return [String] the column name to export under
def key_for_export(key)
  clean_key = key_without_numbers(key)
  config = mapping[clean_key]
  base = config ? config['from'].first : clean_key
  suffix = key.sub(clean_key, '')
  "#{base}#{suffix}"
end
|
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
|
# File 'app/models/bulkrax/csv_entry.rb', line 325
# Flattens evaluated object data into numbered parsed_metadata columns.
#
# Each element of +data+ is evaluated as Ruby source; the resulting objects
# are numbered from 1. Array values get a second, nested number
# ("<key>_<n>_<m>"); other values are written to "<key>_<n>".
#
# @param data [Array<String>] Ruby-source strings describing hashes
def object_metadata(data)
  # NOTE(review): eval executes arbitrary Ruby. If these strings can
  # originate from imported/user-controlled data this is a code-injection
  # risk and should be replaced with a safe parser — confirm the data source.
  objects = data.map { |source| eval(source) }.flatten

  objects.each_with_index do |raw, position|
    next if raw.nil?

    attributes = raw.with_indifferent_access
    attributes.each_key do |key|
      entry = attributes[key]
      if entry.is_a?(Array)
        entry.each_with_index do |nested, nested_position|
          parsed_metadata["#{key_for_export(key)}_#{position + 1}_#{nested_position + 1}"] = prepare_export_data(nested)
        end
      else
        parsed_metadata["#{key_for_export(key)}_#{position + 1}"] = prepare_export_data(entry)
      end
    end
  end
end
|
#path_to_file(file) ⇒ Object
If only filename is given, construct the path (/files/my_file)
420
421
422
423
424
425
426
427
|
# File 'app/models/bulkrax/csv_entry.rb', line 420
# Resolves +file+ to an existing path.
#
# The value is returned untouched when it already exists on disk; otherwise
# it is joined onto the parser's +path_to_files+ directory.
#
# @param file [String] a path or bare file name
# @return [String] a path that exists
# @raise [RuntimeError] when neither candidate exists
def path_to_file(file)
  return file if File.exist?(file)

  f = File.join(importerexporter.parser.path_to_files, file)
  raise "File #{f} does not exist" unless File.exist?(f)

  f
end
|
#prepare_export_data(datum) ⇒ Object
317
318
319
320
321
322
323
|
# File 'app/models/bulkrax/csv_entry.rb', line 317
# Converts a single value for export: ActiveTriples resources become their
# URI string; every other value is returned unchanged.
#
# @param datum [Object] the value to export
# @return [Object] the URI string or the original value
def prepare_export_data(datum)
  datum.is_a?(ActiveTriples::Resource) ? datum.to_uri.to_s : datum
end
|
#prepare_export_data_with_join(data) ⇒ Object
309
310
311
312
313
314
315
|
# File 'app/models/bulkrax/csv_entry.rb', line 309
# Produces the single-column export string for +data+.
#
# @param data [Object] a scalar or an Enumerable of values
# @return [String] +data.to_s+ for non-Enumerables, "" for empty
#   collections, otherwise the prepared values joined with
#   Bulkrax.multi_value_element_join_on
def prepare_export_data_with_join(data)
  if !data.is_a?(Enumerable)
    data.to_s
  elsif data.empty?
    ""
  else
    prepared = data.map { |item| prepare_export_data(item) }
    prepared.join(Bulkrax.multi_value_element_join_on)
  end
end
|
#record ⇒ Object
374
375
376
|
# File 'app/models/bulkrax/csv_entry.rb', line 374
# The data this entry works from: raw_metadata, cached in @record after the
# first non-nil/non-false read.
#
# @return [Object] the cached raw_metadata
def record
@record ||= raw_metadata
end
|
#validate_record ⇒ Object
118
119
120
121
122
123
124
|
# File 'app/models/bulkrax/csv_entry.rb', line 118
# Ensures the record exists and carries every element the parser requires.
#
# @return [nil] when the record is valid
# @raise [RecordNotFound] when the record is nil
# @raise [MissingMetadata] when required elements are missing; the message
#   lists them
def validate_record
  raise RecordNotFound, 'Record not found' if record.nil?

  csv_parser = importerexporter.parser
  return if csv_parser.required_elements?(record)

  missing = csv_parser.missing_elements(record).join(', ')
  raise MissingMetadata, "Missing required elements, missing element(s) are: #{missing}"
end
|