Class: Bulkrax::CsvEntry

Inherits:
Entry show all
Defined in:
app/models/bulkrax/csv_entry.rb

Overview

TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense. We do too much in these entry classes. We need to extract the common logic from the various entry models into a module that can be shared between them.

Direct Known Subclasses

CsvCollectionEntry, CsvFileSetEntry

Defined Under Namespace

Modules: AttributeBuilderMethod Classes: CsvWrapper

Instance Attribute Summary

Attributes inherited from Entry

#all_attrs

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Entry

#build, #exporter?, #fetch_field_mapping, #find_collection, #importer?, #last_run, parent_field, #source_identifier, #work_identifier

Methods included from HasLocalProcessing

#add_local

Methods included from StatusInfo

#current_status, #failed?, #last_error, #set_status_info, #skipped?, #status, #status_at, #succeeded?

Methods included from ExportBehavior

#build_for_exporter, #filename, #hyrax_record

Methods included from ImportBehavior

#active_id_for_authority?, #add_admin_set_id, #add_collections, #add_rights_statement, #add_user_to_permission_templates!, #add_visibility, #build_for_importer, #child_jobs, #factory, #factory_class, #override_rights_statement, #parent_jobs, #rights_statement, #sanitize_controlled_uri_value, #sanitize_controlled_uri_values!, #validate_value

Methods included from HasMatchers

#add_metadata, #excluded?, #field_supported?, #field_to, #fields_that_are_always_multiple, #fields_that_are_always_singular, #get_object_name, #matched_metadata, #multiple?, #multiple_metadata, #schema_form_definitions, #set_parsed_data, #set_parsed_object_data, #single_metadata, #supported_bulkrax_fields

Class Method Details

.data_for_entry(data, _source_id, parser) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
# File 'app/models/bulkrax/csv_entry.rb', line 58

# Converts one CSV row into the raw-data hash kept for an entry.
#
# @param data [CSV::Table, #to_h] a row, or a table whose first row is used
# @param _source_id [Object] unused
# @param parser [Object] handed to parent_field to find the parent column name
# @return [Hash] the row as a hash, with :model and :parents added when applicable
def self.data_for_entry(data, _source_id, parser)
  # A multi-line table is reduced to its first row.
  row = data.is_a?(CSV::Table) ? data.first : data

  raw_data = row.to_h
  # :model is copied from the row separately so that to_h cannot mistranslate it.
  model = row[:model]
  raw_data[:model] = model if model.present?

  # When the parent column is mapped to a name other than 'parents', mirror its
  # value under :parents as well - the parser needs it.
  parent_column = parent_field(parser)
  parent_key = parent_column.to_sym
  raw_data[:parents] = raw_data[parent_key] if raw_data.key?(parent_key) && parent_column != 'parents'

  raw_data
end

.fields_from_data(data) ⇒ Object



10
11
12
# File 'app/models/bulkrax/csv_entry.rb', line 10

# Lists the distinct, non-nil header names of the given data.
#
# @param data [#headers] object whose headers may be nested arrays
# @return [Array] flattened headers without nils or duplicates
def self.fields_from_data(data)
  header_names = data.headers.flatten
  header_names.compact.uniq
end

.matcher_classObject



342
343
344
# File 'app/models/bulkrax/csv_entry.rb', line 342

# Matcher class associated with this entry type.
#
# @return [Class] Bulkrax::CsvMatcher
def self.matcher_class
  Bulkrax::CsvMatcher
end

.read_data(path) ⇒ Object

There’s a risk that this reads the whole file into memory and could cause a memory leak. We strip any special characters out of the headers (looking at you, Excel).

Raises:

  • (StandardError)


18
19
20
21
22
23
24
25
26
27
28
# File 'app/models/bulkrax/csv_entry.rb', line 18

# Reads the CSV at +path+ and wraps the parsed result.
#
# Headers are cleaned before use: every run of characters that is not a word
# character, digit, dot, space or hyphen is removed, surrounding whitespace is
# stripped, and the result is symbolized. Values from csv_read_data_options
# take precedence over the defaults built here.
#
# @param path [String] location of the CSV file
# @return [Object] an instance of csv_wrapper_class built from the parsed CSV
# @raise [StandardError] when path is blank
def self.read_data(path)
  raise StandardError, 'CSV path empty' if path.blank?

  clean_header = ->(h) { h.to_s.gsub(/[^\w\d\. -]+/, '').strip.to_sym }
  defaults = { headers: true, header_converters: clean_header, encoding: 'utf-8' }
  read_options = defaults.merge(csv_read_data_options)

  csv_wrapper_class.new(CSV.read(path, **read_options))
end

Instance Method Details

#add_fileObject



129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'app/models/bulkrax/csv_entry.rb', line 129

# Fills parsed_metadata['file'] with resolved paths for the files named in the record.
#
# record['file'] may be a String (split on Bulkrax.multi_value_element_split_on)
# or an Array; any other value leaves the current list as it is. Blank names are
# dropped, spaces in names become underscores, and each name is resolved through
# path_to_file.
#
# NOTE(review): the receiver name was missing from the listing (`self.['file']`);
# it is restored here as parsed_metadata - confirm against the original source.
#
# @return [Array<String>] the resolved file paths
# @raise [RuntimeError] from path_to_file when a file cannot be located
def add_file
  self.parsed_metadata['file'] ||= []
  files = record['file']
  if files.is_a?(String)
    self.parsed_metadata['file'] = files.split(Bulkrax.multi_value_element_split_on)
  elsif files.is_a?(Array)
    self.parsed_metadata['file'] = files
  end
  self.parsed_metadata['file'] = self.parsed_metadata['file'].map do |f|
    next if f.blank?

    path_to_file(f.tr(' ', '_'))
  end.compact
end

#add_identifierObject



96
97
98
# File 'app/models/bulkrax/csv_entry.rb', line 96

# Stores the record's source identifier, wrapped in an array, under the
# work_identifier key of parsed_metadata.
#
# NOTE(review): the receiver name was missing from the listing (`self.[...]`);
# it is restored here as parsed_metadata - confirm against the original source.
#
# @return [Array] the single-element array that was stored
def add_identifier
  self.parsed_metadata[work_identifier] = [record[source_identifier]]
end

#add_ingested_metadataObject



120
121
122
123
124
125
126
127
# File 'app/models/bulkrax/csv_entry.rb', line 120

# Feeds every key/value pair of the record to add_metadata.
#
# The first run of digits in a key, minus one, is passed as the index; keys
# without digits (or whose digits evaluate to zero) get a nil index. The key is
# passed on with its numbers removed via key_without_numbers.
#
# NOTE(review): the method name and the add_metadata call were missing from the
# listing; both are restored from the page's method headings.
#
# @return [Object] whatever record.each returns
def add_ingested_metadata
  # we do not want to sort the values in the record before adding the metadata.
  # if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
  record.each do |key, value|
    number = key[/\d+/].to_i
    index = number - 1 unless number.zero?
    add_metadata(key_without_numbers(key), value, index)
  end
end

#add_metadata_for_modelObject



106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'app/models/bulkrax/csv_entry.rb', line 106

# Runs the model-specific metadata steps for the current factory_class.
#
# * collection model: adds the collection type gid (only when ::Hyrax is defined)
# * file model: validates the filename, adds the file path, validates the parent
# * anything else: adds files (unless the importer is metadata-only) and the admin set id
#
# NOTE(review): the method name and the predicate after `importerexporter.` were
# missing from the listing; the name comes from the page heading and the predicate
# is restored as metadata_only? - confirm against the original source.
def add_metadata_for_model
  if factory_class.present? && factory_class == Bulkrax.collection_model_class
    add_collection_type_gid if defined?(::Hyrax)
    # add any additional collection metadata methods here
  elsif factory_class == Bulkrax.file_model_class
    validate_presence_of_filename!
    add_path_to_file
    validate_presence_of_parent!
  else
    add_file unless importerexporter.metadata_only?
    add_admin_set_id
  end
end

#build_export_metadataObject



143
144
145
146
147
148
149
150
151
152
153
# File 'app/models/bulkrax/csv_entry.rb', line 143

# Rebuilds parsed_metadata for export, saves the entry and returns the metadata.
#
# File metadata is only built when a collection model class is configured and
# the exported record is not an instance of it.
#
# NOTE(review): the method name, the parsed_metadata receiver and the four
# build_*_metadata calls were missing from the listing; they are restored from
# the page's method headings - confirm the call order against the original source.
#
# @return [Hash] the rebuilt parsed_metadata
def build_export_metadata
  self.parsed_metadata = {}

  build_system_metadata
  build_files_metadata if Bulkrax.collection_model_class.present? && !hyrax_record.is_a?(Bulkrax.collection_model_class)
  build_relationship_metadata
  build_mapping_metadata
  self.save!

  self.parsed_metadata
end

#build_files_metadataObject



167
168
169
170
171
172
173
174
175
176
177
178
# File 'app/models/bulkrax/csv_entry.rb', line 167

# Adds file-related export columns for the current record.
#
# Works only get their thumbnail files; for other records the file sets are
# mapped to filenames and stored under the export key for 'file', joined or
# numbered depending on the 'join' setting of the 'file' mapping.
#
# NOTE(review): the method name was missing from the listing and is restored
# from the page heading.
def build_files_metadata
  # attaching files to the FileSet row only so we don't have duplicates when importing to a new tenant
  if hyrax_record.work?
    build_thumbnail_files
  else
    file_mapping = key_for_export('file')
    file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
    filenames = map_file_sets(file_sets)

    handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
  end
end

#build_mapping_metadataObject



230
231
232
233
234
235
236
237
238
# File 'app/models/bulkrax/csv_entry.rb', line 230

# Walks the field mapping and dispatches each entry to the builder method
# chosen by AttributeBuilderMethod.for; entries with no builder are skipped.
#
# NOTE(review): the method name was missing from the listing and is restored
# from the page heading.
def build_mapping_metadata
  mapping = fetch_field_mapping
  mapping.each do |key, value|
    method_name = AttributeBuilderMethod.for(key: key, value: value, entry: self)
    next unless method_name

    send(method_name, key, value)
  end
end

#build_metadataObject



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'app/models/bulkrax/csv_entry.rb', line 70

# Validates the record, then rebuilds parsed_metadata from it step by step.
#
# NOTE(review): the method name, the parsed_metadata receiver and two calls
# (add_ingested_metadata, add_metadata_for_model) were missing from the listing;
# they are restored from the page's method headings - confirm the positions of
# the two calls against the original source.
#
# @return [Hash] the rebuilt parsed_metadata
# @raise [StandardError] from validate_record when the record is missing or incomplete
def build_metadata
  validate_record

  self.parsed_metadata = {}
  add_identifier
  establish_factory_class
  add_ingested_metadata
  # TODO(alishaevn): remove the collections stuff entirely and only reference collections via the new parents code
  add_collections
  add_visibility
  add_metadata_for_model
  add_rights_statement
  sanitize_controlled_uri_values!
  add_local

  self.parsed_metadata
end

#build_object(_key, value) ⇒ Object



240
241
242
243
244
245
246
247
248
# File 'app/models/bulkrax/csv_entry.rb', line 240

# Exports the value of an object-type mapping.
#
# Reads the attribute named by value['object'] from hyrax_record and hands it,
# wrapped in an array, to object_metadata. Does nothing when the record lacks
# that attribute or the attribute is empty.
#
# NOTE(review): the name of the call wrapping Array.wrap(data) was missing from
# the listing; it is restored as object_metadata from the page's method headings.
#
# @param _key [Object] unused
# @param value [Hash] mapping entry; its 'object' value names the attribute to read
def build_object(_key, value)
  return unless hyrax_record.respond_to?(value['object'])

  data = hyrax_record.send(value['object'])
  return if data.empty?

  # ActiveTriples::Relation is converted explicitly before wrapping.
  data = data.to_a if data.is_a?(ActiveTriples::Relation)
  object_metadata(Array.wrap(data))
end

#build_relationship_metadataObject



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'app/models/bulkrax/csv_entry.rb', line 180

# Adds parent and child relationship ids to the export.
#
# For each relationship key, collects the ids from every listed method the
# record responds to, de-duplicates them, and stores them through
# handle_join_on_export. Blank keys and empty id lists are skipped.
#
# The join flag is read from the mapping of the relationship being exported.
# The listing always read it from the parents mapping, so the children column
# ignored its own 'join' setting.
#
# NOTE(review): the method name was missing from the listing and is restored
# from the page heading.
def build_relationship_metadata
  # Includes all relationship methods for all exportable record types (works, Collections, FileSets)
  relationship_methods = {
    related_parents_parsed_mapping => %i[member_of_collection_ids member_of_work_ids in_work_ids],
    related_children_parsed_mapping => %i[member_collection_ids member_work_ids file_set_ids]
  }

  relationship_methods.each do |relationship_key, methods|
    next if relationship_key.blank?

    available = methods.select { |m| hyrax_record.respond_to?(m) }
    values = available.map { |m| hyrax_record.public_send(m) }.flatten.uniq
    next if values.blank?

    join = mapping[relationship_key]&.[]('join').present?
    handle_join_on_export(relationship_key, values, join)
  end
end

#build_system_metadataObject

Metadata required by Bulkrax for round-tripping



156
157
158
159
160
161
162
163
164
165
# File 'app/models/bulkrax/csv_entry.rb', line 156

# Metadata required by Bulkrax for round-tripping: the record id, its source
# identifier (first value only) and its model name.
#
# NOTE(review): the method name and the parsed_metadata receiver were missing
# from the listing; the name comes from the page heading and the receiver is
# restored as parsed_metadata - confirm against the original source.
def build_system_metadata
  self.parsed_metadata['id'] = hyrax_record.id
  source_id = hyrax_record.send(work_identifier)
  # Because ActiveTriples::Relation does not respond to #to_ary we can't rely on Array.wrap universally
  source_id = source_id.to_a if source_id.is_a?(ActiveTriples::Relation)
  source_id = Array.wrap(source_id).first
  self.parsed_metadata[source_identifier] = source_id
  model_name = hyrax_record.respond_to?(:to_rdf_representation) ? hyrax_record.to_rdf_representation : hyrax_record.has_model.first
  self.parsed_metadata[key_for_export('model')] = model_name
end

#build_thumbnail_filesObject



317
318
319
320
321
322
323
324
325
# File 'app/models/bulkrax/csv_entry.rb', line 317

# Exports the record's thumbnail as numbered 'thumbnail_file' columns.
# Does nothing unless the exporter is set to include thumbnails.
def build_thumbnail_files
  return unless importerexporter.include_thumbnails

  thumbnails = Array.wrap(hyrax_record.thumbnail)
  # Thumbnails are never joined into a single column.
  handle_join_on_export('thumbnail_file', map_file_sets(thumbnails), false)
end

#build_value(property_name, mapping_config) ⇒ Object



250
251
252
253
254
255
256
257
258
259
260
261
262
# File 'app/models/bulkrax/csv_entry.rb', line 250

# Exports one property of hyrax_record into parsed_metadata.
#
# With a 'join' setting, or when the value is not Enumerable, the value is
# stored as a single joined column; otherwise each element gets its own
# numbered column ("<key>_1", "<key>_2", ...). Properties the record does not
# respond to are ignored.
#
# NOTE(review): the receiver name was missing from the listing (`self.[...]`);
# it is restored here as parsed_metadata - confirm against the original source.
#
# @param property_name [String, Symbol] attribute to read from hyrax_record
# @param mapping_config [Hash] mapping entry; only its 'join' value is read here
def build_value(property_name, mapping_config)
  return unless hyrax_record.respond_to?(property_name.to_s)

  data = hyrax_record.send(property_name.to_s)

  if mapping_config['join'] || !data.is_a?(Enumerable)
    self.parsed_metadata[key_for_export(property_name)] = prepare_export_data_with_join(data)
  else
    data.each_with_index do |d, i|
      self.parsed_metadata["#{key_for_export(property_name)}_#{i + 1}"] = prepare_export_data(d)
    end
  end
end

#collection_identifiersObject



346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
# File 'app/models/bulkrax/csv_entry.rb', line 346

# Resolves the parent references in the record to identifiers of collection entries.
#
# Each reference in the parent column is matched against the CsvCollectionEntry
# rows of the same importer/exporter by source identifier. A non-empty result is
# memoized in @collection_identifiers.
#
# NOTE(review): the attribute read from each entry was missing from the listing
# (`e.&.[](...)`); it is restored here as raw_metadata - confirm against the
# original source.
#
# @return [Array] identifiers of the matching collection entries (possibly empty)
# @raise [StandardError] when one reference matches more than one collection entry
def collection_identifiers
  return @collection_identifiers if @collection_identifiers.present?

  parent_field_mapping = self.class.parent_field(parser)
  return [] unless parent_field_mapping.present? && record[parent_field_mapping].present?

  identifiers = []
  split_references = record[parent_field_mapping].split(Bulkrax.multi_value_element_split_on)
  split_references.each do |c_reference|
    matching_collection_entries = importerexporter.entries.select do |e|
      (e.raw_metadata&.[](source_identifier) == c_reference) &&
        e.is_a?(CsvCollectionEntry)
    end
    raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
    identifiers << matching_collection_entries.first&.identifier
  end
  @collection_identifiers = identifiers.compact.presence || []
end

#collections_created?Boolean

Returns:

  • (Boolean)


365
366
367
368
# File 'app/models/bulkrax/csv_entry.rb', line 365

# Always reports true, which makes find_collection_ids return collection_ids
# without looking anything up.
#
# @return [Boolean] always true
def collections_created?
  # TODO: look into if this method is still needed after new relationships code
  true
end

#establish_factory_classObject



100
101
102
103
104
# File 'app/models/bulkrax/csv_entry.rb', line 100

# Adds the record's value as 'model' metadata for every model field mapping
# that is present as a key in the record.
#
# NOTE(review): the name of the call taking ('model', record[key]) was missing
# from the listing; it is restored as add_metadata, which the page lists among
# the methods included from HasMatchers.
def establish_factory_class
  parser.model_field_mappings.each do |key|
    add_metadata('model', record[key]) if record.key?(key)
  end
end

#find_collection_idsObject



370
371
372
373
374
375
376
377
378
379
380
381
# File 'app/models/bulkrax/csv_entry.rb', line 370

# Returns collection_ids, first adding the ids of any collections referenced by
# collection_identifiers when collections_created? is false.
#
# @return [Array] the entry's collection ids
def find_collection_ids
  return self.collection_ids if collections_created?

  if collection_identifiers.present?
    collection_identifiers.each do |collection_id|
      collection = find_collection(collection_id)
      # Ignore references that resolve to nothing or are already recorded.
      next if collection.blank? || self.collection_ids.include?(collection.id)

      self.collection_ids << collection.id
    end
  end

  self.collection_ids
end

#handle_join_on_export(key, values, join) ⇒ Object



327
328
329
330
331
332
333
334
335
336
# File 'app/models/bulkrax/csv_entry.rb', line 327

# Stores +values+ in parsed_metadata either as one joined column or as
# numbered columns.
#
# NOTE(review): the hash being written to was missing from the listing
# (`[key] = ...`, `.delete(key)`); it is restored here as parsed_metadata -
# confirm against the original source.
#
# @param key [String] base column name
# @param values [Array] values to export
# @param join [Boolean] when truthy, values are joined with
#   Bulkrax.multi_value_element_join_on under +key+; otherwise each value is
#   stored under "<key>_<n>" (n starting at 1) and any entry under +key+ is removed
def handle_join_on_export(key, values, join)
  if join
    parsed_metadata[key] = values.join(Bulkrax.multi_value_element_join_on)
  else
    values.each_with_index do |value, i|
      parsed_metadata["#{key}_#{i + 1}"] = value
    end
    parsed_metadata.delete(key)
  end
end

#key_for_export(key) ⇒ Object

On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data in the opposite direction. Metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the csv parser automatically.



266
267
268
269
270
271
# File 'app/models/bulkrax/csv_entry.rb', line 266

# Translates a metadata key into its export column name.
#
# The key is looked up in the mapping without its numbers; a mapped key is
# replaced by the first of its 'from' names, an unmapped key is kept. Whatever
# was removed by key_without_numbers is appended again.
#
# @param key [String] metadata key, possibly numbered
# @return [String] the column name to use on export
def key_for_export(key)
  base_key = key_without_numbers(key)
  # Whatever remains once the base key is removed is the number to bring back.
  number_suffix = key.sub(base_key, '')
  export_name = mapping[base_key] ? mapping[base_key]['from'].first : base_key

  "#{export_name}#{number_suffix}"
end

#object_metadata(data) ⇒ Object



289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'app/models/bulkrax/csv_entry.rb', line 289

# Expands stringified object data into numbered export columns.
#
# Each element of +data+ is a String holding the inspect-style form of an Array
# of Hashes, for example:
#
#  "[{\"single_object_first_name\"=>\"Fake\", \"single_object_last_name\"=>\"Fakerson\"}]"
#
# Every hash becomes columns named "<export key>_<object number>"; array values
# get an extra "_<item number>" suffix. Numbers start at 1 and nil objects are
# skipped (their position still counts).
#
# SECURITY: the strings are evaluated with eval, which runs them in the full
# Ruby interpreter context. This must never be reached with untrusted data.
# TODO: Would it be possible to store this as a non-string?  Maybe the actual Ruby Array and Hash?
#
# NOTE(review): the method name and the parsed_metadata receiver were missing
# from the listing; the name comes from the page heading and the receiver is
# restored as parsed_metadata - confirm against the original source.
#
# @param data [Array<String>] stringified arrays of hashes
def object_metadata(data)
  data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval

  data.each_with_index do |obj, index|
    next if obj.nil?
    # allow the object_key to be valid whether it's a string or symbol
    obj = obj.with_indifferent_access

    obj.each_key do |key|
      if obj[key].is_a?(Array)
        obj[key].each_with_index do |nested_item, nested_index|
          self.parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(nested_item)
        end
      else
        self.parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(obj[key])
      end
    end
  end
end

#path_to_file(file) ⇒ Object

If only filename is given, construct the path (/files/my_file)



384
385
386
387
388
389
390
391
# File 'app/models/bulkrax/csv_entry.rb', line 384

# Resolves +file+ to an existing path.
#
# A path that already exists is returned as given; otherwise the name is looked
# up inside the parser's path_to_files directory.
#
# @param file [String] a full path or a bare filename
# @return [String] a path that exists on disk
# @raise [RuntimeError] when the file is found in neither place
def path_to_file(file)
  return file if File.exist?(file)

  candidate = File.join(importerexporter.parser.path_to_files, file)
  raise "File #{candidate} does not exist" unless File.exist?(candidate)

  candidate
end

#prepare_export_data(datum) ⇒ Object



281
282
283
284
285
286
287
# File 'app/models/bulkrax/csv_entry.rb', line 281

# Converts a single value for export: ActiveTriples resources become their URI
# string, everything else is passed through untouched.
#
# @param datum [Object] value to export
# @return [Object] the URI string for a resource, otherwise +datum+ itself
def prepare_export_data(datum)
  return datum unless datum.is_a?(ActiveTriples::Resource)

  datum.to_uri.to_s
end

#prepare_export_data_with_join(data) ⇒ Object



273
274
275
276
277
278
279
# File 'app/models/bulkrax/csv_entry.rb', line 273

# Builds the single-column export string for a value.
#
# @param data [Object] a single value or an Enumerable of values
# @return [String] +data.to_s+ for a non-Enumerable, "" for an empty
#   Enumerable, otherwise the prepared elements joined with
#   Bulkrax.multi_value_element_join_on
def prepare_export_data_with_join(data)
  if !data.is_a?(Enumerable)
    # A multi-value may be requested while only a single value is present.
    data.to_s
  elsif data.empty?
    ""
  else
    prepared = data.map { |d| prepare_export_data(d) }
    prepared.join(Bulkrax.multi_value_element_join_on).to_s
  end
end

#recordObject



338
339
340
# File 'app/models/bulkrax/csv_entry.rb', line 338

# The record this entry works from, memoized in @record.
#
# NOTE(review): the right-hand side of `||=` was missing from the listing; it is
# restored here as raw_metadata - confirm against the original source.
#
# @return [Object] the memoized record
def record
  @record ||= raw_metadata
end

#validate_recordObject

Raises:

  • (StandardError)


88
89
90
91
92
93
94
# File 'app/models/bulkrax/csv_entry.rb', line 88

# Ensures there is a record and that it contains every element the parser requires.
#
# @return [nil] when the record is valid
# @raise [StandardError] when the record is nil, or when required elements are
#   missing (the message lists them)
def validate_record
  raise StandardError, 'Record not found' if record.nil?

  entry_parser = importerexporter.parser
  return if entry_parser.required_elements?(record)

  missing = importerexporter.parser.missing_elements(record).join(', ')
  raise StandardError, "Missing required elements, missing element(s) are: #{missing}"
end