Class: Bulkrax::XmlParser

Inherits:
ApplicationParser show all
Defined in:
app/parsers/bulkrax/xml_parser.rb

Instance Attribute Summary

Attributes inherited from ApplicationParser

#headers, #importerexporter

Instance Method Summary collapse

Methods inherited from ApplicationParser

#base_path, #collections_total, #create_objects, #create_relationships, export_supported?, #exporter?, #file_sets_total, #find_or_create_entry, #generated_metadata_mapping, #get_field_mapping_hash_for, #import_file_path, import_supported?, #importer?, #initialize, #invalid_record, #limit_reached?, #model_field_mappings, #new_entry, parser_fields, #path_for_import, #perform_method, #record, #record_has_source_identifier, #related_children_parsed_mapping, #related_children_raw_mapping, #related_parents_parsed_mapping, #related_parents_raw_mapping, #required_elements, #retrieve_cloud_files, #setup_export_file, #source_identifier, #unzip, #visibility, #work_identifier, #write, #write_files, #write_import_file, #zip

Constructor Details

This class inherits a constructor from Bulkrax::ApplicationParser

Instance Method Details

#collection_entry_class ⇒ Object

TODO:

not yet supported



11
# File 'app/parsers/bulkrax/xml_parser.rb', line 11

# Placeholder for the collection entry class.
#
# @todo not yet supported
# @return [nil] always; the method has no implementation yet
def collection_entry_class
  nil
end

#create_collections ⇒ Object

TODO:

not yet supported



14
# File 'app/parsers/bulkrax/xml_parser.rb', line 14

# Placeholder for collection creation.
#
# @todo not yet supported
# @return [nil] always; the method has no implementation yet
def create_collections
  nil
end

#create_file_sets ⇒ Object

TODO:

not yet supported



20
# File 'app/parsers/bulkrax/xml_parser.rb', line 20

# Placeholder for file set creation.
#
# @todo not yet supported
# @return [nil] always; the method has no implementation yet
def create_file_sets
  nil
end

#create_works ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'app/parsers/bulkrax/xml_parser.rb', line 95

# Walks every parsed record and dispatches one job per record.
#
# For each record, in order:
# * records without a source identifier are skipped;
# * iteration stops once the record index reaches `limit` (when a limit is set);
# * the identifier is marked in `seen` and an entry is found or created;
# * a DeleteWorkJob is dispatched when the record carries a `:delete` value,
#   otherwise an ImportWorkJob is dispatched (via `perform_method`).
#
# After the loop the importer's status is recorded. Any StandardError raised
# along the way is handed to `set_status_info` instead of propagating.
def create_works
  records.each_with_index do |rec, position|
    next unless record_has_source_identifier(rec, position)
    # Skipped records still consume an index, so the limit is checked against
    # the raw position rather than the number of records processed.
    break unless limit.nil? || position < limit

    identifier = rec[source_identifier]
    seen[identifier] = true
    entry = find_or_create_entry(entry_class, identifier, 'Bulkrax::Importer', rec)
    if rec[:delete].present?
      # The delete job receives the entry and run objects themselves.
      DeleteWorkJob.send(perform_method, entry, current_run)
    else
      # The import job receives ids only.
      ImportWorkJob.send(perform_method, entry.id, current_run.id)
    end
    increment_counters(position, work: true)
  end
  importer.record_status
rescue StandardError => error
  set_status_info(error)
end

#entry_class ⇒ Object



6
7
8
# File 'app/parsers/bulkrax/xml_parser.rb', line 6

# The entry class this parser works with.
#
# @return [Class] Bulkrax::XmlEntry
def entry_class
  xml_entry = Bulkrax::XmlEntry
  xml_entry
end

#file_paths ⇒ Object

Return all files in the import directory and sub-directories



67
68
69
70
71
72
73
74
75
76
# File 'app/parsers/bulkrax/xml_parser.rb', line 67

# Lists every regular file found under the import location, recursively.
#
# When the import path is a single file (`file?`), the search starts from that
# file's directory; otherwise it starts from the import path itself.
# Directories are excluded from the result. The list is memoized in
# `@file_paths`.
#
# @return [Array<String>] paths of all files below the search root
def file_paths
  @file_paths ||= begin
    search_root = file? ? File.dirname(import_file_path) : import_file_path
    Dir.glob("#{search_root}/**/*").select { |candidate| File.file?(candidate) }
  end
end

#file_set_entry_class ⇒ Object

TODO:

not yet supported



17
# File 'app/parsers/bulkrax/xml_parser.rb', line 17

# Placeholder for the file set entry class.
#
# @todo not yet supported
# @return [nil] always; the method has no implementation yet
def file_set_entry_class
  nil
end

#good_file_type?(path) ⇒ Boolean

Returns:

  • (Boolean)


91
92
93
# File 'app/parsers/bulkrax/xml_parser.rb', line 91

# Decides whether a path looks like an XML metadata file.
#
# A path is accepted when its extension is one of .xml, .xls or .xsd, or,
# failing that, when the MIME type reported by Marcel contains
# 'application/xml'.
#
# @param path [String] path to inspect
# @return [Boolean]
def good_file_type?(path)
  # Cheap extension check first; Marcel is only consulted as a fallback.
  return true if %w[.xml .xls .xsd].include?(File.extname(path))

  ::Marcel::MimeType.for(path).include?('application/xml')
end

#import_fields ⇒ Object

TODO:

not yet supported



28
# File 'app/parsers/bulkrax/xml_parser.rb', line 28

# Placeholder for the list of import fields.
#
# @todo not yet supported
# @return [nil] always; the method has no implementation yet
def import_fields
  nil
end

#metadata_paths ⇒ Object

If the import_file_path is an xml file, return that. Otherwise, return all xml files in the given folder.



80
81
82
83
84
85
86
87
88
89
# File 'app/parsers/bulkrax/xml_parser.rb', line 80

# Resolves the metadata files to read for this import.
#
# If the import path is itself a single file of an accepted type, that file is
# the only metadata path. Otherwise every file from `file_paths` is kept when
# it has an accepted type and its path contains "import_<importerexporter id>".
# The result is memoized in `@metadata_paths`.
#
# @return [Array<String>] paths of the metadata files
def metadata_paths
  @metadata_paths ||=
    if file? && good_file_type?(import_file_path)
      [import_file_path]
    else
      file_paths.select do |f|
        good_file_type?(f) && f.include?("import_#{importerexporter.id}")
      end
    end
end

#record_element ⇒ Object



62
63
64
# File 'app/parsers/bulkrax/xml_parser.rb', line 62

# Name of the XML element that wraps one record, read from the
# 'record_element' parser field.
#
# @return [Object, nil] the configured value, or nil when the key is absent
def record_element
  fields = parser_fields
  fields['record_element']
end

#records(_opts = {}) ⇒ Object

For multiple, we expect to find metadata for multiple works in the given metadata file(s). For single, we expect to find metadata for a single work in the given metadata file(s);

if the file contains more than one record, we take only the first

In either case there may be multiple metadata files returned by metadata_paths



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'app/parsers/bulkrax/xml_parser.rb', line 43

# Builds the list of record data read from every metadata file.
#
# The 'import_type' parser field selects the strategy:
# * 'multiple' - every element matching `record_element` in every metadata
#   file becomes a record;
# * 'single'   - only the first matching element of each metadata file is used.
#
# In both cases nil results from `data_for_entry` are dropped. The result is
# memoized in `@records`. For any other import type no records are built and
# nil is returned.
#
# @param _opts [Hash] unused
# @return [Array, nil] record data, or nil for an unrecognised import type
def records(_opts = {})
  @records ||=
    case parser_fields['import_type']
    when 'multiple'
      extracted = metadata_paths.flat_map do |md|
        # Retrieve all records in this file
        elements = entry_class.read_data(md).xpath("//#{record_element}")
        elements.map { |el| entry_class.data_for_entry(el, source_identifier, self) }
      end
      # Flatten because data_for_entry may itself return arrays
      extracted.compact.flatten
    when 'single'
      metadata_paths.map do |md|
        data = entry_class.read_data(md).xpath("//#{record_element}").first # Take only the first record
        entry_class.data_for_entry(data, source_identifier, self)
      end.compact # No need to flatten because we take only the first record
    end
end

#total ⇒ Object



114
115
116
# File 'app/parsers/bulkrax/xml_parser.rb', line 114

# Number of records found in the metadata file(s).
#
# @return [Integer] size of `records`
def total
  parsed = records
  parsed.size
end

#valid_import? ⇒ Boolean

Returns:

  • (Boolean)


30
31
32
33
34
35
36
37
# File 'app/parsers/bulkrax/xml_parser.rb', line 30

# Checks that the import has something to work with.
#
# The import is invalid when no metadata files are found, or when the files
# yield no records. In either case the error is passed to `set_status_info`
# and false is returned rather than raising to the caller.
#
# @return [Boolean] true when metadata files and records are both present
def valid_import?
  raise StandardError, 'No metadata files found' if metadata_paths.blank?
  raise StandardError, 'No records found' if records.blank?
  true
rescue StandardError => e
  set_status_info(e)
  false
end

#works_total ⇒ Object

TODO: change to differentiate between collection and work records when adding ability to import collection metadata



23
24
25
# File 'app/parsers/bulkrax/xml_parser.rb', line 23

# Number of work records in this import. Currently the same as `total`.
#
# @todo differentiate between collection and work records once collection
#   metadata can be imported
# @return [Object] whatever `total` returns
def works_total
  overall = total
  overall
end