Class: Bulkrax::XmlEntry

Inherits:
Entry show all
Defined in:
app/models/bulkrax/xml_entry.rb

Overview

Generic XML Entry

Instance Attribute Summary

Attributes inherited from Entry

#all_attrs

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Entry

#build, #exporter?, #fetch_field_mapping, #find_collection, #importer?, #last_run, parent_field, #source_identifier, #work_identifier

Methods included from HasLocalProcessing

#add_local

Methods included from StatusInfo

#current_status, #failed?, #last_error, #set_status_info, #skipped?, #status, #status_at, #succeeded?

Methods included from ExportBehavior

#build_export_metadata, #build_for_exporter, #filename, #hyrax_record

Methods included from ImportBehavior

#active_id_for_authority?, #add_admin_set_id, #add_collections, #add_rights_statement, #add_user_to_permission_templates!, #add_visibility, #build_for_importer, #child_jobs, #collections_created?, #factory, #factory_class, #find_collection_ids, #override_rights_statement, #parent_jobs, #rights_statement, #sanitize_controlled_uri_value, #sanitize_controlled_uri_values!, #validate_value

Methods included from HasMatchers

#add_metadata, #excluded?, #field_supported?, #field_to, #fields_that_are_always_multiple, #fields_that_are_always_singular, #get_object_name, #matched_metadata, #multiple?, #multiple_metadata, #schema_form_definitions, #set_parsed_data, #set_parsed_object_data, #single_metadata, #supported_bulkrax_fields

Class Method Details

.data_for_entry(data, source_id, _parser) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'app/models/bulkrax/xml_entry.rb', line 16

# Builds the raw-metadata hash for a single XML record.
#
# @param data [#xpath, #to_xml] the XML node holding one record
# @param source_id [Object] element name holding the record's identifier; also
#   used, unchanged, as the key for that identifier in the returned hash
# @param _parser [Object] unused
# @return [Hash] the identifier text keyed by +source_id+, plus +:delete+ (text of
#   the first "delete" element, or nil), +:data+ (the record serialized on one
#   line), and empty +:collection+ / +:children+ arrays
# @raise [NoMethodError] when no element named +source_id+ exists under +data+
def self.data_for_entry(data, source_id, _parser)
  identifier_text = data.xpath(".//*[name()='#{source_id}']").first.text
  delete_text = data.xpath(".//*[name()='delete']").first&.text

  save_options = Nokogiri::XML::Node::SaveOptions
  serialized = data.to_xml(
    encoding: 'UTF-8',
    save_with: save_options::NO_DECLARATION | save_options::NO_EMPTY_TAGS
  )
  # Collapse the markup onto one line: strip newlines and tabs, then squeeze runs of spaces.
  single_line = serialized.delete("\n").delete("\t").squeeze(' ')

  {
    source_id => identifier_text,
    delete: delete_text,
    data: single_line,
    collection: [],
    children: []
  }
end

.fields_from_data(data) ⇒ Object



8
# File 'app/models/bulkrax/xml_entry.rb', line 8

# Intentional no-op: the argument is ignored and nil is always returned.
#
# @param data [Object] ignored
# @return [nil]
def self.fields_from_data(data)
  nil
end

.read_data(path) ⇒ Object



10
11
12
13
14
# File 'app/models/bulkrax/xml_entry.rb', line 10

# Parses the XML file at +path+ and strips all namespaces from the document.
#
# The file is opened with the block form of File.open so the handle is closed as
# soon as parsing finishes, and so the path is never interpreted by Kernel#open
# (which treats a leading "|" as a command to run).
#
# @param path [String] filesystem path of the XML file to read
# @return [Object] the result of calling +remove_namespaces!+ on the parsed document
def self.read_data(path)
  # This doesn't cope with BOM sequences:
  # Nokogiri::XML(open(path), nil, 'UTF-8').remove_namespaces!
  File.open(path) { |file| Nokogiri::XML(file).remove_namespaces! }
end

Instance Method Details

#add_metadata_from_record ⇒ Object



73
74
75
76
77
# File 'app/models/bulkrax/xml_entry.rb', line 73

# Passes every candidate metadata node found in the record to #add_metadata.
#
# Relies on #each_candidate_metadata_node_name_and_content (called with its
# default element list) to yield each element name and its content.
#
# @return [Object] whatever the iterator method returns
def add_metadata_from_record
  each_candidate_metadata_node_name_and_content do |name, content|
    add_metadata(name, content)
  end
end

#build_metadata ⇒ Object

Raises:

  • (StandardError)


40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'app/models/bulkrax/xml_entry.rb', line 40

# Builds the parsed metadata for this entry from its raw metadata and XML record.
#
# Resets the parsed metadata, seeds it with the source identifier under
# #work_identifier, then runs the factory-class, record-metadata, visibility,
# rights-statement, admin-set, collection and local-processing steps in that order.
#
# @return [Hash] the populated parsed metadata
# @raise [StandardError] when the record is nil, when the raw metadata has no
#   value for #source_identifier, or when no title was produced
def build_metadata
  raise StandardError, 'Record not found' if record.nil?
  raise StandardError, "Missing source identifier (#{source_identifier})" if self.raw_metadata[source_identifier].blank?
  self.parsed_metadata = {}
  self.parsed_metadata[work_identifier] = [self.raw_metadata[source_identifier]]

  # We need to establish the #factory_class before we proceed with the metadata.  See
  # https://github.com/samvera-labs/bulkrax/issues/702 for further details.
  #
  # tl;dr - if we don't have the right factory_class we might skip properties that are
  # specifically assigned to the factory class
  establish_factory_class
  add_metadata_from_record
  add_visibility
  add_rights_statement
  add_admin_set_id
  add_collections
  # The file reference is copied over from the raw metadata unchanged.
  self.parsed_metadata['file'] = self.raw_metadata['file']

  add_local
  raise StandardError, "title is required" if self.parsed_metadata['title'].blank?
  self.parsed_metadata
end

#each_candidate_metadata_node_name_and_content(elements: field_mapping_from_values_for_xml_element_names) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'app/models/bulkrax/xml_entry.rb', line 79

# Yields the element name and stringified content of every non-blank child found
# under each matching element of the record.
#
# @param elements [Enumerable<String>] XML element names to look up; defaults to
#   #field_mapping_from_values_for_xml_element_names
# @yieldparam name [String] the element name that was searched for
# @yieldparam content [String] one child of a matching node, converted with +to_s+
# @return [Object] the result of +elements.each+
def each_candidate_metadata_node_name_and_content(elements: field_mapping_from_values_for_xml_element_names)
  elements.each do |name|
    # NOTE: the XML element name's case matters
    nodes = record.xpath("//*[name()='#{name}']")
    next if nodes.empty?

    nodes.each do |node|
      node.children.each do |content|
        text = content.to_s
        next if text.blank?

        yield(name, text)
      end
    end
  end
end

#establish_factory_class ⇒ Object



64
65
66
67
68
69
70
71
# File 'app/models/bulkrax/xml_entry.rb', line 64

# Adds the metadata found in the parser's model-field elements, ahead of the
# rest of the record's metadata.
#
# Only the element names returned by +parser.model_field_mappings+ are searched,
# and only those names are passed on to #add_metadata.
#
# @return [Object] whatever the iterator method returns
def establish_factory_class
  model_field_names = parser.model_field_mappings

  each_candidate_metadata_node_name_and_content(elements: model_field_names) do |name, content|
    next unless model_field_names.include?(name)
    add_metadata(name, content)
  end
end

#field_mapping_from_values_for_xml_element_names ⇒ Array<String> Also known as: xml_elements

TODO:

Additionally, we may want to revisit the XML parser's fundamental logic; namely, we only parse nodes that are explicitly declared within the `from`. This is a bit different from other parsers, in that they will make assumptions about each encountered column (in the case of CSV) or node (in the case of OAI). tl;dr - Here there be dragons.

Returns the explicitly declared “from” key’s value of each parser’s element’s value. (Yes, I would like a proper class for the thing I just tried to describe.)

Returns:

  • (Array<String>)


104
105
106
107
108
# File 'app/models/bulkrax/xml_entry.rb', line 104

# Collects the +:from+ values declared in the field mappings registered for this
# entry's parser class.
#
# @return [Array<String>] the +:from+ values, flattened, with nils and
#   duplicates removed
def field_mapping_from_values_for_xml_element_names
  mappings = Bulkrax.field_mappings[importerexporter.parser_klass]
  from_values = mappings.map { |(_field, options)| options[:from] }
  from_values.flatten.compact.uniq
end

#record ⇒ Object

def self.matcher_class; end



36
37
38
# File 'app/models/bulkrax/xml_entry.rb', line 36

# Parses the XML stored under the 'data' key of the raw metadata, as UTF-8.
#
# The result is memoized in +@record+, so the markup is parsed once per instance
# (a nil or false result is not cached and would be re-parsed).
#
# @return [Object] the value returned by +Nokogiri::XML+
def record
  @record ||= Nokogiri::XML(self.raw_metadata['data'], nil, 'UTF-8')
end