Class: Bulkrax::OaiDcParser

Inherits:
ApplicationParser show all
Defined in:
app/parsers/bulkrax/oai_dc_parser.rb

Direct Known Subclasses

OaiQualifiedDcParser

Instance Attribute Summary collapse

Attributes inherited from ApplicationParser

#importerexporter

Instance Method Summary collapse

Methods inherited from ApplicationParser

#base_path, #calculate_type_delay, #create_entry_and_job, export_supported?, #exporter?, #file_sets_total, #find_or_create_entry, #generated_metadata_mapping, #get_field_mapping_hash_for, #import_file_path, import_supported?, #importer?, #invalid_record, #limit_reached?, #model_field_mappings, #new_entry, parser_fields, #path_for_import, #perform_method, #rebuild_entries, #rebuild_entry_query, #record, #related_children_parsed_mapping, #related_children_raw_mapping, #related_parents_parsed_mapping, #related_parents_raw_mapping, #required_elements, #retrieve_cloud_files, #setup_export_file, #source_identifier, #untar, #unzip, #valid_import?, #visibility, #work_entry_class, #work_identifier, #work_identifier_search_field, #write, #write_files, #write_import_file, #zip

Constructor Details

#initialize(importerexporter) ⇒ OaiDcParser

Returns a new instance of OaiDcParser.



8
9
10
11
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 8

# Builds the parser and prepares the headers later handed to the OAI client.
#
# @param importerexporter [Object] object this parser works for; it must
#   respond to `user`, whose `email` becomes the `from` header
def initialize(importerexporter)
  super
  requester_email = importerexporter.user.email
  @headers = { from: requester_email }
end

Instance Attribute Details

#headers ⇒ Object

Returns the value of attribute headers.



5
6
7
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 5

# Reader for the headers hash assigned in #initialize.
#
# @return [Hash, nil] the current value of `@headers`
def headers
  @headers
end

Instance Method Details

#client ⇒ Object



13
14
15
16
17
18
19
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 13

# Memoized OAI client for the importer's configured endpoint.
#
# @return [OAI::Client] client built from parser_fields['base_url'], this
#   parser's headers and the 'libxml' parser option
# @raise [OAIError] when building the client raises any StandardError
def client
  return @client if @client

  endpoint = importerexporter.parser_fields['base_url']
  @client = OAI::Client.new(endpoint, headers: headers, parser: 'libxml')
rescue StandardError
  raise OAIError
end

#collection_entry_class ⇒ Object



29
30
31
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 29

# Entry class used when creating collection entries (see #create_collections).
#
# @return [Class] the OaiSetEntry constant
def collection_entry_class
  OaiSetEntry
end

#collection_name ⇒ Object



21
22
23
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 21

# Name of the OAI set to import, memoized after the first lookup.
#
# @return [Object] parser_fields['set'] when it is truthy, otherwise 'all'
def collection_name
  return @collection_name if @collection_name

  @collection_name = parser_fields['set'] || 'all'
end

#collections ⇒ Object



140
141
142
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 140

# Memoized result of `list_sets`.
#
# @return [Object] whatever `list_sets` returned on the first call
def collections
  return @collections if @collections

  @collections = list_sets
end

#collections_total ⇒ Object



144
145
146
147
148
149
150
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 144

# Number of collections this import covers.
#
# @return [Integer] `collections.count` when importing 'all' sets, otherwise 1
def collections_total
  return 1 unless collection_name == 'all'

  collections.count
end

#create_collections ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 72

# Finds or creates one collection entry per matching OAI set and imports each
# collection immediately.
#
# Sets come from #collections. When #collection_name is not 'all', only the
# set whose spec equals it is processed.
#
# @return [Object] the result of `collections.each_with_index`
def create_collections
  base_metadata = { visibility: 'open' }
  if defined?(::Hyrax)
    base_metadata[:collection_type_gid] =
      Hyrax::CollectionType.find_or_create_default_collection_type.to_global_id.to_s
  end

  collections.each_with_index do |set, index|
    next unless collection_name == 'all' || collection_name == set.spec

    unique_collection_identifier = importerexporter.unique_collection_identifier(set.spec)
    # Start from a copy so entries never share one mutable metadata hash.
    metadata = base_metadata.dup
    metadata[:title] = [set.name]
    metadata[work_identifier] = [unique_collection_identifier]

    new_entry = collection_entry_class.where(
      importerexporter: importerexporter,
      identifier: unique_collection_identifier,
      raw_metadata: metadata
    ).first_or_create!
    # perform now to ensure this gets created before work imports start
    ImportCollectionJob.perform_now(new_entry.id, importerexporter.current_run.id)
    increment_counters(index, collection: true)
  end
end

#create_file_sets ⇒ Object



108
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 108

# Intentionally empty: this parser performs no work for file sets.
#
# @return [nil]
def create_file_sets
  nil
end

#create_objects(types = []) ⇒ Object



66
67
68
69
70
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 66

# Invokes the `create_<plural type>` method for each requested object type,
# in the order given.
#
# @param types [Array<String>] singular type names; each must respond to
#   `pluralize`
# @return [Array<String>] the `types` array that was passed in
def create_objects(types = [])
  types.each do |object_type|
    creator = "create_#{object_type.pluralize}"
    send(creator)
  end
end

#create_relationships ⇒ Object



110
111
112
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 110

# Enqueues ScheduleRelationshipsJob for this importer with a five-minute wait.
#
# @return [Object] whatever `perform_later` returns
def create_relationships
  delayed_job = ScheduleRelationshipsJob.set(wait: 5.minutes)
  delayed_job.perform_later(importer_id: importerexporter.id)
end

#create_works ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 91

# Walks the quick (identifier-only) record listing and creates an entry and
# job for every record that yields a source identifier, stopping once the
# import limit is reached.
#
# Any StandardError raised along the way is passed to `set_status_info`
# instead of propagating.
#
# @return [Object, nil] nil when there are no records; otherwise the result of
#   `importer.record_status`, or of `set_status_info` after an error
def create_works
  listing = records(quick: true)
  return if listing.blank?

  listing.full.each_with_index do |record, position|
    source_id = record_has_source_identifier(record, position)
    next unless source_id
    break if limit_reached?(limit, position)

    seen[source_id] = true
    create_entry_and_job(record, 'work', source_id)
    increment_counters(position, work: true)
  end
  importer.record_status
rescue StandardError => e
  set_status_info(e)
end

#entry_class ⇒ Object



25
26
27
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 25

# Entry class associated with this parser.
# NOTE(review): presumably the class used for work entries — confirm against
# ApplicationParser, which is not visible here.
#
# @return [Class] the OaiDcEntry constant
def entry_class
  OaiDcEntry
end

#file_set_entry_class ⇒ Object



33
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 33

# This parser defines no entry class for file sets.
#
# @return [nil]
def file_set_entry_class
  nil
end

#import_fields ⇒ Object

the set of fields available in the import data



60
61
62
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 60

# The set of fields available in the import data.
#
# @return [Array<String>] a new array of the fifteen field names on each call
def import_fields
  %w[
    contributor coverage creator date description format identifier language
    publisher relation rights source subject title type
  ]
end

#record_deleted?(_record) ⇒ Boolean

Returns:

  • (Boolean)


118
119
120
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 118

# Always reports the record as not deleted; the argument is ignored.
#
# @param _record [Object] unused
# @return [Boolean] always false
def record_deleted?(_record)
  false
end

#record_has_source_identifier(record, index) ⇒ Object

OAI records do not let us set the source identifier easily



127
128
129
130
131
132
133
134
135
136
137
138
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 127

# Resolves the source identifier for a record.
#
# The record is asked for the attribute named by `source_identifier`. When
# that value is blank, the `Bulkrax.fill_in_blank_source_identifiers`
# callable (if one is configured) generates an identifier; otherwise the
# record is reported through `invalid_record`.
#
# @param record [Object] responds to the `source_identifier` method and `to_h`
# @param index [Integer] position of the record, passed to the fill-in callable
# @return [Object, false] the identifier, or false when none could be found
def record_has_source_identifier(record, index)
  identifier = record.send(source_identifier)
  return identifier unless identifier.blank?

  generator = Bulkrax.fill_in_blank_source_identifiers
  return generator.call(self, index) if generator.present?

  invalid_record("Missing #{source_identifier} for #{record.to_h}\n")
  false
end

#record_raw_metadata(_record) ⇒ Object



114
115
116
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 114

# Raw metadata for a record; this parser always returns nil and ignores the
# argument.
#
# @param _record [Object] unused
# @return [nil]
def record_raw_metadata(_record)
  nil
end

#record_remove_and_rerun?(_record) ⇒ Boolean

Returns:

  • (Boolean)


122
123
124
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 122

# Always answers false; the argument is ignored.
#
# @param _record [Object] unused
# @return [Boolean] always false
def record_remove_and_rerun?(_record)
  false
end

#records(opts = {}) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 35

# Fetches records from the OAI endpoint.
#
# The passed hash is modified in place: `:metadata_prefix` defaults to the
# importer's configured prefix, `:set` is added unless importing 'all', and
# `:from` is added when a previous import time exists and `only_updates` is
# truthy. A truthy `:quick` key is removed before the request.
#
# With `quick: true` the identifier listing is requested on every call and
# stored in @short_records. Otherwise the full record listing is requested
# once and memoized in @records.
#
# @param opts [Hash] request options, plus the optional `:quick` flag
# @return [Object, Array] the client response, or [] when the server answers
#   with the "noRecordsMatch" error code
# @raise [OAI::Exception] for any other OAI error code
def records(opts = {})
  opts[:metadata_prefix] ||= importerexporter.parser_fields['metadata_prefix']
  opts[:set] = collection_name unless collection_name == 'all'

  last_import = importerexporter.last_imported_at
  opts[:from] = last_import.strftime("%Y-%m-%d") if last_import && only_updates

  if opts[:quick]
    opts.delete(:quick)
    begin
      @short_records = client.list_identifiers(opts)
    rescue OAI::Exception => e
      raise e unless e.code == "noRecordsMatch"

      @short_records = []
    end
  else
    begin
      @records ||= client.list_records(opts.merge(metadata_prefix: parser_fields['metadata_prefix']))
    rescue OAI::Exception => e
      raise e unless e.code == "noRecordsMatch"

      @records = []
    end
  end
end

#total ⇒ Object



157
158
159
160
161
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 157

# Total record count reported by the server, memoized in @total.
#
# Reads the "completeListSize" attribute of the first resumptionToken element
# found in the quick listing's document. Any StandardError (for example, no
# resumptionToken present) makes the total 0.
#
# @return [Integer]
def total
  @total ||= begin
    token = records(quick: true).doc.find(".//resumptionToken").to_a.first
    token.attributes["completeListSize"].to_i
  end
rescue StandardError
  @total = 0
end

#works_total ⇒ Object

TODO: change to differentiate between collection and work records when adding ability to import collection metadata



153
154
155
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 153

# Number of works to import; currently identical to #total.
#
# @return [Object] the value returned by #total
def works_total
  total
end