Class: ImportDataset::DarwinCore::Occurrences

Inherits:
ImportDataset::DarwinCore show all
Defined in:
app/models/import_dataset/darwin_core/occurrences.rb

Constant Summary collapse

MINIMUM_FIELD_SET =

TODO: Can occurrenceID requirement be dropped? Should other fields be added here?

["occurrenceID", "scientificName", "basisOfRecord"]

Constants inherited from ImportDataset::DarwinCore

CHECKLIST_ROW_TYPE, OCCURRENCES_ROW_TYPE

Instance Attribute Summary

Attributes inherited from ImportDataset

#description, #metadata, #source_content_type, #source_file_name, #source_file_size, #source_updated_at, #status

Instance Method Summary collapse

Methods inherited from ImportDataset::DarwinCore

#add_filters, #core_records_fields, create_with_subtype_detection, #default_nomenclatural_code, #destroy_namespace, #get_core_record_identifier_namespace, #get_dwc_default_values, #get_dwc_headers, #get_dwc_records, #get_field_mapping, #get_fields_mapping, #get_normalized_dwc_term, #get_records, #import, #initialize, #progress, #set_import_settings, #stage, #start_import, #stop_import

Methods inherited from ImportDataset

#delete_origin_relationships, #stage

Methods included from Shared::OriginRelationship

#new_objects, #old_objects, #reject_origin_relationships, #set_origin

Methods included from Shared::IsData

#errors_excepting, #full_error_messages_excepting, #identical, #is_community?, #is_destroyable?, #is_editable?, #is_in_use?, #is_in_users_projects?, #metamorphosize, #similar

Methods included from Housekeeping

#has_polymorphic_relationship?

Methods inherited from ApplicationRecord

transaction_with_retry

Constructor Details

This class inherits a constructor from ImportDataset::DarwinCore

Instance Method Details

#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ Object



210
211
212
213
214
215
216
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 210

# Registers a collectionCode -> namespace mapping in the dataset metadata,
# unless the collection code is nil or a mapping for it already exists.
# The mapping list is kept sorted by collection code (compared as strings).
#
# The record is saved in every case, including when nothing was added.
#
# @param collection_code [String, nil] collectionCode value to register
# @param namespace_id [Integer, nil] namespace to associate (nil = not chosen yet)
# @return [Object] the result of +save!+
def add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil)
  unless collection_code.nil? || get_catalog_number_collection_code_namespace_mapping(collection_code)
    mappings = metadata["catalog_numbers_collection_code_namespaces"]
    mappings << [collection_code, namespace_id]
    # to_s so that entries can be ordered regardless of the code's type.
    mappings.sort! { |a, b| a[0].to_s <=> b[0].to_s }
  end
  save!
end

#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ Object



202
203
204
205
206
207
208
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 202

# Registers an (institutionCode, collectionCode) -> namespace mapping in the
# dataset metadata unless one already exists for that pair. The mapping list
# is kept sorted by the pair, each element compared as a string.
#
# The record is saved in every case, including when nothing was added.
#
# @param institution_code [String, nil] institutionCode value of the pair
# @param collection_code [String, nil] collectionCode value of the pair
# @param namespace_id [Integer, nil] namespace to associate (nil = not chosen yet)
# @return [Object] the result of +save!+
def add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil)
  unless get_catalog_number_namespace_mapping(institution_code, collection_code)
    mappings = metadata["catalog_numbers_namespaces"]
    mappings << [[institution_code, collection_code], namespace_id]
    # to_s so that pairs containing nil can still be ordered.
    mappings.sort! { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
  end
  save!
end

#check_field_set ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 111

# Checks that the staged source file provides every header listed in
# MINIMUM_FIELD_SET, adding one error on :source per missing header.
# Does nothing when the source is not staged.
#
# Headers are read according to the staged file's extension:
# * .zip            - via get_dwc_headers on the archive's core
# * .xls/.xlsx/.ods - spreadsheet converted to CSV through Roo
# * anything else   - parsed as tab-separated text without quoting
def check_field_set
  return unless source.staged?

  path = source.staged_path

  headers =
    case path
    when /\.zip\z/i
      get_dwc_headers(::DarwinCore.new(path).core)
    when /\.(xlsx?|ods)\z/i
      CSV.parse(Roo::Spreadsheet.open(path).to_csv, headers: true).headers
    else
      CSV.read(path, headers: true, col_sep: "\t", quote_char: nil, encoding: 'bom|utf-8').headers
    end

  (MINIMUM_FIELD_SET - headers).each do |missing|
    errors.add(:source, "required field #{missing} missing.")
  end
end

#containerize_dup_cat_no? ⇒ Boolean

Returns:

  • (Boolean)


218
219
220
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 218

# Reads the "containerize_dup_cat_no" flag from the "import_settings"
# section of the dataset metadata.
#
# @return [Boolean] true only when the setting is present and truthy
def containerize_dup_cat_no?
  !!metadata.dig("import_settings", "containerize_dup_cat_no")
end

#core_records_class ⇒ Object



12
13
14
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 12

# Class used for the core records of this dataset type.
#
# @return [Class] DatasetRecord::DarwinCore::Occurrence
def core_records_class
  DatasetRecord::DarwinCore::Occurrence
end

#core_records_identifier_name ⇒ Object



16
17
18
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 16

# Name of the field that identifies a core record in this dataset type.
#
# @return [String] always 'occurrenceID'
def core_records_identifier_name
  'occurrenceID'
end

#enable_organization_determiners? ⇒ Boolean

Returns:

  • (Boolean)


238
239
240
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 238

# Reads the "enable_organization_determiners" flag from the
# "import_settings" section of the dataset metadata.
#
# @return [Boolean] true only when the setting is present and truthy
def enable_organization_determiners?
  !!metadata.dig("import_settings", "enable_organization_determiners")
end

#enable_organization_determiners_alt_name? ⇒ Boolean

Returns:

  • (Boolean)


242
243
244
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 242

# Reads the "enable_organization_determiners_alt_name" flag from the
# "import_settings" section of the dataset metadata.
#
# @return [Boolean] true only when the setting is present and truthy
def enable_organization_determiners_alt_name?
  !!metadata.dig("import_settings", "enable_organization_determiners_alt_name")
end

#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ Object (private)



252
253
254
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 252

# Looks up the [collection_code, namespace_id] entry stored in the dataset
# metadata for the given collection code.
#
# @param collection_code [String, nil] collectionCode value to look up
# @return [Array, nil] the stored entry itself (not a copy), or nil when the
#   list is absent or holds no entry for the code
def get_catalog_number_collection_code_namespace_mapping(collection_code)
  metadata["catalog_numbers_collection_code_namespaces"]&.detect { |m| m[0] == collection_code }
end

#get_catalog_number_namespace(institution_code, collection_code) ⇒ Object



131
132
133
134
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 131

# Resolves the namespace id for catalog numbers with the given codes.
#
# The (institutionCode, collectionCode) mapping takes precedence; when it is
# missing or has no namespace set, the collectionCode-only mapping is used.
#
# @param institution_code [String, nil]
# @param collection_code [String, nil]
# @return [Object, nil] second element of the matching mapping entry, or nil
def get_catalog_number_namespace(institution_code, collection_code)
  pair_entry = get_catalog_number_namespace_mapping(institution_code, collection_code)
  namespace_id = pair_entry&.at(1)
  return namespace_id if namespace_id

  fallback_entry = get_catalog_number_collection_code_namespace_mapping(collection_code)
  fallback_entry&.at(1)
end

#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ Object (private)



248
249
250
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 248

# Looks up the [[institution_code, collection_code], namespace_id] entry
# stored in the dataset metadata for the given pair of codes.
#
# @param institution_code [String, nil]
# @param collection_code [String, nil]
# @return [Array, nil] the stored entry itself (not a copy), or nil when the
#   list is absent or holds no entry for the pair
def get_catalog_number_namespace_mapping(institution_code, collection_code)
  metadata["catalog_numbers_namespaces"]&.detect { |m| m[0] == [institution_code, collection_code] }
end

#get_event_id_namespace ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 20

# Returns the Namespace used for eventID identifiers of this dataset,
# creating and persisting one when none is usable.
#
# The namespace id is read from metadata["namespaces"]["eventID"]. A new
# Namespace is created when that id is missing or no Namespace with that id
# is found; its id is then written back to the metadata and the record saved.
# The resolved Namespace is memoized in @event_id_identifier_namespace.
#
# @return [Namespace]
def get_event_id_namespace
  id = metadata.dig("namespaces", "eventID")

  if id.nil? || (@event_id_identifier_namespace ||= Namespace.find_by(id: id)).nil?
    # Random suffix keeps name/short_name distinct between generated namespaces.
    random = SecureRandom.hex(4)
    project_name = Project.find(Current.project_id).name
    namespace_name = "eventID namespace for \"#{description}\" dataset in \"#{project_name}\" project [#{random}]"

    @event_id_identifier_namespace = Namespace.create!(
      name: namespace_name,
      short_name: "eventID-#{random}",
      verbatim_short_name: "eventID",
      delimiter: ':'
    )

    # The read above tolerates a missing "namespaces" section, so the write
    # initialises it rather than failing on nil.
    (metadata["namespaces"] ||= {})["eventID"] = @event_id_identifier_namespace.id
    save!
  end

  @event_id_identifier_namespace
end

#perform_staging ⇒ Object

Stages core (Occurrence) records and all extension records.



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 43

# Stages core (Occurrence) records and all extension records.
#
# Steps, in order:
# 1. Reads records and headers from the source and stores the headers (plus
#    an empty catalog_numbers_namespaces list) in the dataset metadata.
# 2. Creates one DatasetRecord::DarwinCore::Occurrence per core record.
#    Records with a blank catalogNumber are "Ready"; the others are
#    "NotReady" with an error message until a namespace is set.
# 3. Creates one DatasetRecord::DarwinCore::Extension per extension record,
#    with status "Unsupported".
# 4. Stores the distinct (institutionCode, collectionCode) pairs and distinct
#    collectionCode values found, each mapped to nil, sorted, in the metadata.
#
# @return [Object] the result of the final +save!+
def perform_staging
  records, headers = get_records(source)

  update!(metadata:
    metadata.merge({
      core_headers: headers[:core],
      extensions_headers: headers[:extensions],
      catalog_numbers_namespaces: []
    })
  )

  staged_core_records = records[:core].map do |record|
    {
      src_data: record,
      basisOfRecord: record["basisOfRecord"]
    }
  end

  # Sets de-duplicate the code combinations seen across all core records.
  catalog_numbers_namespaces = Set[]
  catalog_numbers_collection_code_namespaces = Set[]

  staged_core_records.each do |record|
    dwc_occurrence = DatasetRecord::DarwinCore::Occurrence.new(import_dataset: self)
    dwc_occurrence.initialize_data_fields(record[:src_data].map { |_k, v| v })

    catalog_numbers_namespaces << [
      [
        dwc_occurrence.get_field_value(:institutionCode),
        dwc_occurrence.get_field_value(:collectionCode)
      ],
      nil # User will select namespace through UI. TODO: Should we attempt guessing here?
    ]
    catalog_numbers_collection_code_namespaces << [dwc_occurrence.get_field_value(:collectionCode), nil]

    if dwc_occurrence.get_field_value(:catalogNumber).blank?
      dwc_occurrence.status = "Ready"
    else
      dwc_occurrence.status = "NotReady"
      record["error_data"] = { messages: { catalogNumber: ["Record cannot be imported until namespace is set."] } }
    end

    # Whatever remains in the hash after dropping the raw row is the record's metadata.
    record.delete(:src_data)
    dwc_occurrence.metadata = record

    dwc_occurrence.save!
  end

  # Block parameter is deliberately not named `records`, which would shadow the outer local.
  records[:extensions].each do |extension_type, extension_records|
    extension_records.each do |record|
      dwc_extension = DatasetRecord::DarwinCore::Extension.new(import_dataset: self)
      dwc_extension.initialize_data_fields(record.map { |_k, v| v })
      dwc_extension.status = "Unsupported"
      dwc_extension.metadata = { "type" => extension_type }

      dwc_extension.save!
    end
  end

  metadata.merge!(
    catalog_numbers_namespaces: catalog_numbers_namespaces.sort { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
  )
  metadata.merge!(
    catalog_numbers_collection_code_namespaces: catalog_numbers_collection_code_namespaces.sort { |a, b| a[0].to_s <=> b[0].to_s }
  )

  save!
end

#require_catalog_number_match_verbatim? ⇒ Boolean

Returns:

  • (Boolean)


234
235
236
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 234

# Reads the "require_catalog_number_match_verbatim" flag from the
# "import_settings" section of the dataset metadata.
#
# @return [Boolean] true only when the setting is present and truthy
def require_catalog_number_match_verbatim?
  !!metadata.dig("import_settings", "require_catalog_number_match_verbatim")
end

#require_tripcode_match_verbatim? ⇒ Boolean

Returns:

  • (Boolean)


230
231
232
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 230

# Reads the "require_tripcode_match_verbatim" flag from the
# "import_settings" section of the dataset metadata.
#
# @return [Boolean] true only when the setting is present and truthy
def require_tripcode_match_verbatim?
  !!metadata.dig("import_settings", "require_tripcode_match_verbatim")
end

#require_type_material_success? ⇒ Boolean

Returns:

  • (Boolean)


226
227
228
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 226

# Reads the "require_type_material_success" flag from the
# "import_settings" section of the dataset metadata.
#
# @return [Boolean] true only when the setting is present and truthy
def require_type_material_success?
  !!metadata.dig("import_settings", "require_type_material_success")
end

#restrict_to_existing_nomenclature? ⇒ Boolean

Returns:

  • (Boolean)


222
223
224
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 222

# Reads the "restrict_to_existing_nomenclature" flag from the
# "import_settings" section of the dataset metadata.
#
# @return [Boolean] true only when the setting is present and truthy
def restrict_to_existing_nomenclature?
  !!metadata.dig("import_settings", "restrict_to_existing_nomenclature")
end

#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ Object



172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 172

# Sets the namespace of an existing collectionCode mapping and updates the
# status of the core records whose collectionCode matches.
#
# A namespace id counts as "set" when namespace_id.to_i > 0.
# * Set: matching records currently 'NotReady' become 'Ready' and their
#   error_data is removed.
# * Unset: matching records that are not 'NotReady', 'Imported' or
#   'Unsupported' become 'NotReady' with an explanatory error, except records
#   whose institutionCode belongs to an (institutionCode, collectionCode)
#   mapping for this collection code that has a namespace set.
#
# Everything runs inside one transaction.
#
# NOTE(review): a mapping for collection_code is expected to exist already;
# a missing one raises on `mapping[1] =` — confirm callers guarantee this.
#
# @param collection_code [String, nil] nil is ignored (method returns early)
# @param namespace_id [Integer, String, nil]
def update_catalog_number_collection_code_namespace(collection_code, namespace_id)
  return if collection_code.nil? # No support for mapping blank data at this time

  transaction do
    mapping = get_catalog_number_collection_code_namespace_mapping(collection_code)
    mapping[1] = namespace_id
    ready = namespace_id.to_i > 0
    save!

    query = ready ? core_records.where(status: 'NotReady') : core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])

    # Ids of the records whose collectionCode field holds the given value.
    matching_collection_code =
      core_records_fields.at(get_field_mapping(:collectionCode)).with_value(collection_code).select(:dataset_record_id)

    if ready
      query.where(id: matching_collection_code).update_all(
        "status = 'Ready', metadata = metadata - 'error_data'"
      )
    else
      # Institution codes that keep their own namespace for this collection code.
      institution_codes = metadata["catalog_numbers_namespaces"]&.select { |m| m[0][1] == collection_code && m[1] }&.map { |m| m[0][0] } || []
      query.where(id: matching_collection_code).where.not(
        id: core_records_fields.at(get_field_mapping(:institutionCode)).with_values(institution_codes).select(:dataset_record_id)
      ).update_all(
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      )
    end
  end
end

#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 136

# Sets the namespace of an existing (institutionCode, collectionCode) mapping
# and updates the status of the core records matching that pair.
#
# A namespace id counts as "set" when namespace_id.to_i > 0.
# * Set: matching 'NotReady' records become 'Ready' and lose their error_data.
# * Unset: matching records that are not 'NotReady', 'Imported' or
#   'Unsupported' become 'NotReady' with an explanatory error.
#
# For each code, a nil value matches records that have no field row at that
# field's position; a non-nil value matches records whose field has that value.
# Everything runs inside one transaction.
#
# @param institution_code [String, nil]
# @param collection_code [String, nil]
# @param namespace_id [Integer, String, nil]
def update_catalog_number_namespace(institution_code, collection_code, namespace_id)
  transaction do
    entry = get_catalog_number_namespace_mapping(institution_code, collection_code)
    entry[1] = namespace_id
    ready = namespace_id.to_i > 0
    save!

    scope =
      if ready
        core_records.where(status: 'NotReady')
      else
        core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])
      end

    # TODO: Add scopes/methods in DatasetRecord to handle nil fields values transparently
    { institutionCode: institution_code, collectionCode: collection_code }.each do |term, value|
      fields_at_term = core_records_fields.at(get_field_mapping(term))

      scope =
        if value.nil?
          scope.where.not(id: fields_at_term.select(:dataset_record_id))
        else
          scope.where(id: fields_at_term.with_value(value).select(:dataset_record_id))
        end
    end

    assignments =
      if ready
        "status = 'Ready', metadata = metadata - 'error_data'"
      else
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      end

    scope.update_all(assignments)
  end
end