Class: ImportDataset::DarwinCore::Occurrences

Inherits:

ImportDataset::DarwinCore

Object
ActiveRecord::Base
ApplicationRecord
ImportDataset
ImportDataset::DarwinCore
ImportDataset::DarwinCore::Occurrences

Defined in:: app/models/import_dataset/darwin_core/occurrences.rb

Constant Summary collapse

MINIMUM_FIELD_SET = ["occurrenceID", "scientificName", "basisOfRecord"]

TODO: Can the occurrenceID requirement be dropped? Should other fields be added here?

Constants inherited from ImportDataset::DarwinCore

CHECKLIST_ROW_TYPE, OCCURRENCES_ROW_TYPE

Instance Attribute Summary

Attributes inherited from ImportDataset

#description, #metadata, #source_content_type, #source_file_name, #source_file_size, #source_updated_at, #status

Instance Method Summary collapse

Constructor Details

This class inherits a constructor from ImportDataset::DarwinCore

Instance Method Details

#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 210

# Registers a [collection_code, namespace_id] pair in
# metadata["catalog_numbers_collection_code_namespaces"] unless a mapping for
# +collection_code+ already exists, then saves the dataset.
#
# A nil +collection_code+ is never registered. The list is created on demand
# when the metadata has no such entry yet (the lookup helper already tolerates
# a missing list), instead of failing with NoMethodError on nil.
#
# @param collection_code [Object, nil] collectionCode value to register
# @param namespace_id [Object, nil] namespace id to associate (nil when unknown)
# @return [Object] whatever +save!+ returns; note +save!+ runs even when nothing was added
def add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil)
  unless collection_code.nil? || get_catalog_number_collection_code_namespace_mapping(collection_code)
    mappings = (self.metadata["catalog_numbers_collection_code_namespaces"] ||= [])
    mappings << [collection_code, namespace_id]
    # Keep the list ordered by collection code, compared as strings.
    mappings.sort! { |a, b| a[0].to_s <=> b[0].to_s }
  end
  save!
end

#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 202

# Registers a [[institution_code, collection_code], namespace_id] entry in
# metadata["catalog_numbers_namespaces"] unless a mapping for that pair already
# exists, then saves the dataset.
#
# The list is created on demand when the metadata has no such entry yet (the
# lookup helper already tolerates a missing list), instead of failing with
# NoMethodError on nil.
#
# @param institution_code [Object, nil] institutionCode value of the pair
# @param collection_code [Object, nil] collectionCode value of the pair
# @param namespace_id [Object, nil] namespace id to associate (nil when unknown)
# @return [Object] whatever +save!+ returns; note +save!+ runs even when nothing was added
def add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil)
  unless get_catalog_number_namespace_mapping(institution_code, collection_code)
    mappings = (self.metadata["catalog_numbers_namespaces"] ||= [])
    mappings << [[institution_code, collection_code], namespace_id]
    # Keep the list ordered by the [institution, collection] pair, compared as strings.
    mappings.sort! { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
  end
  save!
end

#check_field_set ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 111

# Adds an error on :source for every MINIMUM_FIELD_SET header that is absent
# from the staged source file. Does nothing when the source is not staged.
#
# How headers are read depends on the staged file name:
# * .zip              - headers of the archive core, via get_dwc_headers
# * .xls/.xlsx/.ods   - spreadsheet converted to CSV through Roo
# * anything else     - parsed as tab-separated text with quoting disabled
#
# @return [Array<String>, nil] the missing headers, or nil when nothing is staged
def check_field_set
  return unless source.staged?

  staged_path = source.staged_path

  headers =
    case staged_path
    when /\.zip\z/i
      get_dwc_headers(::DarwinCore.new(staged_path).core)
    when /\.(xlsx?|ods)\z/i
      CSV.parse(Roo::Spreadsheet.open(staged_path).to_csv, headers: true).headers
    else
      CSV.read(staged_path, headers: true, col_sep: "\t", quote_char: nil, encoding: 'bom|utf-8').headers
    end

  (MINIMUM_FIELD_SET - headers).each do |header|
    errors.add(:source, "required field #{header} missing.")
  end
end

#containerize_dup_cat_no? ⇒ `Boolean`

Returns:

(Boolean)



218
219
220

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 218

# Whether the "containerize_dup_cat_no" import setting is switched on.
#
# @return [Boolean] true when metadata["import_settings"]["containerize_dup_cat_no"]
#   holds a truthy value; false when it is falsy or any level is missing
def containerize_dup_cat_no?
  setting = metadata.dig("import_settings", "containerize_dup_cat_no")
  setting ? true : false
end

#core_records_class ⇒ `Object`



12
13
14

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 12

# Record class associated with the core rows of this dataset type.
#
# @return [Class] always DatasetRecord::DarwinCore::Occurrence
def core_records_class
  DatasetRecord::DarwinCore::Occurrence
end

#core_records_identifier_name ⇒ `Object`



16
17
18

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 16

# Name of the field used as identifier of core records.
#
# @return [String] always 'occurrenceID'
def core_records_identifier_name
  'occurrenceID'
end

#enable_organization_determiners? ⇒ `Boolean`

Returns:

(Boolean)



238
239
240

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 238

# Whether the "enable_organization_determiners" import setting is switched on.
#
# @return [Boolean] true when metadata["import_settings"]["enable_organization_determiners"]
#   holds a truthy value; false when it is falsy or any level is missing
def enable_organization_determiners?
  setting = metadata.dig("import_settings", "enable_organization_determiners")
  setting ? true : false
end

#enable_organization_determiners_alt_name? ⇒ `Boolean`

Returns:

(Boolean)



242
243
244

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 242

# Whether the "enable_organization_determiners_alt_name" import setting is switched on.
#
# @return [Boolean] true when
#   metadata["import_settings"]["enable_organization_determiners_alt_name"]
#   holds a truthy value; false when it is falsy or any level is missing
def enable_organization_determiners_alt_name?
  setting = metadata.dig("import_settings", "enable_organization_determiners_alt_name")
  setting ? true : false
end

#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ `Object` (private)



252
253
254

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 252

# Looks up the stored mapping entry for a collection code.
#
# @param collection_code [Object, nil] value compared against the first element of each entry
# @return [Array, nil] the first entry of
#   metadata["catalog_numbers_collection_code_namespaces"] whose first element
#   equals +collection_code+ (the stored object itself, not a copy); nil when
#   there is no match or no list is stored
def get_catalog_number_collection_code_namespace_mapping(collection_code)
  mappings = metadata["catalog_numbers_collection_code_namespaces"]
  return if mappings.nil?

  mappings.find { |entry| entry[0] == collection_code }
end

#get_catalog_number_namespace(institution_code, collection_code) ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 131

# Resolves the namespace id mapped to an institutionCode/collectionCode pair.
#
# The pair-specific mapping wins; only when it is absent or has no namespace
# set is the collectionCode-only mapping consulted.
#
# @param institution_code [Object, nil] institutionCode value
# @param collection_code [Object, nil] collectionCode value
# @return [Object, nil] second element of the matching mapping entry, or nil
#   when neither mapping yields a value
def get_catalog_number_namespace(institution_code, collection_code)
  pair_mapping = get_catalog_number_namespace_mapping(institution_code, collection_code)
  namespace_id = pair_mapping&.at(1)
  return namespace_id if namespace_id

  get_catalog_number_collection_code_namespace_mapping(collection_code)&.at(1)
end

#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ `Object` (private)



248
249
250

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 248

# Looks up the stored mapping entry for an institutionCode/collectionCode pair.
#
# @param institution_code [Object, nil] first element of the pair
# @param collection_code [Object, nil] second element of the pair
# @return [Array, nil] the first entry of metadata["catalog_numbers_namespaces"]
#   whose first element equals [institution_code, collection_code] (the stored
#   object itself, not a copy); nil when there is no match or no list is stored
def get_catalog_number_namespace_mapping(institution_code, collection_code)
  mappings = metadata["catalog_numbers_namespaces"]
  return if mappings.nil?

  wanted_pair = [institution_code, collection_code]
  mappings.find { |entry| entry[0] == wanted_pair }
end

#get_event_id_namespace ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 20

# Returns the Namespace used for eventID identifiers of this dataset, creating
# and recording one when none is available.
#
# The id stored under metadata["namespaces"]["eventID"] is looked up (and
# memoized in @event_id_identifier_namespace). When no id is stored, or the
# lookup finds nothing, a new Namespace with a random hex suffix is created,
# its id is written back to the metadata and the dataset is saved.
#
# @return [Namespace] the existing or newly created namespace
def get_event_id_namespace
  stored_id = metadata.dig("namespaces", "eventID")

  unless stored_id.nil?
    @event_id_identifier_namespace ||= Namespace.find_by(id: stored_id)
    return @event_id_identifier_namespace unless @event_id_identifier_namespace.nil?
  end

  # The random suffix is shared by the name and short_name of the new namespace.
  suffix = SecureRandom.hex(4)
  project_name = Project.find(Current.project_id).name

  @event_id_identifier_namespace = Namespace.create!(
    name: "eventID namespace for \"#{description}\" dataset in \"#{project_name}\" project [#{suffix}]",
    short_name: "eventID-#{suffix}",
    verbatim_short_name: "eventID",
    delimiter: ':'
  )

  metadata["namespaces"]["eventID"] = @event_id_identifier_namespace.id
  save!

  @event_id_identifier_namespace
end

#perform_staging ⇒ `Object`

Stages core (Occurrence) records and all extension records.

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 43

# Stages core (Occurrence) records and all extension records.
#
# Steps, in order:
# 1. Reads records and headers from the source and stores the headers (plus an
#    empty catalog_numbers_namespaces list) in the dataset metadata.
# 2. Saves one DatasetRecord::DarwinCore::Occurrence per core row. Rows with a
#    blank catalogNumber are "Ready"; the others are "NotReady" and carry an
#    error message until a namespace is set.
# 3. Saves one DatasetRecord::DarwinCore::Extension per extension row, all with
#    status "Unsupported".
# 4. Stores the distinct [institutionCode, collectionCode] pairs and distinct
#    collectionCode values found, each paired with a nil namespace and sorted
#    by their string form, then saves the dataset.
#
# @return [Object] whatever the final +save!+ returns
def perform_staging
  staged_records, staged_headers = get_records(source)

  update!(
    metadata: metadata.merge(
      {
        core_headers: staged_headers[:core],
        extensions_headers: staged_headers[:extensions],
        catalog_numbers_namespaces: []
      }
    )
  )

  # Pair every core row with the metadata hash its record will receive.
  core_entries = staged_records[:core].map do |row|
    [row, { basisOfRecord: row["basisOfRecord"] }]
  end

  institution_collection_pairs = Set.new
  collection_codes = Set.new

  core_entries.each do |row, occurrence_metadata|
    occurrence = DatasetRecord::DarwinCore::Occurrence.new(import_dataset: self)
    occurrence.initialize_data_fields(row.map { |_header, value| value })

    # Namespace is left nil: the user selects it later through the UI.
    # TODO: Should we attempt guessing here?
    institution_collection_pairs << [
      [
        occurrence.get_field_value(:institutionCode),
        occurrence.get_field_value(:collectionCode)
      ],
      nil
    ]
    collection_codes << [occurrence.get_field_value(:collectionCode), nil]

    if occurrence.get_field_value(:catalogNumber).blank?
      occurrence.status = "Ready"
    else
      occurrence.status = "NotReady"
      occurrence_metadata["error_data"] = { messages: { catalogNumber: ["Record cannot be imported until namespace is set."] } }
    end

    occurrence.metadata = occurrence_metadata
    occurrence.save!
  end

  staged_records[:extensions].each do |extension_type, extension_rows|
    extension_rows.each do |row|
      extension = DatasetRecord::DarwinCore::Extension.new(import_dataset: self)
      extension.initialize_data_fields(row.map { |_header, value| value })
      extension.status = "Unsupported"
      extension.metadata = { "type" => extension_type }

      extension.save!
    end
  end

  sorted_pairs = institution_collection_pairs.sort { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
  sorted_codes = collection_codes.sort { |a, b| a[0].to_s <=> b[0].to_s }

  self.metadata.merge!(
    catalog_numbers_namespaces: sorted_pairs,
    catalog_numbers_collection_code_namespaces: sorted_codes
  )

  save!
end

#require_catalog_number_match_verbatim? ⇒ `Boolean`

Returns:

(Boolean)



234
235
236

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 234

# Whether the "require_catalog_number_match_verbatim" import setting is switched on.
#
# @return [Boolean] true when
#   metadata["import_settings"]["require_catalog_number_match_verbatim"]
#   holds a truthy value; false when it is falsy or any level is missing
def require_catalog_number_match_verbatim?
  setting = metadata.dig("import_settings", "require_catalog_number_match_verbatim")
  setting ? true : false
end

#require_tripcode_match_verbatim? ⇒ `Boolean`

Returns:

(Boolean)



230
231
232

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 230

# Whether the "require_tripcode_match_verbatim" import setting is switched on.
#
# @return [Boolean] true when
#   metadata["import_settings"]["require_tripcode_match_verbatim"]
#   holds a truthy value; false when it is falsy or any level is missing
def require_tripcode_match_verbatim?
  setting = metadata.dig("import_settings", "require_tripcode_match_verbatim")
  setting ? true : false
end

#require_type_material_success? ⇒ `Boolean`

Returns:

(Boolean)



226
227
228

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 226

# Whether the "require_type_material_success" import setting is switched on.
#
# @return [Boolean] true when
#   metadata["import_settings"]["require_type_material_success"]
#   holds a truthy value; false when it is falsy or any level is missing
def require_type_material_success?
  setting = metadata.dig("import_settings", "require_type_material_success")
  setting ? true : false
end

#restrict_to_existing_nomenclature? ⇒ `Boolean`

Returns:

(Boolean)



222
223
224

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 222

# Whether the "restrict_to_existing_nomenclature" import setting is switched on.
#
# @return [Boolean] true when
#   metadata["import_settings"]["restrict_to_existing_nomenclature"]
#   holds a truthy value; false when it is falsy or any level is missing
def restrict_to_existing_nomenclature?
  setting = metadata.dig("import_settings", "restrict_to_existing_nomenclature")
  setting ? true : false
end

#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 172

# Sets the namespace id of an existing collectionCode mapping and bulk-updates
# the status of the core records that have that collectionCode.
#
# Runs inside a transaction. A namespace id counts as set when
# +namespace_id.to_i+ is positive:
# * set   - 'NotReady' records with the collection code become 'Ready' and
#           lose their error_data.
# * unset - records with the collection code that are not already 'NotReady',
#           'Imported' or 'Unsupported' become 'NotReady' with an error
#           message, except those whose institutionCode belongs to a pair
#           mapping (for this collection code) that has a namespace.
#
# The mapping for +collection_code+ is expected to exist already.
#
# @param collection_code [Object, nil] collectionCode value; nil is ignored
# @param namespace_id [Object, nil] namespace id to store in the mapping
# @return [Object, nil] nil when +collection_code+ is nil, otherwise the value
#   of the transaction block
def update_catalog_number_collection_code_namespace(collection_code, namespace_id)
  return if collection_code.nil? # No support for mapping blank data at this time

  transaction do
    get_catalog_number_collection_code_namespace_mapping(collection_code)[1] = namespace_id
    ready = namespace_id.to_i > 0
    save!

    # Ids of the records whose collectionCode field equals collection_code.
    collection_code_record_ids = core_records_fields
      .at(get_field_mapping(:collectionCode))
      .with_value(collection_code)
      .select(:dataset_record_id)

    if ready
      core_records.where(status: 'NotReady')
        .where(id: collection_code_record_ids)
        .update_all("status = 'Ready', metadata = metadata - 'error_data'")
    else
      pair_mappings = self.metadata["catalog_numbers_namespaces"]
      # Institution codes that still have a namespace through a pair mapping.
      institution_codes =
        if pair_mappings.nil?
          []
        else
          pair_mappings.select { |m| m[0][1] == collection_code && m[1] }.map { |m| m[0][0] }
        end

      institution_code_record_ids = core_records_fields
        .at(get_field_mapping(:institutionCode))
        .with_values(institution_codes)
        .select(:dataset_record_id)

      core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])
        .where(id: collection_code_record_ids)
        .where.not(id: institution_code_record_ids)
        .update_all(
          "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
        )
    end
  end
end

#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 136

# Sets the namespace id of an existing institutionCode/collectionCode mapping
# and bulk-updates the status of the matching core records.
#
# Runs inside a transaction. A namespace id counts as set when
# +namespace_id.to_i+ is positive:
# * set   - matching 'NotReady' records become 'Ready' and lose their error_data.
# * unset - matching records that are not already 'NotReady', 'Imported' or
#           'Unsupported' become 'NotReady' with an error message.
#
# A record matches when, for each of the two fields, it has the given value,
# or, when the given value is nil, it is not among the records selected from
# core_records_fields for that field.
#
# The mapping for the pair is expected to exist already.
#
# @param institution_code [Object, nil] institutionCode value of the pair
# @param collection_code [Object, nil] collectionCode value of the pair
# @param namespace_id [Object, nil] namespace id to store in the mapping
# @return [Object] the value of the transaction block
def update_catalog_number_namespace(institution_code, collection_code, namespace_id)
  transaction do
    mapping = get_catalog_number_namespace_mapping(institution_code, collection_code)
    mapping[1] = namespace_id
    ready = namespace_id.to_i > 0
    save!

    scope =
      if ready
        core_records.where(status: 'NotReady')
      else
        core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])
      end

    # TODO: Add scopes/methods in DatasetRecord to handle nil fields values transparently
    { institutionCode: institution_code, collectionCode: collection_code }.each do |field_name, value|
      field_records = core_records_fields.at(get_field_mapping(field_name))

      scope =
        if value.nil?
          scope.where.not(id: field_records.select(:dataset_record_id))
        else
          scope.where(id: field_records.with_value(value).select(:dataset_record_id))
        end
    end

    if ready
      scope.update_all("status = 'Ready', metadata = metadata - 'error_data'")
    else
      scope.update_all(
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      )
    end
  end
end

Class: ImportDataset::DarwinCore::Occurrences

Constant Summary collapse

Constants inherited from ImportDataset::DarwinCore

Instance Attribute Summary

Attributes inherited from ImportDataset

Instance Method Summary collapse

Methods inherited from ImportDataset::DarwinCore

Methods inherited from ImportDataset

Methods included from Shared::OriginRelationship

Methods included from Shared::IsData

Methods included from Housekeeping

Methods inherited from ApplicationRecord

Constructor Details

Instance Method Details

#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ Object

#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ Object

#check_field_set ⇒ Object

#containerize_dup_cat_no? ⇒ Boolean

#core_records_class ⇒ Object

#core_records_identifier_name ⇒ Object

#enable_organization_determiners? ⇒ Boolean

#enable_organization_determiners_alt_name? ⇒ Boolean

#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ Object (private)

#get_catalog_number_namespace(institution_code, collection_code) ⇒ Object

#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ Object (private)

#get_event_id_namespace ⇒ Object

#perform_staging ⇒ Object

#require_catalog_number_match_verbatim? ⇒ Boolean

#require_tripcode_match_verbatim? ⇒ Boolean

#require_type_material_success? ⇒ Boolean

#restrict_to_existing_nomenclature? ⇒ Boolean

#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ Object

#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ Object

#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ `Object`

#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ `Object`

#check_field_set ⇒ `Object`

#containerize_dup_cat_no? ⇒ `Boolean`

#core_records_class ⇒ `Object`

#core_records_identifier_name ⇒ `Object`

#enable_organization_determiners? ⇒ `Boolean`

#enable_organization_determiners_alt_name? ⇒ `Boolean`

#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ `Object` (private)

#get_catalog_number_namespace(institution_code, collection_code) ⇒ `Object`

#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ `Object` (private)

#get_event_id_namespace ⇒ `Object`

#perform_staging ⇒ `Object`

#require_catalog_number_match_verbatim? ⇒ `Boolean`

#require_tripcode_match_verbatim? ⇒ `Boolean`

#require_type_material_success? ⇒ `Boolean`

#restrict_to_existing_nomenclature? ⇒ `Boolean`

#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ `Object`

#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ `Object`