Class: Uc3DmpId::Finder

Inherits:
Object
  • Object
show all
Defined in:
lib/uc3-dmp-id/finder.rb

Overview

Methods to find/search for DMP IDs

Constant Summary collapse

# Messages for the FinderError exceptions raised by the finder methods.
# Long messages are joined from adjacent literals: a backslash-newline inside a
# single-quoted literal is NOT a line continuation and would be kept verbatim
# (backslash, newline and indentation) in the text handed to callers.
MSG_INVALID_ARGS = 'Expected JSON to be structured as `{ "dmp": { "PK": "value"} } OR ' \
                   '{ "dmp": { "dmp_id": { "identifier": "value", "type": "value" } } }`'.freeze
MSG_INVALID_OWNER_ID = 'Invalid :owner_orcid. Expected a valid ORCID id (excluding the domain).'.freeze
MSG_INVALID_OWNER_ORG = 'Invalid :owner_org_ror. Expected a valid ROR id (excluding the domain).'.freeze
MSG_INVALID_MOD_DATE = 'Invalid :modification_day. Expected value to be in the `YYYY-MM-DD` format.'.freeze
MSG_MISSING_PK = 'No PK was provided'.freeze
MSG_MISSING_PROV_ID = 'No Provenance identifier was provided. ' \
                      'Expected: `{ "dmp_id": { "identifier": "value", "type": "value" } }`'.freeze

# Domain prefixes for ORCID and ROR identifiers (the messages above expect ids without them).
ORCID_DOMAIN = 'https://orcid.org/'.freeze
ROR_DOMAIN = 'https://ror.org/'.freeze

# Search sorting/paging settings.
# NOTE(review): not referenced by the methods visible in this listing; presumably consumed
# by search helpers defined elsewhere -- confirm before changing values.
SORT_OPTIONS = %w[title modified].freeze
SORT_DIRECTIONS = %w[asc desc].freeze
MAX_PAGE_SIZE = 100
DEFAULT_PAGE_SIZE = 25
DEFAULT_SORT_OPTION = 'modified'.freeze
DEFAULT_SORT_DIR = 'desc'.freeze

Class Method Summary collapse

Class Method Details

.by_json(json:, client: nil, cleanse: true, logger: nil) ⇒ Object

Find a DMP based on the contents of the incoming JSON


rubocop:disable Metrics/AbcSize

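Raises:

  • (FinderError) — when the parsed JSON has no `dmp` Hash, or that Hash has neither a `PK` nor a `dmp_id`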


57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/uc3-dmp-id/finder.rb', line 57

# Look up a DMP using whichever identifier the incoming JSON carries.
#
# The JSON is parsed with Helper.parse_json and its 'dmp' entry is inspected. An
# explicit 'PK' wins; otherwise the 'dmp_id' entry is translated into a PK via
# Helper.dmp_id_to_pk. The actual fetch is delegated to by_pk.
#
# @param json [Object] the raw input handed to Helper.parse_json
# @param client [Object, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param cleanse [Boolean] forwarded to by_pk
# @param logger [Object, nil] forwarded to by_pk
# @return [Object, nil] whatever by_pk returns, or nil when no PK could be determined
# @raise [FinderError] when there is no 'dmp' Hash or it has neither 'PK' nor 'dmp_id'
def by_json(json:, client: nil, cleanse: true, logger: nil)
  dmp_json = Helper.parse_json(json:)&.fetch('dmp', {})
  identifiable = dmp_json.is_a?(Hash) && !(dmp_json['PK'].nil? && dmp_json['dmp_id'].nil?)
  raise FinderError, MSG_INVALID_ARGS unless identifiable

  # Use the PK as-is when present, otherwise derive it from the incoming :dmp_id
  p_key = if dmp_json['PK'].nil?
            Helper.dmp_id_to_pk(json: dmp_json.fetch('dmp_id', {}))
          else
            dmp_json['PK']
          end
  client = Uc3DmpDynamo::Client.new if client.nil?

  # TODO: Re-enable this once we figure out Dynamo indexes
  # find_by_dmphub_provenance_id -> if no PK and no dmp_id result
  # return by_provenance_identifier(json: json, client: client, logger: logger) if p_key.nil?

  return nil if p_key.nil?

  # find_by_PK
  by_pk(p_key:, s_key: dmp_json['SK'], client:, cleanse:, logger:)
end

.by_pk(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, cleanse: true, logger: nil) ⇒ Object

Find the DMP by its PK and SK


rubocop:disable Metrics/AbcSize

Raises:

  • (FinderError) — when `p_key` is nil


78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/uc3-dmp-id/finder.rb', line 78

# Fetch a single DMP item by its partition key (PK) and sort key (SK).
#
# @param p_key [Object] the PK; passed through Helper.append_pk_prefix before the lookup
# @param s_key [Object, nil] the SK; nil/blank values fall back to Helper::DMP_LATEST_VERSION
# @param client [Object, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param cleanse [Boolean] when true, versions are appended via Versioner.append_versions
#   and the result is passed through Helper.cleanse_dmp_json
# @param logger [Object, nil] forwarded to the client and helpers
# @return [Object, nil] the client's response untouched when it is not a Hash, nil when
#   the item has no 'PK', otherwise the DMP Hash wrapped under the 'dmp' key
# @raise [FinderError] when p_key is nil
def by_pk(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, cleanse: true, logger: nil)
  raise FinderError, MSG_MISSING_PK if p_key.nil?

  # nil.to_s is '', so this single check covers both nil and blank sort keys
  s_key = Helper::DMP_LATEST_VERSION if s_key.to_s.strip.empty?
  client = Uc3DmpDynamo::Client.new if client.nil?

  item_key = { PK: Helper.append_pk_prefix(p_key:), SK: Helper.append_sk_prefix(s_key:) }
  resp = client.get_item(key: item_key, logger:)
  return resp unless resp.is_a?(Hash)

  # Make sure the item sits under a top level 'dmp' key; the JSON round trip
  # turns the symbol key (and any nested symbol keys) into strings
  dmp = if resp['dmp'].nil?
          JSON.parse({ dmp: resp }.to_json)
        else
          resp
        end
  item = dmp['dmp']
  return nil if item['PK'].nil?

  # Attach any harvester mods to the JSON
  dmp['dmp'] = _attach_harvester_mods(client:, p_key:, json: item, logger:)

  dmp = Versioner.append_versions(p_key: dmp['dmp']['PK'], dmp:, client:, logger:) if cleanse
  dmp = _remove_narrative_if_private(json: dmp)
  return dmp unless cleanse

  Helper.cleanse_dmp_json(json: dmp)
end

.by_provenance_identifier(json:, client: nil, cleanse: true, logger: nil) ⇒ Object

Attempt to find the DMP item by the provenance system’s identifier


rubocop:disable Metrics/AbcSize

Raises:

  • (FinderError) — when the JSON is not a Hash or its `dmp_id` has no `identifier`


122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/uc3-dmp-id/finder.rb', line 122

# Attempt to find the DMP item by the provenance system's identifier.
#
# Queries the 'dmphub_provenance_identifier_gsi' index for the identifier found at
# json['dmp_id']['identifier'] (the JSON may optionally be wrapped in a 'dmp' key),
# restricted to the latest version, then loads the matching record through by_pk.
#
# @param json [Hash] the DMP JSON, either `{ 'dmp' => { 'dmp_id' => {...} } }` or `{ 'dmp_id' => {...} }`
# @param client [Object, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param cleanse [Boolean] forwarded to by_pk
# @param logger [Object, nil] forwarded to the client and by_pk
# @return [Object, nil] the query response untouched when it is not a Hash, nil when the
#   hit has no 'PK', otherwise whatever by_pk returns
# @raise [FinderError] when the JSON (or its 'dmp' / 'dmp_id' entries) is not a Hash, or
#   when the 'dmp_id' has no 'identifier'
def by_provenance_identifier(json:, client: nil, cleanse: true, logger: nil)
  raise FinderError, MSG_MISSING_PROV_ID unless json.is_a?(Hash)

  json = json['dmp'] unless json['dmp'].nil?
  # Malformed input (e.g. a String under 'dmp' or 'dmp_id') is reported as a
  # FinderError rather than surfacing as a NoMethodError/TypeError
  dmp_id = json.is_a?(Hash) ? json['dmp_id'] : nil
  raise FinderError, MSG_MISSING_PROV_ID unless dmp_id.is_a?(Hash) && !dmp_id['identifier'].nil?

  args = {
    index_name: 'dmphub_provenance_identifier_gsi',
    key_conditions: {
      dmphub_provenance_identifier: {
        attribute_value_list: [dmp_id['identifier']],
        comparison_operator: 'EQ'
      }
    },
    filter_expression: 'SK = :version',
    expression_attribute_values: { ':version': Helper::DMP_LATEST_VERSION }
  }
  client = Uc3DmpDynamo::Client.new if client.nil?
  resp = client.query(args:, logger:)
  return resp unless resp.is_a?(Hash)

  dmp = resp['dmp'].nil? ? JSON.parse({ dmp: resp }.to_json) : resp
  return nil if dmp['dmp']['PK'].nil?

  # If we got a hit, fetch the DMP and return it. The client is handed on so that a
  # caller-supplied client is honoured and no second client has to be created.
  by_pk(p_key: dmp['dmp']['PK'], s_key: dmp['dmp']['SK'], client:, cleanse:, logger:)
end

.exists?(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, logger: nil) ⇒ Boolean

Fetch just the PK to see if a record exists


Returns:

  • (Boolean)

Raises:

  • (FinderError) — when `p_key` is nil


106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/uc3-dmp-id/finder.rb', line 106

# Check whether a record exists for the given PK and SK, without loading the DMP itself.
#
# @param p_key [Object] the PK; passed through Helper.append_pk_prefix
# @param s_key [Object, nil] the SK; nil/blank values fall back to Helper::DMP_LATEST_VERSION
# @param client [Object, nil] Dynamo client; a Uc3DmpDynamo::Client is created when nil
# @param logger [Object, nil] forwarded to the client
# @return [Boolean] the result of the client's pk_exists? call
# @raise [FinderError] when p_key is nil
def exists?(p_key:, s_key: Helper::DMP_LATEST_VERSION, client: nil, logger: nil)
  raise FinderError, MSG_MISSING_PK if p_key.nil?

  # The keyword default only applies when :s_key is omitted. An explicit nil or blank
  # value is normalized here, the same way by_pk treats its :s_key.
  s_key = Helper::DMP_LATEST_VERSION if s_key.to_s.strip.empty?
  client = Uc3DmpDynamo::Client.new if client.nil?
  client.pk_exists?(
    key: {
      PK: Helper.append_pk_prefix(p_key:),
      SK: Helper.append_sk_prefix(s_key:)
    },
    logger:
  )
end

.search_dmps(args:, logger: nil) ⇒ Object

TODO: Replace this with ElasticSearch



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/uc3-dmp-id/finder.rb', line 31

# Search for DMPs matching ALL of the supplied filters.
# TODO: Replace this with ElasticSearch
#
# Each supplied filter ('owner', 'org', 'funder') is looked up in the table named by
# ENV['DYNAMO_INDEX_TABLE']; the entries common to every requested filter are then
# loaded in full from the table named by ENV['DYNAMO_TABLE'].
#
# @param args [Hash] search criteria; reads the 'owner', 'org' and 'funder' keys
# @param logger [Object, nil] forwarded to the lookup helpers
# @return [Object] an empty Array when no filter was supplied or nothing matches every
#   filter, otherwise the result of _fetch_dmps
def search_dmps(args:, logger: nil)
  # Fetch the DMPs for each of the possible filter options
  client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_INDEX_TABLE'])
  owner = args['owner']
  org = args['org']
  funder = args['funder']

  # Collect one result list per filter that was actually requested
  pk_lists = []
  pk_lists << _by_owner(owner:, client:, logger:) unless owner.nil?
  pk_lists << _by_org(org:, client:, logger:) unless org.nil?
  pk_lists << _by_funder(funder:, client:, logger:) unless funder.nil?

  # No filters at all, or a requested filter that matched nothing, means no results.
  # A requested-but-empty filter must not be skipped: doing so would return DMPs
  # that fail one of the criteria.
  return [] if pk_lists.empty? || pk_lists.any?(&:empty?)

  # Only use the DMPs that fit all of the filter criteria
  dmps = pk_lists.reduce(:&).flatten.uniq
  return [] if dmps.empty?

  # Fetch full DMP records for the results
  client = Uc3DmpDynamo::Client.new(table: ENV['DYNAMO_TABLE'])
  _fetch_dmps(client:, pks: dmps.map { |dmp| dmp['pk'] }, logger:)
end