Class: UriService::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/uri_service/client.rb

Constant Summary collapse

ALPHANUMERIC_UNDERSCORE_KEY_REGEX =
/\A[a-z]+[a-z0-9_]*\z/
CORE_FIELD_NAMES =
['uri', 'vocabulary_string_key', 'value', 'authority', 'type', 'internal_id']
VALID_TYPES =
[UriService::TermType::EXTERNAL, UriService::TermType::LOCAL, UriService::TermType::TEMPORARY]

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts) ⇒ Client

Returns a new instance of Client.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/uri_service/client.rb', line 9

# Builds a client from a string-keyed options hash.
#
# @param opts [Hash] must contain non-nil 'local_uri_base', 'temporary_uri_base',
#   'database' (handed to Sequel.connect) and 'solr' (a hash read for 'url',
#   'pool_size', 'pool_timeout' and the optional 'auto_commit_after_term_creation')
# @raise [UriService::InvalidOptsError] if any of the four required options is nil
def initialize(opts)
  ['local_uri_base', 'temporary_uri_base', 'database', 'solr'].each do |required_key|
    next unless opts[required_key].nil?
    raise UriService::InvalidOptsError, "Must supply opts['#{required_key}'] to initialize method."
  end

  # Bases used when building URIs for local and temporary terms
  @local_uri_base = opts['local_uri_base']
  @temporary_uri_base = opts['temporary_uri_base']

  # Database handle
  @db = Sequel.connect(opts['database'])

  # Pool of Solr connections; 'pool_timeout' is divided by 1000 before being given to the pool
  solr_pool_size = opts['solr']['pool_size']
  solr_pool_timeout = opts['solr']['pool_timeout'].to_f / 1000.to_f
  @rsolr_pool = ConnectionPool.new(size: solr_pool_size, timeout: solr_pool_timeout) do
    RSolr.connect(:url => opts['solr']['url'])
  end

  # Treated as enabled when absent; compared as a string so true and 'true' both enable it
  auto_commit_setting = opts['solr'].fetch('auto_commit_after_term_creation', true)
  @auto_commit_after_term_creation = (auto_commit_setting.to_s == 'true')
end

Instance Attribute Details

#dbObject (readonly)

Returns the value of attribute db.



3
4
5
# File 'lib/uri_service/client.rb', line 3

# Read accessor for the database handle stored in @db.
#
# @return [Object, nil] the current value of @db
def db
  return @db
end

#local_uri_baseObject (readonly)

Returns the value of attribute local_uri_base.



3
4
5
# File 'lib/uri_service/client.rb', line 3

# Read accessor for the base used when building URIs for local terms.
#
# @return [Object, nil] the current value of @local_uri_base
def local_uri_base
  return @local_uri_base
end

#rsolr_poolObject (readonly)

Returns the value of attribute rsolr_pool.



3
4
5
# File 'lib/uri_service/client.rb', line 3

# Read accessor for the Solr connection pool stored in @rsolr_pool.
#
# @return [Object, nil] the current value of @rsolr_pool
def rsolr_pool
  return @rsolr_pool
end

#temporary_uri_baseObject (readonly)

Returns the value of attribute temporary_uri_base.



3
4
5
# File 'lib/uri_service/client.rb', line 3

# Read accessor for the base used when building URIs for temporary terms.
#
# @return [Object, nil] the current value of @temporary_uri_base
def temporary_uri_base
  return @temporary_uri_base
end

Instance Method Details

#clear_solr_indexObject



499
500
501
502
503
504
# File 'lib/uri_service/client.rb', line 499

# Deletes every document from the Solr index (query '*:*') and then commits.
def clear_solr_index
  @rsolr_pool.with do |solr_connection|
    solr_connection.delete_by_query('*:*')
    solr_connection.commit
  end
end

#connected?Boolean

Returns:

  • (Boolean)


85
86
87
88
89
90
91
92
93
94
# File 'lib/uri_service/client.rb', line 85

# Reports whether both backing connections are usable.
#
# @return [Boolean] false when @db or @rsolr_pool is nil, or when test_connection
#   raises Sequel::DatabaseConnectionError or Errno::ECONNREFUSED; true otherwise
def connected?
  if @db.nil? || @rsolr_pool.nil?
    false
  else
    test_connection
    true
  end
rescue Sequel::DatabaseConnectionError, Errno::ECONNREFUSED
  false
end

#create_required_tablesObject



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/uri_service/client.rb', line 107

# Creates the vocabularies and terms tables when they are not already present,
# printing one line per table that says whether it was created or skipped.
def create_required_tables
  handle_database_disconnect do
    existing_tables = @db.tables

    if existing_tables.include?(UriService::VOCABULARIES)
      puts "Skipped creation of table #{UriService::VOCABULARIES} because it already exists."
    else
      @db.create_table UriService::VOCABULARIES do |_t|
        primary_key :id
        String :string_key, size: 255, index: true, unique: true
        String :display_label, size: 255
      end
      puts "Created table: #{UriService::VOCABULARIES}"
    end

    if existing_tables.include?(UriService::TERMS)
      puts "Skipped creation of table #{UriService::TERMS} because it already exists."
    else
      @db.create_table UriService::TERMS do |_t|
        primary_key :id
        String :vocabulary_string_key, size: 255, index: true
        # uri is a text column because utf8 strings cannot be our desired 2000 characters long in MySQL;
        # uri_hash is the column that carries the unique constraint.
        String :uri, text: true
        String :uri_hash, fixed: true, size: 64, unique: true
        String :value, text: true
        String :value_hash, fixed: true, size: 64
        String :type, null: false
        String :authority, size: 255, index: true
        String :additional_fields, text: true
      end
      puts "Created table: #{UriService::TERMS}"
    end
  end
end

#create_term(type, opts) ⇒ Object

Creates a new term



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# File 'lib/uri_service/client.rb', line 168

# Creates a new term of the given type.
#
# @param type [String] one of VALID_TYPES
# @param opts [Hash] symbol-keyed options: :vocabulary_string_key, :value,
#   :uri (required for EXTERNAL terms, rejected for the other types),
#   :authority (nil or absent becomes '') and :additional_fields (defaults to {}).
#   The hash is only read, never modified.
# @return [Object] whatever create_term_impl returns
# @raise [UriService::InvalidTermTypeError] if type is not in VALID_TYPES
# @raise [UriService::InvalidOptsError] if a uri is missing for an EXTERNAL term
#   or supplied for any other type
# @raise [UriService::CouldNotGenerateUriError] if five generated LOCAL URIs in a
#   row are rejected with UriService::ExistingUriError
def create_term(type, opts)
  # Interpolate so that a nil or Symbol type produces the intended error rather than a TypeError.
  raise UriService::InvalidTermTypeError, "Invalid type: #{type}" unless VALID_TYPES.include?(type)

  # Read (rather than delete) the options so the caller's hash is left untouched.
  vocabulary_string_key = opts[:vocabulary_string_key]
  value = opts[:value]
  uri = opts[:uri]
  authority = opts[:authority].nil? ? '' : opts[:authority]
  additional_fields = opts[:additional_fields] || {}

  if type == UriService::TermType::EXTERNAL
    # URI is required
    raise UriService::InvalidOptsError, "A uri must be supplied for terms of type #{type}." if uri.nil?

    return create_term_impl(type, vocabulary_string_key, value, uri, authority, additional_fields)
  else
    # URI should not be present
    raise UriService::InvalidOptsError, "A uri cannot be supplied for term type: #{type}" unless uri.nil?

    if type == UriService::TermType::TEMPORARY
      # No two TEMPORARY terms within the same vocabulary can have the same value, so the URI is
      # derived from a hash of (vocabulary_string_key + value).
      uri = generate_uri_for_temporary_term(vocabulary_string_key, value)
      return create_term_impl(type, vocabulary_string_key, value, uri, authority, additional_fields)
    elsif type == UriService::TermType::LOCAL
      5.times do
        # A duplicate UUID from SecureRandom.uuid is EXTREMELY unlikely, but we make
        # several attempts just in case.
        begin
          local_uri = URI(@local_uri_base)
          local_uri.path += SecureRandom.uuid # Random UUID appended to the local URI base path
          return create_term_impl(type, vocabulary_string_key, value, local_uri.to_s, authority, additional_fields)
        rescue UriService::ExistingUriError
          next
        end
      end
      # Probabilistically, the error below should never be raised.
      raise UriService::CouldNotGenerateUriError, "UriService generated a duplicate random UUID (via SecureRandom.uuid) too many times in a row.  Probabilistically, this should never happen."
    end
  end
end

#create_term_solr_doc(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id) ⇒ Object



240
241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/uri_service/client.rb', line 240

# Builds the string-keyed hash that is sent to Solr for a term.
#
# @return [Hash] the core fields plus 'additional_fields' serialized as a JSON string
def create_term_solr_doc(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id)
  solr_doc = {
    'uri' => uri,
    'value' => value,
    'type' => type,
    'vocabulary_string_key' => vocabulary_string_key,
    'authority' => authority,
    'internal_id' => internal_id
  }
  # Additional fields travel as a single JSON string rather than as separate document fields
  solr_doc['additional_fields'] = JSON.generate(additional_fields)
  solr_doc
end

#create_vocabulary(string_key, display_label) ⇒ Object

Create methods #



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/uri_service/client.rb', line 147

# Inserts a new vocabulary row.
#
# @param string_key [String] must match ALPHANUMERIC_UNDERSCORE_KEY_REGEX and must not be 'all'
# @param display_label [String] stored in the display_label column
# @raise [UriService::InvalidVocabularyStringKeyError] if the key is 'all' or fails the regex
# @raise [UriService::ExistingVocabularyStringKeyError] if the insert violates the unique constraint
def create_vocabulary(string_key, display_label)
  handle_database_disconnect do
    if string_key.to_s == 'all'
      # Note: There isn't currently a use case for searching across 'all' vocabularies, but this restriction is kept as a placeholder in case that changes.
      raise UriService::InvalidVocabularyStringKeyError, 'The value "all" is a reserved word and cannot be used as the string_key value for a vocabulary.'
    end
    # Messages are built by interpolation so a nil or Symbol key yields the intended
    # UriService error instead of a TypeError from String#+.
    unless string_key.to_s =~ ALPHANUMERIC_UNDERSCORE_KEY_REGEX
      raise UriService::InvalidVocabularyStringKeyError, "Invalid key (can only include lower case letters, numbers or underscores, but cannot start with an underscore): #{string_key}"
    end

    @db.transaction do
      begin
        @db[UriService::VOCABULARIES].insert(string_key: string_key, display_label: display_label)
      rescue Sequel::UniqueConstraintViolation
        raise UriService::ExistingVocabularyStringKeyError, "A vocabulary already exists with string key: #{string_key}"
      end
    end
  end
end

#delete_term(uri, commit = true) ⇒ Object



422
423
424
425
426
427
428
429
430
431
432
# File 'lib/uri_service/client.rb', line 422

# Deletes the term with the given uri from the terms table and from Solr.
#
# @param uri [String] uri of the term to remove
# @param commit [Boolean] when truthy, a Solr commit is issued after the delete
def delete_term(uri, commit=true)
  handle_database_disconnect do
    @db.transaction do
      rows_for_uri = @db[UriService::TERMS].where(uri: uri)
      rows_for_uri.delete

      @rsolr_pool.with do |solr_connection|
        solr_query = 'uri:' + UriService.solr_escape(uri)
        solr_connection.delete_by_query(solr_query)
        solr_connection.commit if commit
      end
    end
  end
end

#delete_vocabulary(vocabulary_string_key) ⇒ Object

Delete methods #



416
417
418
419
420
# File 'lib/uri_service/client.rb', line 416

# Deletes the vocabulary row whose string_key matches the argument.
#
# @param vocabulary_string_key [String] string key of the vocabulary to remove
def delete_vocabulary(vocabulary_string_key)
  handle_database_disconnect do
    matching_vocabularies = @db[UriService::VOCABULARIES].where(string_key: vocabulary_string_key)
    matching_vocabularies.delete
  end
end

#disconnect!Object



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/uri_service/client.rb', line 69

# Drops the database handle and the Solr pool. Each instance variable is set to nil
# before the corresponding disconnect/shutdown call is made on the old object.
def disconnect!
  database = @db
  if !database.nil?
    @db = nil
    database.disconnect
  end

  solr_pool = @rsolr_pool
  if !solr_pool.nil?
    @rsolr_pool = nil
    # connection_pool gem docs say that shutting down is optional and the pool would be
    # garbage collected anyway, but this doesn't hurt.
    solr_pool.shutdown { |_connection| }
  end
end

#do_solr_commitObject



493
494
495
496
497
# File 'lib/uri_service/client.rb', line 493

# Issues a commit on a connection taken from the Solr pool.
def do_solr_commit
  @rsolr_pool.with(&:commit)
end

#find_term_by_internal_id(id) ⇒ Object

Finds the term with the given internal id



292
293
294
295
# File 'lib/uri_service/client.rb', line 292

# Looks up a single term by its internal_id.
#
# @param id [Object] value matched against the internal_id field
# @return [Object, nil] the only match, or nil unless exactly one result comes back
def find_term_by_internal_id(id)
  matches = find_terms_where({internal_id: id}, 1)
  return nil unless matches.length == 1
  matches.first
end

#find_term_by_uri(uri) ⇒ Object

Finds the term with the given uri



286
287
288
289
# File 'lib/uri_service/client.rb', line 286

# Looks up a single term by its uri.
#
# @param uri [String] value matched against the uri field
# @return [Object, nil] the only match, or nil unless exactly one result comes back
def find_term_by_uri(uri)
  matches = find_terms_where({uri: uri}, 1)
  return nil unless matches.length == 1
  matches.first
end

#find_terms_by_query(vocabulary_string_key, value_query, limit = 10, start = 0) ⇒ Object



342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
# File 'lib/uri_service/client.rb', line 342

# Searches one vocabulary through the Solr 'suggest' handler.
#
# A blank value_query is delegated to list_terms with the same paging arguments.
#
# @param vocabulary_string_key [String] vocabulary used as the filter query
# @param value_query [String] text to search for
# @param limit [Integer] sent to Solr as :rows
# @param start [Integer] sent to Solr as :start
# @return [Array] one converted term per returned Solr doc (empty when numFound is 0)
def find_terms_by_query(vocabulary_string_key, value_query, limit=10, start=0)
  return list_terms(vocabulary_string_key, limit, start) if value_query.blank?

  matching_terms = []
  @rsolr_pool.with do |solr_connection|
    query_params = {
      :q => UriService.solr_escape(value_query),
      :fq => 'vocabulary_string_key:' + UriService.solr_escape(vocabulary_string_key),
      :rows => limit,
      :start => start,
      :sort => 'score desc, value_ssort asc, uri asc' # For consistent sorting
    }

    # For efficiency, we only do whole term matches for queries < 3 characters
    if value_query.length < 3
      ['qf', 'pf'].each { |param_name| query_params[param_name.to_sym] = 'value_suggest' }
    end

    result_section = solr_connection.get('suggest', params: query_params)['response']
    next unless result_section['numFound'] > 0

    result_section['docs'].each do |solr_doc|
      matching_terms << term_solr_doc_to_frozen_term_hash(solr_doc)
    end
  end
  return matching_terms
end

#find_terms_where(opts, limit = 10) ⇒ Object

Finds terms that match the specified conditions



298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# File 'lib/uri_service/client.rb', line 298

# Finds terms whose core fields match every given condition.
#
# @param opts [Hash] field name => value; every field name must be in CORE_FIELD_NAMES
# @param limit [Integer] sent to Solr as :rows
# @return [Array] one converted term per returned Solr doc (empty when there are none)
# @raise [UriService::UnsupportedSearchFieldError] if any field name is not a core field
def find_terms_where(opts, limit=10)
  # Only search on allowed fields
  requested_fields = opts.map { |field_name, _val| field_name.to_s }
  unsupported_search_fields = requested_fields - CORE_FIELD_NAMES
  unless unsupported_search_fields.empty?
    raise UriService::UnsupportedSearchFieldError, "Unsupported search fields: #{unsupported_search_fields.join(', ')}"
  end

  # One quoted filter query per condition
  filter_queries = opts.map do |field_name, val|
    field_name.to_s + ':"' + UriService.solr_escape(val.to_s) + '"'
  end

  @rsolr_pool.with do |solr_connection|
    select_params = {
      :q => '*:*',
      :fq => filter_queries,
      :rows => limit,
      # Note: We don't sort by solr score because solr fq searches don't factor into the score
      :sort => 'value_ssort asc, uri asc' # For consistent sorting
    }
    found_docs = solr_connection.get('select', params: select_params)['response']['docs']
    return found_docs.map { |solr_doc| term_solr_doc_to_frozen_term_hash(solr_doc) }
  end
end

#find_vocabulary(vocabulary_string_key) ⇒ Object

Find methods #



279
280
281
282
283
# File 'lib/uri_service/client.rb', line 279

# Fetches the first vocabulary row whose string_key matches the argument.
#
# @param vocabulary_string_key [String] string key to look up
# @return [Object, nil] whatever the dataset's #first returns for that condition
def find_vocabulary(vocabulary_string_key)
  handle_database_disconnect do
    matching_vocabularies = @db[UriService::VOCABULARIES].where(string_key: vocabulary_string_key)
    matching_vocabularies.first
  end
end

#generate_frozen_term_hash(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id) ⇒ Object



218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/uri_service/client.rb', line 218

# Assembles the read-only, string-keyed hash that represents a term.
#
# 'authority' is left out when it is the empty string, additional fields are copied in
# on top of the core fields, and every key whose value is nil is removed.
#
# @return [Hash] the frozen term hash
def generate_frozen_term_hash(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id)
  term = { 'uri' => uri, 'value' => value, 'type' => type }
  term['authority'] = authority if authority != ''
  term['vocabulary_string_key'] = vocabulary_string_key
  term['internal_id'] = internal_id

  additional_fields.each { |field_name, field_value| term[field_name] = field_value }

  # Drop nil values, then freeze so callers get a read-only hash
  term.reject! { |_field_name, field_value| field_value.nil? }
  term.freeze
end

#generate_uri_for_temporary_term(vocabulary_string_key, term_value) ⇒ Object



213
214
215
216
# File 'lib/uri_service/client.rb', line 213

# Builds the URI of a temporary term: @temporary_uri_base followed by the SHA256 hex
# digest of the vocabulary string key concatenated with the term value.
#
# @return [String] the generated URI
def generate_uri_for_temporary_term(vocabulary_string_key, term_value)
  term_digest = Digest::SHA256.hexdigest(vocabulary_string_key + term_value)
  temporary_uri = URI(@temporary_uri_base + term_digest)
  temporary_uri.to_s
end

#handle_database_disconnectObject



483
484
485
486
487
488
489
490
491
# File 'lib/uri_service/client.rb', line 483

# Runs the given block, re-running it when the database connection drops.
#
# The block is attempted at most three times. If the third attempt also raises
# Sequel::DatabaseDisconnectError the error is re-raised, so callers never receive
# a silent nil in place of the block's result.
#
# @yield the database work to perform
# @return [Object] the block's return value
# @raise [Sequel::DatabaseDisconnectError] if every attempt fails with a disconnect
def handle_database_disconnect
  tries = 3
  begin
    yield
  rescue Sequel::DatabaseDisconnectError
    tries -= 1
    retry unless tries == 0
    raise # attempts exhausted: surface the failure instead of swallowing it
  end
end

#list_terms(vocabulary_string_key, limit = 10, start = 0) ⇒ Object

Lists terms alphabetically and supports paging through results. Useful for browsing through a term list without a query.



390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
# File 'lib/uri_service/client.rb', line 390

# Lists the terms of one vocabulary through the Solr 'select' handler, sorted by
# 'value_ssort asc, uri asc', with paging.
#
# @param vocabulary_string_key [String] vocabulary used as the filter query
# @param limit [Integer] sent to Solr as :rows
# @param start [Integer] sent to Solr as :start
# @return [Array] one converted term per returned Solr doc (empty when numFound is 0)
def list_terms(vocabulary_string_key, limit=10, start=0)
  listed_terms = []
  @rsolr_pool.with do |solr_connection|
    vocabulary_filter = 'vocabulary_string_key:' + UriService.solr_escape(vocabulary_string_key)
    select_params = {
      :fq => vocabulary_filter,
      :q => '*:*',
      :rows => limit,
      :start => start,
      :sort => 'value_ssort asc, uri asc' # Include 'uri asc' as part of sort to ensure consistent sorting
    }

    result_section = solr_connection.get('select', params: select_params)['response']
    next unless result_section['numFound'] > 0

    result_section['docs'].each do |solr_doc|
      listed_terms << term_solr_doc_to_frozen_term_hash(solr_doc)
    end
  end
  return listed_terms
end

#list_vocabularies(limit = 10, start = 0) ⇒ Object

Lists vocabularies alphabetically (by string key) and supports paging through results.



381
382
383
384
385
386
# File 'lib/uri_service/client.rb', line 381

# Lists vocabulary rows ordered by string_key, with paging.
#
# @param limit [Integer] first argument passed to the dataset's #limit
# @param start [Integer] second argument passed to the dataset's #limit
# @return [Array<Hash>] each row without its :id entry and with string keys
def list_vocabularies(limit=10, start=0)
  handle_database_disconnect do
    ordered_vocabularies = @db[UriService::VOCABULARIES].order(:string_key)
    requested_page = ordered_vocabularies.limit(limit, start)
    return requested_page.map do |vocabulary_row|
      vocabulary_row.except(:id).stringify_keys!
    end
  end
end

#reindex_all_terms(clear = false, print_progress_to_console = false) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/uri_service/client.rb', line 28

# Sends every row of the terms table to Solr and commits once at the end.
#
# @param clear [Boolean] when truthy, all Solr documents are deleted first
#   (no commit is issued for that delete on its own)
# @param print_progress_to_console [Boolean] when truthy, progress is printed with puts/print
def reindex_all_terms(clear=false, print_progress_to_console=false)
  handle_database_disconnect do

    if print_progress_to_console
      puts "Getting database term count..."
      total = @db[UriService::TERMS].count
      reindex_counter = 0
      puts "Number of terms to index: #{total.to_s}"
      puts ""
    end

    if clear
      @rsolr_pool.with do |rsolr|
        rsolr.delete_by_query('*:*')
      end
    end

    # Need to use unambiguous order when using paged_each, so we choose to order by DB :id
    @db[UriService::TERMS].order(:id).paged_each(:rows_per_fetch=>100) do |term_db_row|
      # A NULL additional_fields column is treated as "no additional fields", matching
      # update_term, instead of letting JSON.parse(nil) abort the whole reindex.
      raw_additional_fields = term_db_row[:additional_fields]
      additional_fields = raw_additional_fields.nil? ? {} : JSON.parse(raw_additional_fields)

      send_term_to_solr(
        term_db_row[:vocabulary_string_key],
        term_db_row[:value],
        term_db_row[:uri],
        term_db_row[:authority],
        additional_fields,
        term_db_row[:type],
        term_db_row[:id],
        false # no per-term commit; a single commit happens after the loop
      )

      if print_progress_to_console
        reindex_counter += 1
        print "\rIndexed #{reindex_counter.to_s} of #{total.to_s}"
      end
    end

    puts "\n" + "Committing solr updates..." if print_progress_to_console
    do_solr_commit
    puts "Done." if print_progress_to_console
  end
end

#required_tables_exist?Boolean

Returns:

  • (Boolean)


103
104
105
# File 'lib/uri_service/client.rb', line 103

# Checks that every table named by UriService.required_tables is present in the database.
#
# @return [Boolean] true when nothing from UriService.required_tables is missing from @db.tables
def required_tables_exist?
  missing_tables = UriService.required_tables - @db.tables
  return missing_tables.empty?
end

#send_term_to_solr(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id, commit = @auto_commit_after_term_creation) ⇒ Object

Index the DB row term data into solr



255
256
257
258
259
260
261
# File 'lib/uri_service/client.rb', line 255

# Builds the Solr document for a term and adds it to the index.
#
# @param commit [Boolean] when truthy, a Solr commit follows the add; defaults to
#   @auto_commit_after_term_creation
def send_term_to_solr(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id, commit = @auto_commit_after_term_creation)
  solr_doc = create_term_solr_doc(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id)
  @rsolr_pool.with do |solr_connection|
    solr_connection.add(solr_doc)
    next unless commit
    solr_connection.commit
  end
end

#term_solr_doc_to_frozen_term_hash(term_solr_doc) ⇒ Object



329
330
331
332
333
334
335
336
337
338
339
340
# File 'lib/uri_service/client.rb', line 329

# Converts a Solr document into a frozen term hash.
#
# The core fields are removed from the passed-in document as they are read (the
# document is modified in place), and 'additional_fields' is parsed from JSON.
#
# @param term_solr_doc [Hash] string-keyed Solr document
# @return [Hash] result of generate_frozen_term_hash for the extracted fields
def term_solr_doc_to_frozen_term_hash(term_solr_doc)
  uri, vocabulary_string_key, value, authority, type =
    ['uri', 'vocabulary_string_key', 'value', 'authority', 'type'].map { |field| term_solr_doc.delete(field) }
  additional_fields = JSON.parse(term_solr_doc.delete('additional_fields'))
  internal_id = term_solr_doc.delete('internal_id')

  generate_frozen_term_hash(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id)
end

#test_connectionObject



96
97
98
99
100
101
# File 'lib/uri_service/client.rb', line 96

# Exercises both backing connections: the database first, then a Solr 'admin/ping' request.
#
# @raise [Sequel::DatabaseConnectionError] if the database connection didn't work
# @raise [Errno::ECONNREFUSED] if the Solr connection didn't work
def test_connection
  @db.test_connection
  @rsolr_pool.with { |solr_connection| solr_connection.get('admin/ping') }
end

#update_term(uri, opts, merge_additional_fields = true) ⇒ Object

opts format: {:value => 'new value', :authority => 'newauthority', :additional_fields => {'key' => 'value'}}



450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
# File 'lib/uri_service/client.rb', line 450

# Updates the value, authority and/or additional fields of the term stored under uri,
# writing the change to the database and re-sending the term to Solr.
#
# @param uri [String] uri of the term to update
# @param opts [Hash] symbol-keyed; :value and :authority replace the stored values when
#   truthy, :additional_fields is a hash of extra fields
# @param merge_additional_fields [Boolean] when truthy, opts[:additional_fields] is merged
#   into the stored fields and keys whose merged value is nil are removed; otherwise it
#   replaces the stored fields outright
# @return [Hash] result of generate_frozen_term_hash for the updated term
# @raise [UriService::NonExistentUriError] if no term row has the given uri
# @raise [UriService::CannotChangeTemporaryTermValue] if the value of a TEMPORARY term would change
def update_term(uri, opts, merge_additional_fields=true)
  handle_database_disconnect do
    term_db_row = @db[UriService::TERMS].first(uri: uri)
    # Interpolated so that a nil uri still produces NonExistentUriError rather than a TypeError.
    raise UriService::NonExistentUriError, "No term found with uri: #{uri}" if term_db_row.nil?

    new_value = opts[:value] || term_db_row[:value]
    new_authority = opts[:authority] || term_db_row[:authority]
    new_additional_fields = term_db_row[:additional_fields].nil? ? {} : JSON.parse(term_db_row[:additional_fields])

    if term_db_row[:type] == UriService::TermType::TEMPORARY && new_value != term_db_row[:value]
      # TEMPORARY terms cannot have their values changed, but it is possible to update other fields
      raise UriService::CannotChangeTemporaryTermValue, "The value of a temporary term cannot be changed. Delete unused temporary terms or create a new one with a different value."
    end

    unless opts[:additional_fields].nil?
      if merge_additional_fields
        new_additional_fields.merge!(opts[:additional_fields])
        new_additional_fields.delete_if { |_key, val| val.nil? } # Delete nil values. This is a way to clear data in additional_fields.
      else
        new_additional_fields = opts[:additional_fields]
      end
    end
    validate_additional_field_keys(new_additional_fields)

    @db.transaction do
      @db[UriService::TERMS].where(uri: uri).update(value: new_value, value_hash: Digest::SHA256.hexdigest(new_value), authority: new_authority, additional_fields: JSON.generate(new_additional_fields))
      send_term_to_solr(term_db_row[:vocabulary_string_key], new_value, uri, new_authority, new_additional_fields, term_db_row[:type], term_db_row[:id])
    end

    return generate_frozen_term_hash(term_db_row[:vocabulary_string_key], new_value, uri, new_authority, new_additional_fields, term_db_row[:type], term_db_row[:id])
  end
end

#update_vocabulary(string_key, new_display_label) ⇒ Object

Update methods #



438
439
440
441
442
443
444
445
446
447
# File 'lib/uri_service/client.rb', line 438

# Changes the display label of an existing vocabulary.
#
# @param string_key [String] string key of the vocabulary to update
# @param new_display_label [String] value written to the display_label column
# @raise [UriService::NonExistentVocabularyError] if no vocabulary row has that string_key
def update_vocabulary(string_key, new_display_label)
  handle_database_disconnect do
    dataset = @db[UriService::VOCABULARIES].where(string_key: string_key)
    # Interpolated so that a Symbol or nil key still yields NonExistentVocabularyError
    # rather than a TypeError from String#+.
    raise UriService::NonExistentVocabularyError, "No vocabulary found with string_key: #{string_key}" if dataset.count == 0

    @db.transaction do
      dataset.update(display_label: new_display_label)
    end
  end
end

#validate_additional_field_keys(additional_fields) ⇒ Object

Validates additional_fields and verifies that no reserved words are supplied



264
265
266
267
268
269
270
271
272
273
# File 'lib/uri_service/client.rb', line 264

# Validates the keys of additional_fields: none may be one of CORE_FIELD_NAMES and each
# must match ALPHANUMERIC_UNDERSCORE_KEY_REGEX. Keys are compared by their string form,
# so Symbol keys are validated the same way as String keys.
#
# @param additional_fields [Hash] extra term fields keyed by field name
# @raise [UriService::InvalidAdditionalFieldKeyError] for a reserved or malformed key
def validate_additional_field_keys(additional_fields)
  additional_fields.each do |key, _value|
    key_name = key.to_s
    if CORE_FIELD_NAMES.include?(key_name)
      raise UriService::InvalidAdditionalFieldKeyError, "Cannot supply the key \"#{key_name}\" as an additional field because it is a reserved key."
    end
    unless key_name =~ ALPHANUMERIC_UNDERSCORE_KEY_REGEX
      # Interpolated: concatenating a Symbol key onto the message would raise a TypeError
      # and hide the validation failure.
      raise UriService::InvalidAdditionalFieldKeyError, "Invalid key (can only include lower case letters, numbers or underscores, but cannot start with an underscore): #{key_name}"
    end
  end
end