Class: UriService::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/uri_service/client.rb

Constant Summary collapse

ALPHANUMERIC_UNDERSCORE_KEY_REGEX =
/\A[a-z]+[a-z0-9_]*\z/
CORE_FIELD_NAMES =
['uri', 'vocabulary_string_key', 'value', 'authority', 'type', 'internal_id']
VALID_TYPES =
[UriService::TermType::EXTERNAL, UriService::TermType::LOCAL, UriService::TermType::TEMPORARY]

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts) ⇒ Client

Returns a new instance of Client.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/uri_service/client.rb', line 9

# Builds a client from a string-keyed options hash.
#
# Required keys (a nil value counts as missing):
#   'local_uri_base'     - stored and later used as the prefix for LOCAL term URIs
#   'temporary_uri_base' - stored and later used as the prefix for TEMPORARY term URIs
#   'database'           - passed unchanged to Sequel.connect
#   'solr'               - hash read for 'url', 'pool_size' and 'pool_timeout'
#
# Raises UriService::InvalidOptsError when a required key is missing.
def initialize(opts)
  if opts['local_uri_base'].nil?
    raise UriService::InvalidOptsError, "Must supply opts['local_uri_base'] to initialize method."
  end
  if opts['temporary_uri_base'].nil?
    raise UriService::InvalidOptsError, "Must supply opts['temporary_uri_base'] to initialize method."
  end
  if opts['database'].nil?
    raise UriService::InvalidOptsError, "Must supply opts['database'] to initialize method."
  end
  if opts['solr'].nil?
    raise UriService::InvalidOptsError, "Must supply opts['solr'] to initialize method."
  end

  # URI prefixes used when generating URIs for new terms
  @local_uri_base = opts['local_uri_base']
  @temporary_uri_base = opts['temporary_uri_base']

  # Database handle
  @db = Sequel.connect(opts['database'])

  # Solr connection pool. 'pool_timeout' is divided by 1000 before it is handed
  # to ConnectionPool (presumably milliseconds converted to seconds).
  pool_size = opts['solr']['pool_size']
  pool_timeout = opts['solr']['pool_timeout'].to_f / 1000.to_f
  @rsolr_pool = ConnectionPool.new(size: pool_size, timeout: pool_timeout) do
    RSolr.connect(url: opts['solr']['url'])
  end
end

Instance Attribute Details

#dbObject (readonly)

Returns the value of attribute db.



3
4
5
# File 'lib/uri_service/client.rb', line 3

# Reader for @db, which the constructor assigns from Sequel.connect and
# disconnect! resets to nil.
def db
  return @db
end

#local_uri_baseObject (readonly)

Returns the value of attribute local_uri_base.



3
4
5
# File 'lib/uri_service/client.rb', line 3

# Reader for @local_uri_base, the value given as opts['local_uri_base'] to the constructor.
def local_uri_base
  return @local_uri_base
end

#rsolr_poolObject (readonly)

Returns the value of attribute rsolr_pool.



3
4
5
# File 'lib/uri_service/client.rb', line 3

# Reader for @rsolr_pool, the ConnectionPool of RSolr clients built by the
# constructor (nil after disconnect!).
def rsolr_pool
  return @rsolr_pool
end

#temporary_uri_baseObject (readonly)

Returns the value of attribute temporary_uri_base.



3
4
5
# File 'lib/uri_service/client.rb', line 3

# Reader for @temporary_uri_base, the value given as opts['temporary_uri_base'] to the constructor.
def temporary_uri_base
  return @temporary_uri_base
end

Instance Method Details

#clear_solr_indexObject



497
498
499
500
501
502
# File 'lib/uri_service/client.rb', line 497

# Deletes every document in the Solr index (query '*:*') and commits immediately.
def clear_solr_index
  @rsolr_pool.with do |solr|
    solr.delete_by_query('*:*')
    solr.commit
  end
end

#connected?Boolean

Returns:

  • (Boolean)


83
84
85
86
87
88
89
90
91
92
# File 'lib/uri_service/client.rb', line 83

# Reports whether both the database and Solr are reachable.
#
# Returns false when either handle is nil, or when test_connection raises
# Sequel::DatabaseConnectionError or Errno::ECONNREFUSED; true otherwise.
# Any other exception from test_connection propagates.
def connected?
  return false if [@db, @rsolr_pool].any?(&:nil?)

  test_connection
  true
rescue Sequel::DatabaseConnectionError, Errno::ECONNREFUSED
  false
end

#create_required_tablesObject



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/uri_service/client.rb', line 105

# Creates the vocabularies and terms tables when they are not already listed
# by @db.tables, printing one line per table saying whether it was created or
# skipped. Runs inside handle_database_disconnect.
def create_required_tables
  handle_database_disconnect do
    existing_tables = @db.tables

    vocabularies_table_name = UriService::VOCABULARIES.to_s
    if existing_tables.include?(UriService::VOCABULARIES)
      puts 'Skipped creation of table ' + vocabularies_table_name + ' because it already exists.'
    else
      @db.create_table UriService::VOCABULARIES do
        primary_key :id
        String :string_key, size: 255, index: true, unique: true
        String :display_label, size: 255
      end
      puts 'Created table: ' + vocabularies_table_name
    end

    terms_table_name = UriService::TERMS.to_s
    if existing_tables.include?(UriService::TERMS)
      puts 'Skipped creation of table ' + terms_table_name + ' because it already exists.'
    else
      @db.create_table UriService::TERMS do
        primary_key :id
        String :vocabulary_string_key, size: 255, index: true
        # uri is a text column because utf8 strings cannot be our desired 2000
        # characters long in MySQL. uri_hash is what enforces uniqueness.
        String :uri, text: true
        String :uri_hash, fixed: true, size: 64, unique: true
        String :value, text: true
        String :value_hash, fixed: true, size: 64
        String :type, null: false
        String :authority, size: 255, index: true
        String :additional_fields, text: true
      end
      puts 'Created table: ' + terms_table_name
    end
  end
end

#create_term(type, opts) ⇒ Object

Creates a new term



166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/uri_service/client.rb', line 166

# Creates a new term of the given type.
#
# type - one of VALID_TYPES (EXTERNAL, LOCAL or TEMPORARY).
# opts - symbol-keyed hash read for :vocabulary_string_key, :value, :uri,
#        :authority (defaults to '') and :additional_fields (defaults to {}).
#        The hash is only read; the caller's copy is not modified.
#
# URI handling per type:
#   EXTERNAL  - opts[:uri] is required.
#   TEMPORARY - opts[:uri] must be absent; the URI comes from
#               generate_uri_for_temporary_term.
#   LOCAL     - opts[:uri] must be absent; the URI is @local_uri_base plus a
#               random UUID, retried up to 5 times on ExistingUriError.
#
# Returns whatever create_term_impl returns.
# Raises UriService::InvalidTermTypeError for an unknown type (including nil or
# non-String values), UriService::InvalidOptsError for a missing/forbidden uri,
# and UriService::CouldNotGenerateUriError if all LOCAL attempts collide.
def create_term(type, opts)
  # Interpolate rather than concatenate so a nil/Symbol type still produces
  # InvalidTermTypeError instead of a TypeError from String#+.
  raise UriService::InvalidTermTypeError, "Invalid type: #{type}" unless VALID_TYPES.include?(type)

  vocabulary_string_key = opts[:vocabulary_string_key]
  value = opts[:value]
  uri = opts[:uri]
  authority = opts[:authority]
  authority = '' if authority.nil?
  additional_fields = opts[:additional_fields] || {}

  if type == UriService::TermType::EXTERNAL
    # URI is required
    raise UriService::InvalidOptsError, "A uri must be supplied for terms of type #{type}." if uri.nil?

    return create_term_impl(type, vocabulary_string_key, value, uri, authority, additional_fields)
  else
    # URI should not be present
    raise UriService::InvalidOptsError, "A uri cannot supplied for term type: #{type}" unless uri.nil?

    if type == UriService::TermType::TEMPORARY
      # No two TEMPORARY terms within the same vocabulary can have the same value, so we generate a unique URI from a hash of the (vocabulary_string_key + value) to ensure uniqueness.
      uri = self.generate_uri_for_temporary_term(vocabulary_string_key, value)
      return create_term_impl(type, vocabulary_string_key, value, uri, authority, additional_fields)
    elsif type == UriService::TermType::LOCAL
      5.times {
        # We generate a unique URI for a local term from a UUID generator.
        # Getting a duplicate UUID from a call to SecureRandom.uuid is EXTREMELY unlikely,
        # but we'll account for it just in case by being ready to make multiple attempts.
        begin
          # Generate a new URI for this LOCAL term
          uri = URI(@local_uri_base)
          uri.path += SecureRandom.uuid # Generate random UUID for local URI
          uri = uri.to_s
          return create_term_impl(type, vocabulary_string_key, value, uri, authority, additional_fields)
        rescue UriService::ExistingUriError
          next
        end
      }
      # Probabilistically, the error below should never be raised.
      raise UriService::CouldNotGenerateUriError, "UriService generated a duplicate random UUID (via SecureRandom.uuid) too many times in a row.  Probabilistically, this should never happen."
    end

  end
end

#create_term_solr_doc(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id) ⇒ Object



238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/uri_service/client.rb', line 238

# Builds the string-keyed hash that is sent to Solr for one term.
# additional_fields is stored as a single JSON string under 'additional_fields'.
def create_term_solr_doc(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id)
  {
    'uri' => uri,
    'value' => value,
    'type' => type,
    'vocabulary_string_key' => vocabulary_string_key,
    'authority' => authority,
    'internal_id' => internal_id,
    'additional_fields' => JSON.generate(additional_fields)
  }
end

#create_vocabulary(string_key, display_label) ⇒ Object

Creates a new vocabulary with the given string key and display label.



145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/uri_service/client.rb', line 145

# Inserts a new vocabulary row.
#
# string_key    - must match ALPHANUMERIC_UNDERSCORE_KEY_REGEX and must not be 'all'.
# display_label - stored as the vocabulary's display label.
#
# Raises UriService::InvalidVocabularyStringKeyError for a reserved or
# malformed key (including nil and other non-String values), and
# UriService::ExistingVocabularyStringKeyError when the insert violates the
# unique constraint on string_key.
def create_vocabulary(string_key, display_label)
  self.handle_database_disconnect do
    if string_key.to_s == 'all'
      # Note: There isn't currently a use case for searching across 'all' vocabularies, but I'm leaving this restriction as a placeholder in case that changes.
      raise UriService::InvalidVocabularyStringKeyError, 'The value "all" is a reserved word and cannot be used as the string_key value for a vocabulary.'
    end
    # Messages are built with interpolation so a nil/Symbol key yields the
    # intended domain error rather than a TypeError from String#+.
    unless string_key.to_s =~ ALPHANUMERIC_UNDERSCORE_KEY_REGEX
      raise UriService::InvalidVocabularyStringKeyError, "Invalid key (can only include lower case letters, numbers or underscores, but cannot start with an underscore): #{string_key}"
    end

    @db.transaction do
      begin
        @db[UriService::VOCABULARIES].insert(string_key: string_key, display_label: display_label)
      rescue Sequel::UniqueConstraintViolation
        raise UriService::ExistingVocabularyStringKeyError, "A vocabulary already exists with string key: #{string_key}"
      end
    end
  end
end

#delete_term(uri, commit = true) ⇒ Object



420
421
422
423
424
425
426
427
428
429
430
# File 'lib/uri_service/client.rb', line 420

# Removes the term with the given uri from the terms table and from Solr.
#
# uri    - URI of the term to delete.
# commit - when true (the default) a Solr commit is issued right after the delete.
#
# Both deletes run inside one @db.transaction block, wrapped in
# handle_database_disconnect.
def delete_term(uri, commit=true)
  handle_database_disconnect do
    @db.transaction do
      @db[UriService::TERMS].where(uri: uri).delete
      @rsolr_pool.with do |solr|
        solr.delete_by_query('uri:' + UriService.solr_escape(uri))
        next unless commit
        solr.commit
      end
    end
  end
end

#delete_vocabulary(vocabulary_string_key) ⇒ Object

Deletes the vocabulary with the given string key.



414
415
416
417
418
# File 'lib/uri_service/client.rb', line 414

# Deletes the vocabularies row whose string_key equals vocabulary_string_key.
# Only the vocabularies table is touched here.
def delete_vocabulary(vocabulary_string_key)
  handle_database_disconnect do
    matching_vocabularies = @db[UriService::VOCABULARIES].where(string_key: vocabulary_string_key)
    matching_vocabularies.delete
  end
end

#disconnect!Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/uri_service/client.rb', line 67

# Releases the database handle and the Solr connection pool.
# Each instance variable is cleared before the resource is shut down, and a
# handle that is already nil is skipped, so calling this twice is harmless.
def disconnect!
  unless @db.nil?
    database, @db = @db, nil
    database.disconnect
  end

  unless @rsolr_pool.nil?
    solr_pool, @rsolr_pool = @rsolr_pool, nil
    # connection_pool gem docs say that shutting down is optional and the pool
    # would be garbage collected anyway, but this doesn't hurt.
    solr_pool.shutdown { |_connection| }
  end
end

#do_solr_commitObject



491
492
493
494
495
# File 'lib/uri_service/client.rb', line 491

# Checks a connection out of the Solr pool and issues a commit on it.
def do_solr_commit
  @rsolr_pool.with(&:commit)
end

#find_term_by_internal_id(id) ⇒ Object

Finds the term with the given internal id



290
291
292
293
# File 'lib/uri_service/client.rb', line 290

# Looks up a term by its internal_id via find_terms_where (limit 1).
# Returns the term hash, or nil unless exactly one result comes back.
def find_term_by_internal_id(id)
  matches = find_terms_where({ internal_id: id }, 1)
  return nil unless matches.length == 1

  matches.first
end

#find_term_by_uri(uri) ⇒ Object

Finds the term with the given uri



284
285
286
287
# File 'lib/uri_service/client.rb', line 284

# Looks up a term by its uri via find_terms_where (limit 1).
# Returns the term hash, or nil unless exactly one result comes back.
def find_term_by_uri(uri)
  matches = find_terms_where({ uri: uri }, 1)
  return nil unless matches.length == 1

  matches.first
end

#find_terms_by_query(vocabulary_string_key, value_query, limit = 10, start = 0) ⇒ Object



340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/uri_service/client.rb', line 340

# Searches one vocabulary for terms matching value_query using the Solr
# 'suggest' request handler.
#
# vocabulary_string_key - vocabulary to search within (applied as a filter query).
# value_query           - text to search for; a blank query falls back to list_terms.
# limit / start         - paging values passed to Solr as rows / start.
#
# Returns an array of frozen term hashes (empty when nothing matches).
def find_terms_by_query(vocabulary_string_key, value_query, limit=10, start=0)
  return list_terms(vocabulary_string_key, limit, start) if value_query.blank?

  matching_terms = []
  @rsolr_pool.with do |solr|
    request_params = {
      q: UriService.solr_escape(value_query),
      fq: 'vocabulary_string_key:' + UriService.solr_escape(vocabulary_string_key),
      rows: limit,
      start: start,
      sort: 'score desc, value_ssort asc, uri asc' # For consistent sorting
    }

    # For efficiency, we only do whole term matches for queries < 3 characters
    request_params.update(qf: 'value_suggest', pf: 'value_suggest') if value_query.length < 3

    result_section = solr.get('suggest', params: request_params)['response']
    if result_section['numFound'] > 0
      result_section['docs'].each { |solr_doc| matching_terms << term_solr_doc_to_frozen_term_hash(solr_doc) }
    end
  end
  matching_terms
end

#find_terms_where(opts, limit = 10) ⇒ Object

Finds terms that match the specified conditions



296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/uri_service/client.rb', line 296

# Finds terms whose core fields exactly match the given conditions.
#
# opts  - hash of field name => value; every key must be one of CORE_FIELD_NAMES.
#         Each pair becomes a quoted Solr filter query.
# limit - maximum number of rows requested from Solr.
#
# Returns an array of frozen term hashes (empty when nothing matches).
# Raises UriService::UnsupportedSearchFieldError for any non-core field.
def find_terms_where(opts, limit=10)
  # Only search on allowed fields
  unsupported_search_fields = opts.map { |field_name, _val| field_name.to_s } - CORE_FIELD_NAMES
  if unsupported_search_fields.present?
    raise UriService::UnsupportedSearchFieldError, "Unsupported search fields: #{unsupported_search_fields.join(', ')}"
  end

  filter_queries = []
  opts.each do |field_name, val|
    filter_queries << (field_name.to_s + ':"' + UriService.solr_escape(val.to_s) + '"')
  end

  @rsolr_pool.with do |solr|
    # Note: We don't sort by solr score because solr fq searches don't factor into the score
    request_params = {
      q: '*:*',
      fq: filter_queries,
      rows: limit,
      sort: 'value_ssort asc, uri asc' # For consistent sorting
    }
    solr_docs = solr.get('select', params: request_params)['response']['docs']
    return [] unless solr_docs.length > 0

    return solr_docs.map { |solr_doc| term_solr_doc_to_frozen_term_hash(solr_doc) }
  end
end

#find_vocabulary(vocabulary_string_key) ⇒ Object

Finds the vocabulary with the given string key.



277
278
279
280
281
# File 'lib/uri_service/client.rb', line 277

# Returns the first vocabularies row whose string_key equals
# vocabulary_string_key, as produced by the dataset's #first call.
def find_vocabulary(vocabulary_string_key)
  handle_database_disconnect do
    matching_vocabularies = @db[UriService::VOCABULARIES].where(string_key: vocabulary_string_key)
    matching_vocabularies.first
  end
end

#generate_frozen_term_hash(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id) ⇒ Object



216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/uri_service/client.rb', line 216

# Builds the read-only, string-keyed hash that represents a term to callers.
#
# Core fields go in first; 'authority' is left out when it is the empty string.
# Every additional_fields pair is then copied in (overwriting a core key of the
# same name), entries with nil values are dropped, and the hash is frozen.
def generate_frozen_term_hash(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id)
  term = { 'uri' => uri, 'value' => value, 'type' => type }
  term['authority'] = authority unless authority == ''
  term['vocabulary_string_key'] = vocabulary_string_key
  term['internal_id'] = internal_id

  additional_fields.each { |field_name, field_value| term[field_name] = field_value }

  # Drop nil values, then freeze to make this a read-only hash
  term.reject! { |_field_name, field_value| field_value.nil? }
  term.freeze
end

#generate_uri_for_temporary_term(vocabulary_string_key, term_value) ⇒ Object



211
212
213
214
# File 'lib/uri_service/client.rb', line 211

# Derives the URI for a TEMPORARY term: @temporary_uri_base followed by the
# SHA-256 hex digest of vocabulary_string_key concatenated with term_value.
# The same vocabulary/value pair therefore always yields the same URI string.
def generate_uri_for_temporary_term(vocabulary_string_key, term_value)
  digest = Digest::SHA256.hexdigest(vocabulary_string_key + term_value)
  URI(@temporary_uri_base + digest).to_s
end

#handle_database_disconnectObject



481
482
483
484
485
486
487
488
489
# File 'lib/uri_service/client.rb', line 481

# Runs the given block, retrying it when the database connection drops.
#
# The block is attempted up to 3 times in total. If every attempt raises
# Sequel::DatabaseDisconnectError, the last error is re-raised so callers are
# told the operation did not happen (rather than silently receiving nil).
# Any other exception propagates immediately without a retry.
#
# Returns the block's return value.
def handle_database_disconnect
  tries = 3
  begin
    yield
  rescue Sequel::DatabaseDisconnectError
    tries -= 1
    retry unless tries == 0
    raise # Out of attempts: surface the disconnect instead of swallowing it
  end
end

#list_terms(vocabulary_string_key, limit = 10, start = 0) ⇒ Object

Lists terms alphabetically and supports paging through results. Useful for browsing through a term list without a query.



388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
# File 'lib/uri_service/client.rb', line 388

# Lists the terms of one vocabulary in value order, with paging.
#
# vocabulary_string_key - vocabulary to list (applied as a Solr filter query).
# limit / start         - paging values passed to Solr as rows / start.
#
# Returns an array of frozen term hashes (empty when the vocabulary has no terms).
def list_terms(vocabulary_string_key, limit=10, start=0)
  listed_terms = []
  @rsolr_pool.with do |solr|
    request_params = {
      fq: 'vocabulary_string_key:' + UriService.solr_escape(vocabulary_string_key),
      q: '*:*',
      rows: limit,
      start: start,
      sort: 'value_ssort asc, uri asc' # Include 'uri asc' as part of sort to ensure consistent sorting
    }

    result_section = solr.get('select', params: request_params)['response']
    next unless result_section['numFound'] > 0

    result_section['docs'].each { |solr_doc| listed_terms << term_solr_doc_to_frozen_term_hash(solr_doc) }
  end
  listed_terms
end

#list_vocabularies(limit = 10, start = 0) ⇒ Object

Lists vocabularies alphabetically (by string key) and supports paging through results.



379
380
381
382
383
384
# File 'lib/uri_service/client.rb', line 379

# Returns one page of vocabularies ordered by string_key.
#
# limit / start - passed to the dataset's limit(limit, start) call.
#
# Each row is returned as a string-keyed hash with the :id column removed.
def list_vocabularies(limit=10, start=0)
  handle_database_disconnect do
    vocabulary_page = @db[UriService::VOCABULARIES].order(:string_key).limit(limit, start)
    return vocabulary_page.map { |vocabulary_row| vocabulary_row.except(:id).stringify_keys! }
  end
end

#reindex_all_terms(clear = false, print_progress_to_console = false) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/uri_service/client.rb', line 26

# Re-sends every row of the terms table to Solr, then commits once at the end.
#
# clear                     - when true, all Solr documents are deleted first
#                             (the delete is committed by the final commit).
# print_progress_to_console - when true, the term count and a running progress
#                             counter are printed to stdout.
#
# Rows are read in :id order in pages of 100. A row whose additional_fields
# column is NULL is indexed with an empty hash of additional fields.
def reindex_all_terms(clear=false, print_progress_to_console=false)
  self.handle_database_disconnect do

    if print_progress_to_console
      puts "Getting database term count..."
      total = @db[UriService::TERMS].count
      reindex_counter = 0
      puts "Number of terms to index: #{total.to_s}"
      puts ""
    end

    if clear
      @rsolr_pool.with do |rsolr|
        rsolr.delete_by_query('*:*')
      end
    end

    # Need to use unambiguous order when using paged_each, so we choose to order by DB :id
    @db[UriService::TERMS].order(:id).paged_each(:rows_per_fetch=>100) do |term_db_row|
      # JSON.parse(nil) raises TypeError, so treat a NULL column as "no additional fields"
      stored_additional_fields = term_db_row[:additional_fields]
      additional_fields = stored_additional_fields.nil? ? {} : JSON.parse(stored_additional_fields)

      self.send_term_to_solr(
        term_db_row[:vocabulary_string_key],
        term_db_row[:value],
        term_db_row[:uri],
        term_db_row[:authority],
        additional_fields,
        term_db_row[:type],
        term_db_row[:id],
      false) # Skip per-document commits; a single commit happens below

      if print_progress_to_console
        reindex_counter += 1
        print "\rIndexed #{reindex_counter.to_s} of #{total.to_s}"
      end
    end

    puts "\n" + "Committing solr updates..." if print_progress_to_console
    self.do_solr_commit
    puts "Done." if print_progress_to_console
  end
end

#required_tables_exist?Boolean

Returns:

  • (Boolean)


101
102
103
# File 'lib/uri_service/client.rb', line 101

# True when every table named by UriService.required_tables appears in @db.tables.
def required_tables_exist?
  missing_tables = UriService.required_tables - @db.tables
  missing_tables.empty?
end

#send_term_to_solr(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id, commit = true) ⇒ Object

Index the DB row term data into solr



253
254
255
256
257
258
259
# File 'lib/uri_service/client.rb', line 253

# Adds (or replaces) one term's document in Solr.
#
# The document is built by create_term_solr_doc from the given term data.
# commit - when true (the default) a Solr commit is issued right after the add;
#          pass false to batch several adds under one later commit.
def send_term_to_solr(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id, commit=true)
  solr_doc = create_term_solr_doc(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id)
  @rsolr_pool.with do |solr|
    solr.add(solr_doc)
    next unless commit
    solr.commit
  end
end

#term_solr_doc_to_frozen_term_hash(term_solr_doc) ⇒ Object



327
328
329
330
331
332
333
334
335
336
337
338
# File 'lib/uri_service/client.rb', line 327

# Converts a Solr document for a term into the frozen term hash produced by
# generate_frozen_term_hash.
#
# Note: the core fields are removed from term_solr_doc with Hash#delete, so the
# passed-in document is modified. 'additional_fields' is expected to hold a
# JSON string, which is parsed back into a hash.
def term_solr_doc_to_frozen_term_hash(term_solr_doc)
  uri, vocabulary_string_key, value, authority, type =
    %w[uri vocabulary_string_key value authority type].map { |field_name| term_solr_doc.delete(field_name) }
  additional_fields = JSON.parse(term_solr_doc.delete('additional_fields'))
  internal_id = term_solr_doc.delete('internal_id')

  generate_frozen_term_hash(vocabulary_string_key, value, uri, authority, additional_fields, type, internal_id)
end

#test_connectionObject



94
95
96
97
98
99
# File 'lib/uri_service/client.rb', line 94

# Exercises both backends, letting any connection error propagate:
# the database via @db.test_connection, then Solr via a GET of 'admin/ping'.
def test_connection
  # Raises Sequel::DatabaseConnectionError if connection didn't work
  @db.test_connection
  # Raises Errno::ECONNREFUSED if connection didn't work
  @rsolr_pool.with { |solr| solr.get('admin/ping') }
end

#update_term(uri, opts, merge_additional_fields = true) ⇒ Object

opts format: {:value => 'new value', :authority => 'newauthority', :additional_fields => {'key' => 'value'}}



448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
# File 'lib/uri_service/client.rb', line 448

# Updates the stored value, authority and/or additional fields of a term.
#
# uri  - URI of the term to update.
# opts - symbol-keyed hash read for :value, :authority and :additional_fields.
#        A missing (or nil/false) :value or :authority keeps the stored one.
# merge_additional_fields - when true (the default) opts[:additional_fields] is
#        merged into the stored fields and nil-valued entries are removed;
#        when false it replaces the stored fields entirely.
#
# Returns the frozen term hash for the updated term.
# Raises UriService::NonExistentUriError when no term has the given uri, and
# UriService::CannotChangeTemporaryTermValue when a TEMPORARY term's value
# would change. Additional field keys are checked by validate_additional_field_keys.
def update_term(uri, opts, merge_additional_fields=true)
  handle_database_disconnect do
    existing_row = @db[UriService::TERMS].first(uri: uri)
    raise UriService::NonExistentUriError, "No term found with uri: " + uri if existing_row.nil?

    updated_value = opts[:value] || existing_row[:value]
    updated_authority = opts[:authority] || existing_row[:authority]
    stored_additional_fields = existing_row[:additional_fields]
    updated_additional_fields = stored_additional_fields.nil? ? {} : JSON.parse(stored_additional_fields)

    # TEMPORARY terms cannot have their values changed, but it is possible to update other fields
    if existing_row[:type] == UriService::TermType::TEMPORARY && updated_value != existing_row[:value]
      raise UriService::CannotChangeTemporaryTermValue, "The value of a temporary term cannot be changed. Delete unusued temporary terms or create a new one with a different value."
    end

    requested_additional_fields = opts[:additional_fields]
    if requested_additional_fields.nil?
      # Nothing supplied: keep the stored additional fields as they are.
    elsif merge_additional_fields
      updated_additional_fields.merge!(requested_additional_fields)
      # Delete nil values. This is a way to clear data in additional_fields.
      updated_additional_fields.delete_if { |_field_name, field_value| field_value.nil? }
    else
      updated_additional_fields = requested_additional_fields
    end
    validate_additional_field_keys(updated_additional_fields)

    @db.transaction do
      @db[UriService::TERMS].where(uri: uri).update(
        value: updated_value,
        value_hash: Digest::SHA256.hexdigest(updated_value),
        authority: updated_authority,
        additional_fields: JSON.generate(updated_additional_fields)
      )
      send_term_to_solr(existing_row[:vocabulary_string_key], updated_value, uri, updated_authority, updated_additional_fields, existing_row[:type], existing_row[:id])
    end

    return generate_frozen_term_hash(existing_row[:vocabulary_string_key], updated_value, uri, updated_authority, updated_additional_fields, existing_row[:type], existing_row[:id])
  end
end

#update_vocabulary(string_key, new_display_label) ⇒ Object

Updates the display label of the vocabulary with the given string key.



436
437
438
439
440
441
442
443
444
445
# File 'lib/uri_service/client.rb', line 436

# Changes the display label of an existing vocabulary.
#
# string_key        - string key identifying the vocabulary.
# new_display_label - replacement display label.
#
# Raises UriService::NonExistentVocabularyError when no vocabulary has that
# string key (for any class of string_key, e.g. a Symbol).
def update_vocabulary(string_key, new_display_label)
  self.handle_database_disconnect do
    dataset = @db[UriService::VOCABULARIES].where(string_key: string_key)
    # Interpolate so a non-String key cannot turn this into a TypeError
    raise UriService::NonExistentVocabularyError, "No vocabulary found with string_key: #{string_key}" if dataset.count == 0

    @db.transaction do
      dataset.update(display_label: new_display_label)
    end
  end
end

#validate_additional_field_keys(additional_fields) ⇒ Object

Validates additional_fields and verifies that no reserved words are supplied



262
263
264
265
266
267
268
269
270
271
# File 'lib/uri_service/client.rb', line 262

# Validates the keys of an additional_fields hash.
#
# A key (compared by its to_s form, so Strings and Symbols are treated alike)
# must not be one of CORE_FIELD_NAMES and must match
# ALPHANUMERIC_UNDERSCORE_KEY_REGEX.
#
# Returns additional_fields when every key is acceptable.
# Raises UriService::InvalidAdditionalFieldKeyError for the first offending key.
def validate_additional_field_keys(additional_fields)
  additional_fields.each do |key, _value|
    key_name = key.to_s
    if CORE_FIELD_NAMES.include?(key_name)
      raise UriService::InvalidAdditionalFieldKeyError, "Cannot supply the key \"#{key_name}\" as an additional field because it is a reserved key."
    end
    unless key_name =~ ALPHANUMERIC_UNDERSCORE_KEY_REGEX
      # Use the String form here too: concatenating a Symbol key raised TypeError
      raise UriService::InvalidAdditionalFieldKeyError, "Invalid key (can only include lower case letters, numbers or underscores, but cannot start with an underscore): " + key_name
    end
  end
end