Class: Aspire::Caching::Builder

Inherits:
Object
  • Object
show all
Includes:
Util, Exceptions
Defined in:
lib/aspire/caching/builder.rb

Overview

Caches Aspire API objects and their references

Constant Summary

Constants included from Util

Util::CACHEABLE

Constants included from Util

Util::LD_API_URI

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Util

#add_filename_prefix, #add_filename_suffix, #cacheable_url, #end_of_path?, #mkdir, #references, #rm, #rmdir_empty, #strip_ext, #strip_filename_prefix, #strip_filename_suffix, #strip_prefix, #strip_suffix

Methods included from Util

#child_url?, #duration, #id_from_uri, #item?, #linked_data, #linked_data_path, #list?, #list_url?, #module?, #parent_url?, #parse_url, #resource?, #section?, #url_for_comparison, #url_path, #user?

Constructor Details

#initialize(cache = nil) ⇒ void

Initialises a new Builder instance

Parameters:



26
27
28
# File 'lib/aspire/caching/builder.rb', line 26

# Initialises a new Builder instance
# @param cache [Aspire::Caching::Cache, nil] the Aspire cache
# @return [void]
def initialize(cache = nil)
  # Assigned through the cache= writer rather than directly to @cache
  self.cache = cache
end

Instance Attribute Details

#cache ⇒ Aspire::Caching::Cache

Returns the Aspire cache.

Returns:



21
22
23
# File 'lib/aspire/caching/builder.rb', line 21

# Returns the Aspire cache held by this instance
# @return [Aspire::Caching::Cache] the Aspire cache
def cache
  @cache
end

Instance Method Details

#build(enumerator, clear: false) ⇒ Integer

Builds a cache of Aspire lists from the Aspire All Lists report

Parameters:

Returns:

  • (Integer)

    the number of lists cached



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/aspire/caching/builder.rb', line 35

# Builds a cache of Aspire lists from the Aspire All Lists report
# @param enumerator [#each] yields the report rows; each row is indexed with
#   'List Link' to obtain the URL of the list to cache
# @param clear [Boolean] if true, the cache is emptied before building
# @return [Integer] the number of lists cached (one is counted for every
#   row handed to write_list)
def build(enumerator, clear: false)
  # Empty the cache if required
  cache.clear if clear
  # Cache the enumerated lists
  # - call with reload: false so that existing cache entries are ignored
  #   to speed up processing
  lists = 0
  time = Benchmark.measure do
    enumerator.each do |row|
      write_list(row['List Link'], reload: false)
      lists += 1
    end
  end
  # Log completion
  cache.logger.info("#{lists} lists cached in #{duration(time)}")
  # Return the count explicitly; without this the method would return the
  # result of the logger call rather than the documented Integer
  lists
end

#resume(enumerator) ⇒ Object

Resumes an interrupted build

Parameters:



55
56
57
58
59
60
61
62
# File 'lib/aspire/caching/builder.rb', line 55

# Resumes an interrupted build
# @param enumerator [#each] the report rows, passed on unchanged to build
# @return [Object] whatever build returns
def resume(enumerator)
  log = cache.logger
  log.info('Resuming previous build')
  # Lists still marked as in-progress are reloaded before carrying on
  reload_marked_lists
  # Continue the build without clearing what has already been cached
  build(enumerator, clear: false)
end

#write(url = nil, data = nil, list: nil, reload: true, urls: {}) ⇒ void

This method returns an undefined value.

Caches an Aspire linked data API object.

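Use write(url, reload: false) to build a cache for the first time; entries already in the cache are left untouched.
Use write(url, reload: true) to reload parts of the cache (this is the default when reload is omitted).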

Parameters:

  • url (String, Aspire::Caching::CacheEntry) (defaults to: nil)

    the URL or cache entry of the API object

  • data (Hash, nil) (defaults to: nil)

    the parsed JSON data to be written to the cache; if omitted, this is read from the API

  • list (Aspire::Caching::CacheEntry) (defaults to: nil)

    the parent list cache entry; if present, this implies that references to other lists are ignored

  • reload (Boolean) (defaults to: true)

    if true, reload the cache entry from the API, otherwise do nothing if the entry is already in the cache

  • urls (Hash) (defaults to: {})

    the set of URLs handled in the current operation



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/aspire/caching/builder.rb', line 77

# Caches an Aspire linked data API object.
#
# Parsed data from the Linked Data API has the following structure:
#   { url => {primary-object},
#     related-url1 => {related-object1}, ... }
# where url => {primary-object} is the object referenced by the url
# parameter, and the related URLs/objects are objects referenced by the
# primary object and included in the API response.
#
# The primary and related objects are written to the cache before any
# object references within them are followed. This should reduce
# unnecessary duplication of API calls.
#
# Some objects with a linked data URL are not accessible through that API
# (e.g. users /users/<user-id> are not accessible, but user notes
# /users/<user-id>/notes<note-id> are accessible).
#
# Some objects with a linked data URL are accessible through the API but do
# not return JSON-LD (e.g. events /events/<event-id> return regular JSON
# rather than JSON-LD). These objects are cached but no attempt is made to
# follow LD references within them.
#
# @param url [String, Aspire::Caching::CacheEntry] the URL or cache entry
# @param data [Hash, nil] the parsed JSON data to be written to the cache;
#   if omitted, this is read from the API
# @param list [Aspire::Caching::CacheEntry] the parent list cache entry;
#   if present, this implies that references to other lists are ignored
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @param urls [Hash] the set of URLs handled in the current operation
# @return [void]
# @raise [Exception] any error that is not a StandardError is logged and
#   then re-raised
def write(url = nil, data = nil, list: nil, reload: true, urls: {})
  entry = cache_entry(url, list)
  # Nothing to do without a cache entry
  return unless entry
  # Nothing to do when write? declines this entry
  return unless write?(entry, urls, list, reload)
  write_data(entry, urls, data, list, reload)
rescue NotCacheable
  # Deliberately ignored: objects which are not cacheable are skipped
rescue StandardError => e
  # Report and log the error, then carry on processing
  Raven.capture_exception(e)
  cache.logger.error(e.to_s)
rescue Exception => e
  # Anything more serious is reported and logged as fatal, then passed on
  # to the caller
  Raven.capture_exception(e)
  cache.logger.fatal(e.to_s)
  raise e
end

#write_list(url = nil, data = nil, reload: true) ⇒ void

This method returns an undefined value.

Caches an Aspire linked data API list object and ignores any references to other lists

Parameters:

  • url (String, Aspire::Caching::CacheEntry) (defaults to: nil)

    the URL or cache entry of the API list object

  • data (Hash, nil) (defaults to: nil)

    the parsed JSON data to be written to the cache; if omitted, this is read from the API

  • reload (Boolean) (defaults to: true)

    if true, reload the cache entry from the API, otherwise do nothing if the entry is already in the cache



128
129
130
131
132
133
134
# File 'lib/aspire/caching/builder.rb', line 128

# Caches an Aspire linked data API list object and ignores any references
# to other lists
# @param url [String, Aspire::Caching::CacheEntry] the URL or cache entry of
#   the API list object
# @param data [Hash, nil] the parsed JSON data to be written to the cache;
#   if omitted, this is read from the API
# @param reload [Boolean] if true, reload the cache entry from the API,
#   otherwise do nothing if the entry is already in the cache
# @return [void]
# @raise [ArgumentError] if no cache entry is available for the URL or the
#   entry is not a list
def write_list(url = nil, data = nil, reload: true)
  entry = cache_entry(url)
  # Guard against a missing entry as well as a non-list entry, so that the
  # caller sees ArgumentError rather than NoMethodError on nil
  raise ArgumentError, 'List expected' unless entry && entry.list?
  # The entry is passed as its own parent list
  write(entry, data, list: entry, reload: reload)
rescue NotCacheable
  # Deliberately ignored: objects which are not cacheable are skipped
end