Class: CDMDEXER::ETLWorker

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Sidekiq::Worker
Defined in:
lib/cdmdexer/etl_worker.rb

Overview

Extracts records from OAI, deletes records marked for deletion, and sends everything else to a transformation / load worker.

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#batch_size ⇒ Object (readonly)

Returns the value of attribute batch_size.



14
15
16
# File 'lib/cdmdexer/etl_worker.rb', line 14

# Reads @batch_size, which #perform assigns from the optional
# 'batch_size' config key (default 5) after coercing it with #to_i.
def batch_size
  @batch_size
end

#cdm_endpoint ⇒ Object (readonly)

Returns the value of attribute cdm_endpoint.



14
15
16
# File 'lib/cdmdexer/etl_worker.rb', line 14

# Reads @cdm_endpoint, which #perform assigns from the 'cdm_endpoint'
# config key (fetched without a default, so the key is required).
def cdm_endpoint
  @cdm_endpoint
end

#completed_callback_klass ⇒ Object

Because Sidekiq serializes params to JSON, we provide custom setters for dependencies (normally these would be default params in the constructor) so that they may be mocked and tested



64
65
66
# File 'lib/cdmdexer/etl_worker.rb', line 64

# Callback class that #run_next_batch! invokes with `.call!(config)` when
# no further batch is enqueued. Uses whatever is already stored in
# @completed_callback_klass; otherwise stores and returns
# CDMDEXER::CompletedCallback.
def completed_callback_klass
  return @completed_callback_klass if @completed_callback_klass

  @completed_callback_klass = CDMDEXER::CompletedCallback
end

#config ⇒ Object (readonly)

Returns the value of attribute config.



14
15
16
# File 'lib/cdmdexer/etl_worker.rb', line 14

# Reads @config, which #perform sets to the exact config argument it was
# called with. #run_next_batch! passes this value to the completed callback.
def config
  @config
end

#etl_worker_klass ⇒ Object



68
69
70
# File 'lib/cdmdexer/etl_worker.rb', line 68

# Worker class that #run_next_batch! enqueues (via `.perform_async`) for
# the following batch. Uses whatever is already stored in
# @etl_worker_klass; otherwise stores and returns ETLWorker.
def etl_worker_klass
  return @etl_worker_klass if @etl_worker_klass

  @etl_worker_klass = ETLWorker
end

#field_mappings ⇒ Object (readonly)

Returns the value of attribute field_mappings.



14
15
16
# File 'lib/cdmdexer/etl_worker.rb', line 14

# Reads @field_mappings, which #perform assigns from the optional
# 'field_mappings' config key; it is false when that key is absent.
def field_mappings
  @field_mappings
end

#is_recursive ⇒ Object (readonly)

Returns the value of attribute is_recursive.



14
15
16
# File 'lib/cdmdexer/etl_worker.rb', line 14

# Reads @is_recursive, which #perform assigns from the optional
# 'is_recursive' config key (default true). #run_next_batch! only enqueues
# another batch when this value is truthy.
def is_recursive
  @is_recursive
end

#load_worker_klass ⇒ Object



76
77
78
# File 'lib/cdmdexer/etl_worker.rb', line 76

# Load worker class dependency. Uses whatever is already stored in
# @load_worker_klass; otherwise stores and returns LoadWorker.
def load_worker_klass
  return @load_worker_klass if @load_worker_klass

  @load_worker_klass = LoadWorker
end

#oai_endpoint ⇒ Object (readonly)

Returns the value of attribute oai_endpoint.



14
15
16
# File 'lib/cdmdexer/etl_worker.rb', line 14

# Reads @oai_endpoint, which #perform assigns from the 'oai_endpoint'
# config key (fetched without a default, so the key is required) and then
# passes to the OAI request as `endpoint_url`.
def oai_endpoint
  @oai_endpoint
end

#oai_request_klass ⇒ Object



72
73
74
# File 'lib/cdmdexer/etl_worker.rb', line 72

# Class that #perform instantiates to build the OAI request. Uses whatever
# is already stored in @oai_request_klass; otherwise stores and returns
# OaiRequest.
def oai_request_klass
  return @oai_request_klass if @oai_request_klass

  @oai_request_klass = OaiRequest
end

#resumption_token ⇒ Object (readonly)

Returns the value of attribute resumption_token.



14
15
16
# File 'lib/cdmdexer/etl_worker.rb', line 14

# Reads @resumption_token, which #perform assigns from the optional
# 'resumption_token' config key (nil when absent) and then passes to the
# OAI request.
def resumption_token
  @resumption_token
end

#solr_config ⇒ Object (readonly)

Returns the value of attribute solr_config.



14
15
16
# File 'lib/cdmdexer/etl_worker.rb', line 14

# Reads @solr_config, which #perform assigns from the 'solr_config' config
# key (fetched without a default, so the key is required) after calling
# #symbolize_keys on it.
def solr_config
  @solr_config
end

#transform_worker_klass ⇒ Object



80
81
82
# File 'lib/cdmdexer/etl_worker.rb', line 80

# Transform worker class dependency. Uses whatever is already stored in
# @transform_worker_klass; otherwise stores and returns TransformWorker.
def transform_worker_klass
  return @transform_worker_klass if @transform_worker_klass

  @transform_worker_klass = TransformWorker
end

Instance Method Details

#perform(config) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/cdmdexer/etl_worker.rb', line 29

# Job entry point: unpacks the config, builds the OAI request, processes
# the current batch and then hands off to #run_next_batch!.
#
# Sidekiq stores job params as JSON, so collaborators cannot be injected as
# objects; everything needed is read out of the string-keyed config instead
# (otherwise the OAI request and extractor objects would simply be passed
# in).
#
# @param config [Hash] string-keyed settings. 'solr_config', 'cdm_endpoint'
#   and 'oai_endpoint' are fetched without a default; 'field_mappings',
#   'resumption_token', 'batch_size', 'is_recursive', 'set_spec' and
#   'after_date' are optional.
# @return [Object] whatever #run_next_batch! returns
def perform(config)
  @config = config

  # Settings fetched without a fallback value.
  @solr_config  = config.fetch('solr_config').symbolize_keys
  @cdm_endpoint = config.fetch('cdm_endpoint')
  @oai_endpoint = config.fetch('oai_endpoint')

  # Settings that fall back to a default when the key is absent.
  @field_mappings   = config.fetch('field_mappings', false)
  @resumption_token = config.fetch('resumption_token', nil)
  @batch_size       = config.fetch('batch_size', 5).to_i
  @is_recursive     = config.fetch('is_recursive', true)

  request_options = {
    endpoint_url: oai_endpoint,
    resumption_token: resumption_token,
    set_spec: config.fetch('set_spec', nil),
    # Optional date filter: only select records updated after this date.
    # The default (false) skips date-based filtering. This indexer expects
    # to see only parent records in OAI responses, so after updating a
    # child you may need to manually update its parent to signal that
    # something among the parent's children changed.
    # Rails example for getting a date: `after_date: 2.weeks.ago`
    after_date: config.fetch('after_date', false)
  }
  @oai_request = oai_request_klass.new(**request_options)

  run_batch!
  run_next_batch!
end

#run_next_batch! ⇒ Object

Recurse through OAI batches one at a time



85
86
87
88
89
90
91
# File 'lib/cdmdexer/etl_worker.rb', line 85

# Decides what happens after the current batch: when there is a next
# resumption token and recursion is enabled, the next batch is enqueued on
# the ETL worker class with `next_config`; otherwise the completed callback
# is invoked with the current config.
def run_next_batch!
  keep_going = next_resumption_token && is_recursive
  return completed_callback_klass.call!(config) unless keep_going

  etl_worker_klass.perform_async(next_config)
end