Module: Cul::Hydra::Indexer
- Defined in:
- lib/cul_hydra/indexer.rb
Constant Summary collapse
- NUM_FEDORA_RETRY_ATTEMPTS =
3
- DELAY_BETWEEN_FEDORA_RETRY_ATTEMPTS =
5.seconds
- DEFAULT_INDEX_OPTS =
{ skip_generic_resources: false, verbose_output: false, softcommit: true, reraise: false }.freeze
Class Method Summary collapse
- .descend_from(pid, pids_to_omit = nil, verbose_output = false) ⇒ Object
-
.extract_index_opts(args) ⇒ Object
This is a compatibility method that bridges the previously used positional arguments to keyword arguments by extracting an opts hash from varargs. Legacy positional opts signature: skip_resources = false, verbose_output = false, softcommit = true. Keyword defaults are in DEFAULT_INDEX_OPTS.
- .index_pid(pid, *args) ⇒ Object
- .recursively_index_fedora_objects(top_pid, pids_to_omit = nil, skip_generic_resources = false, verbose_output = false) ⇒ Object
Class Method Details
.descend_from(pid, pids_to_omit = nil, verbose_output = false) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/cul_hydra/indexer.rb', line 10
#
# Walks a Fedora object and all of its recursive members, yielding each pid
# to the caller's block (the caller decides what "indexing" means).
#
# The top-level pid is yielded first unless it appears in +pids_to_omit+.
# Member pids come from Cul::Hydra::RisearchMembers.get_recursive_member_pids
# and are yielded in the order returned, minus anything in +pids_to_omit+.
#
# @param pid [String] pid of the topmost object (e.g. "ldpd:123")
# @param pids_to_omit [Array<String>, nil] pids that must not be yielded
# @param verbose_output [Boolean] when true, prints per-object progress
# @yieldparam pid [String] a pid that should be indexed
# @raise [RuntimeError] if +pid+ is blank or no Fedora object exists for it
# @note RestClient::Unauthorized is rescued: the error is printed (and logged
#   when a +logger+ is available) and the walk stops without re-raising.
def self.descend_from(pid, pids_to_omit=nil, verbose_output=false)
  if pid.blank?
    raise 'Please supply a pid (e.g. rake recursively_index_fedora_objects pid=ldpd:123)'
  end

  begin
    unless ActiveFedora::Base.exists?(pid)
      raise 'Could not find Fedora object with pid: ' + pid
    end

    if pids_to_omit.present? && pids_to_omit.include?(pid)
      puts 'Skipping topmost object in this set (' + pid + ') because it has been intentionally omitted...' if verbose_output
    else
      puts 'Indexing topmost object in this set (' + pid + ')...' if verbose_output
      puts 'If this is a BagAggregator with a lot of members, this may take a while...' if verbose_output
      yield pid
    end

    puts 'Recursively retreieving and indexing all members of ' + pid + '...'

    unique_pids = Cul::Hydra::RisearchMembers.get_recursive_member_pids(pid, true)
    total_number_of_members = unique_pids.length
    puts 'Recursive search found ' + total_number_of_members.to_s + ' members.' if verbose_output

    if pids_to_omit.present?
      unique_pids = unique_pids - pids_to_omit
      total_number_of_members = unique_pids.length
      puts 'After checking against the list of omitted pids, the total number of objects to index will be: ' + total_number_of_members.to_s if verbose_output
    end

    # The block parameter is deliberately NOT named +pid+ so the method
    # argument stays visible to the rescue clause below.
    unique_pids.each_with_index do |member_pid, position|
      puts 'Recursing on ' + (position + 1).to_s + ' of ' + total_number_of_members.to_s + ' members (' + member_pid + ')...' if verbose_output
      yield member_pid
    end
  rescue RestClient::Unauthorized => e
    error_message = "Skipping #{pid} due to error: " + e.message + '. Problem with Fedora object?'
    puts error_message
    logger.error error_message if defined?(logger)
  end

  puts 'Recursion complete!'
end
.extract_index_opts(args) ⇒ Object
This is a compatibility method that bridges the previously used positional arguments to keyword arguments by extracting an opts hash from varargs. Legacy positional opts signature: skip_resources = false, verbose_output = false, softcommit = true. Keyword defaults are in DEFAULT_INDEX_OPTS.
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/cul_hydra/indexer.rb', line 79
#
# Builds the indexing options hash from a varargs list that may mix legacy
# positional flags with a trailing options hash.
#
# Precedence, lowest to highest: DEFAULT_INDEX_OPTS, the trailing hash (keys
# symbolized), then positional values. A positional value that is present
# always wins, even when it is an explicit nil.
#
# @param args [Array] varargs as received by the caller; not modified
# @return [Hash] options keyed by symbol
def self.extract_index_opts(args)
  # Work on a copy so the caller's array is left untouched.
  positional = args.dup
  supplied = positional.last.is_a?(Hash) ? positional.pop : {}

  symbolized = supplied.each_with_object({}) do |(key, value), acc|
    acc[key.to_sym] = value
  end
  resolved = DEFAULT_INDEX_OPTS.merge(symbolized)

  # Legacy positional order: skip_generic_resources, verbose_output, softcommit.
  [:skip_generic_resources, :verbose_output, :softcommit].each_with_index do |option_name, slot|
    resolved[option_name] = positional[slot] if positional.length > slot
  end

  resolved
end
.index_pid(pid, *args) ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/cul_hydra/indexer.rb', line 95
#
# Loads the Fedora object for +pid+ and (re)indexes it.
#
# Options are resolved through extract_index_opts, so both the legacy
# positional flags and a trailing options hash are accepted. Options read
# here: :skip_generic_resources, :softcommit, :verbose_output, :reraise.
#
# The load-and-index step is attempted up to NUM_FEDORA_RETRY_ATTEMPTS times.
# Connection failures and "Circular dependency detected while autoloading"
# RuntimeErrors are retried after a delay; once attempts are exhausted the
# error is raised to the outer handler instead of being dropped.
#
# @param pid [String] pid of the Fedora object to index
# @param args [Array] legacy positional flags and/or a trailing options hash
# @raise [Exception] the underlying error, only when the :reraise option is
#   truthy; otherwise the failure is printed and the record is skipped
def self.index_pid(pid, *args)
  # We found an object with the desired PID. Let's reindex it
  index_opts = extract_index_opts(args)

  begin
    NUM_FEDORA_RETRY_ATTEMPTS.times do |i|
      remaining_attempts = (NUM_FEDORA_RETRY_ATTEMPTS - 1) - i

      begin
        active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)

        if index_opts[:skip_generic_resources] && active_fedora_object.is_a?(GenericResource)
          puts 'Object was skipped because GenericResources are being skipped and it is a GenericResource.'
        else
          if index_opts[:softcommit]
            active_fedora_object.update_index
          else
            # Using direct solr query to update document without soft commiting
            ActiveFedora::SolrService.add(active_fedora_object.to_solr)
          end
          puts 'done.' if index_opts[:verbose_output]
        end

        break
      rescue RestClient::RequestTimeout, Errno::EHOSTUNREACH => e
        raise if remaining_attempts == 0

        Rails.logger.error "Error: Could not connect to fedora. (#{e.class.to_s + ': ' + e.message}). Will retry #{remaining_attempts} more #{remaining_attempts == 1 ? 'time' : 'times'} (after a #{DELAY_BETWEEN_FEDORA_RETRY_ATTEMPTS} second delay)."
        sleep DELAY_BETWEEN_FEDORA_RETRY_ATTEMPTS
      rescue RuntimeError => e
        # Other RuntimeErrors should be passed on
        raise unless e.message.include?('Circular dependency detected while autoloading')

        # The RuntimeError 'Circular dependency detected while autoloading CLASSNAME' comes up when
        # we're doing multithreaded indexing. Waiting a few seconds for the class to autoload and then
        # retrying seems to help with this.
        # On the final attempt there is nothing left to retry, so surface the
        # error rather than sleeping and leaving the object unindexed silently.
        raise if remaining_attempts == 0

        sleep 5
      end
    end
  rescue SystemExit, SignalException
    # Allow system interrupt (ctrl+c) and other termination signals to pass
    # through; Interrupt is a SignalException subclass.
    raise
  rescue Exception => e
    # Deliberately broad: one bad record must not abort a long bulk run
    # unless the caller asked for failures to propagate via :reraise.
    puts "Encountered problem with #{pid}. Skipping record. Exception class: #{e.class.name}. Message: #{e.message}"
    raise if index_opts[:reraise]
  end
end
.recursively_index_fedora_objects(top_pid, pids_to_omit = nil, skip_generic_resources = false, verbose_output = false) ⇒ Object
65 66 67 68 69 70 71 |
# File 'lib/cul_hydra/indexer.rb', line 65
#
# Indexes +top_pid+ and every recursive member by passing each pid that
# descend_from yields to index_pid.
#
# @param top_pid [String] pid of the topmost object
# @param pids_to_omit [Array<String>, nil] pids forwarded to descend_from
# @param skip_generic_resources [Boolean] forwarded to index_pid as an option
# @param verbose_output [Boolean] forwarded to both descend_from and index_pid
def self.recursively_index_fedora_objects(top_pid, pids_to_omit=nil, skip_generic_resources=false, verbose_output=false)
  # Only these two options are set explicitly.
  forwarded_opts = {
    skip_generic_resources: skip_generic_resources,
    verbose_output: verbose_output
  }

  descend_from(top_pid, pids_to_omit, verbose_output) { |member_pid| self.index_pid(member_pid, forwarded_opts) }
end