Class: GeoCombine::Indexer

Inherits:
Object
  • Object
show all
Defined in:
lib/geo_combine/indexer.rb

Overview

Indexes GeoBlacklight documents into Solr.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(solr: nil, logger: GeoCombine::Logger.logger) ⇒ Indexer

Returns a new instance of Indexer.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/geo_combine/indexer.rb', line 13

# Build an indexer and establish the Solr connection it writes to.
#
# @param solr [Object, nil] an already-configured Solr client; when given it
#   is used as-is and no URL discovery takes place. The rest of this class
#   calls #commit and #options on it.
# @param logger [Object] receives #warn / #info messages
def initialize(solr: nil, logger: GeoCombine::Logger.logger)
  @logger = logger

  # String#to_i turns non-numeric input into 0; a non-positive batch size
  # makes batching meaningless, so fall back to the default instead.
  @batch_size = ENV.fetch('SOLR_BATCH_SIZE', 100).to_i
  unless @batch_size.positive?
    @logger.warn 'SOLR_BATCH_SIZE must be a positive integer; using 100'
    @batch_size = 100
  end

  # An injected client wins outright: skip URL discovery so we neither touch
  # Blacklight nor log a misleading "SOLR_URL not set" warning for a URL that
  # would never be used.
  if solr
    @solr = solr
    return
  end

  # If SOLR_URL is set, use it; if in a Geoblacklight app, use its solr core
  solr_url = ENV.fetch('SOLR_URL', nil)
  solr_url ||= Blacklight.default_index.connection.base_uri.to_s if defined? Blacklight

  # If neither, warn and try to use local Blacklight default solr core
  if solr_url.nil?
    @logger.warn 'SOLR_URL not set; using Blacklight default'
    solr_url = 'http://localhost:8983/solr/blacklight-core'
  end

  # `client` is a helper defined elsewhere in this class (not shown here).
  @solr = RSolr.connect(client, url: solr_url)
end

Instance Attribute Details

#solr ⇒ Object (readonly)

Returns the value of attribute solr.



11
12
13
# File 'lib/geo_combine/indexer.rb', line 11

# Read-only accessor for the Solr client held in @solr.
def solr = @solr

Instance Method Details

#index(docs) ⇒ Object

Index everything and return the number of docs successfully indexed



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/geo_combine/indexer.rb', line 31

# Index every document into Solr in batches, then issue a commit.
#
# @param docs [#each] yields a document and (optionally) its path; each pair
#   is queued as a [doc, path] array and handed to index_batch
# @return [Integer] sum of the values returned by index_batch (presumably the
#   number of documents successfully indexed in each batch)
def index(docs)
  # Track total indexed and time spent
  @logger.info "indexing into #{solr_url}"
  total_indexed = 0
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  # Index in batches; set batch size via SOLR_BATCH_SIZE
  batch = []
  docs.each do |doc, path|
    # Queue first, then flush once the batch is full, so the document that
    # fills (or arrives at) a full batch is never dropped.
    batch << [doc, path]
    next if batch.size < @batch_size

    total_indexed += index_batch(batch)
    batch = []
  end
  # Flush whatever is left over from the final, partially filled batch.
  total_indexed += index_batch(batch) unless batch.empty?

  # Issue a commit to make sure all documents are indexed
  @solr.commit
  end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  sec = end_time - start_time
  @logger.info format('indexed %<total_indexed>d documents in %<sec>.2f seconds', total_indexed:, sec:)
  total_indexed
end

#solr_url ⇒ Object

URL to the solr instance being used



58
59
60
# File 'lib/geo_combine/indexer.rb', line 58

# URL of the Solr instance in use, read from the client's :url option.
def solr_url
  connection_options = @solr.options
  connection_options[:url]
end