Class: Elasticsearch::Rails::HA::ParallelIndexer
- Inherits:
-
Object
- Object
- Elasticsearch::Rails::HA::ParallelIndexer
- Defined in:
- lib/elasticsearch/rails/ha/parallel_indexer.rb
Instance Attribute Summary collapse
-
#batch_size ⇒ Object
readonly
Returns the value of attribute batch_size.
-
#force ⇒ Object
readonly
Returns the value of attribute force.
-
#idx_name ⇒ Object
readonly
Returns the value of attribute idx_name.
-
#klass ⇒ Object
readonly
Returns the value of attribute klass.
-
#max ⇒ Object
readonly
Returns the value of attribute max.
-
#nprocs ⇒ Object
readonly
Returns the value of attribute nprocs.
-
#scope ⇒ Object
readonly
Returns the value of attribute scope.
-
#verbose ⇒ Object
readonly
Returns the value of attribute verbose.
Instance Method Summary collapse
-
#initialize(opts) ⇒ ParallelIndexer
constructor
Leverages multiple cores to run indexing in parallel.
- #process_child_results(results) ⇒ Object
- #run ⇒ Object
- #run_child(start_at) ⇒ Object
Constructor Details
#initialize(opts) ⇒ ParallelIndexer
Leverages multiple cores to run indexing in parallel.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 12
# Builds an indexer that splits the records of +klass+ into +nprocs+ pools.
#
# @param opts [Hash] options
#   :klass      (required) model class, or its name as a String
#   :idx_name   (required) name of the index to write to
#   :nprocs     (required) number of worker pools; must be greater than zero
#   :batch_size (required) batch size handed to the import call
#   :max, :force, :verbose, :scope (optional) stored as given
# @raise [RuntimeError] when a required option is missing or nprocs is not positive
def initialize(opts)
  @klass      = opts[:klass]      || fail("klass required")
  @idx_name   = opts[:idx_name]   || fail("idx_name required")
  @nprocs     = opts[:nprocs]     || fail("nprocs required")
  @batch_size = opts[:batch_size] || fail("batch_size required")
  @max        = opts[:max]
  @force      = opts[:force]
  @verbose    = opts[:verbose]
  @scope      = opts[:scope]

  # A zero nprocs would divide by 0.0 below and surface as an opaque
  # FloatDomainError; a negative one would yield a negative pool size.
  fail "nprocs must be greater than zero" unless @nprocs.to_f > 0

  # make sure klass is not a simple string
  @klass = @klass.constantize if @klass.is_a?(String)

  # size of each worker's pool: total record count spread over nprocs, rounded up
  @total_expected = @klass.count
  @pool_size = (@total_expected / @nprocs.to_f).ceil
end
Instance Attribute Details
#batch_size ⇒ Object (readonly)
Returns the value of attribute batch_size.
9 10 11 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 9
# Read-only accessor for the batch_size attribute.
def batch_size
  @batch_size
end
#force ⇒ Object (readonly)
Returns the value of attribute force.
9 10 11 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 9
# Read-only accessor for the force attribute.
def force
  @force
end
#idx_name ⇒ Object (readonly)
Returns the value of attribute idx_name.
9 10 11 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 9
# Read-only accessor for the idx_name attribute.
def idx_name
  @idx_name
end
#klass ⇒ Object (readonly)
Returns the value of attribute klass.
9 10 11 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 9
# Read-only accessor for the klass attribute.
def klass
  @klass
end
#max ⇒ Object (readonly)
Returns the value of attribute max.
9 10 11 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 9
# Read-only accessor for the max attribute.
def max
  @max
end
#nprocs ⇒ Object (readonly)
Returns the value of attribute nprocs.
9 10 11 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 9
# Read-only accessor for the nprocs attribute.
def nprocs
  @nprocs
end
#scope ⇒ Object (readonly)
Returns the value of attribute scope.
9 10 11 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 9
# Read-only accessor for the scope attribute.
def scope
  @scope
end
#verbose ⇒ Object (readonly)
Returns the value of attribute verbose.
9 10 11 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 9
# Read-only accessor for the verbose attribute.
def verbose
  @verbose
end
Instance Method Details
#process_child_results(results) ⇒ Object
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 80
# Checks the exit status of each child so we know if we should throw an exception.
#
# Diagnostics are printed for every child before raising, so a failure in one
# worker does not hide failures in the workers listed after it.
#
# @param results [Array<Array>] +[pid, status]+ pairs; each status must respond to
#   +exited?+, +signaled?+, +success?+, +exitstatus+ and +termsig+
# @return [Array<Array>] +results+, when every child succeeded
# @raise [RuntimeError] naming every PID that exited abnormally
def process_child_results(results)
  failed_pids = []

  results.each do |pid, pstat|
    if pstat.exited?
      @verbose and puts ::ANSI.blue{ "PID #{pid} exited with #{pstat.exitstatus}" }
    end

    exit_ok = true

    if pstat.signaled?
      puts ::ANSI.red{ " >> #{pid} exited with uncaught signal #{pstat.termsig}" }
      exit_ok = false
    end

    unless pstat.success?
      puts ::ANSI.red{ " >> #{pid} was not successful" }
      exit_ok = false
    end

    # Only a child that actually exited has an exit status; without the
    # exited? guard a signal-killed child (exitstatus nil) would be
    # reported as "non-zero status" as well.
    if pstat.exited? && pstat.exitstatus != 0
      puts ::ANSI.red{ " >> #{pid} exited with non-zero status" }
      exit_ok = false
    end

    failed_pids << pid unless exit_ok
  end

  unless failed_pids.empty?
    raise ::ANSI.red{ "PID #{failed_pids.join(', ')} exited abnormally, so the whole reindex fails" }
  end

  results
end
#run ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 32
# Forks one worker per pool of records, waits for all of them and then
# hands their exit statuses to process_child_results.
#
# Does nothing when the computed pool size is below 1.
#
# @return [Object, nil] whatever process_child_results returns, or nil on the early exit
def run
  return if @pool_size < 1

  # get all ids since we can't assume there are no holes in the PK sequencing
  ids = klass.order('id ASC').pluck(:id)
  # each worker starts at the first id of its slice
  offsets = ids.each_slice(@pool_size).map(&:first)

  if @verbose
    puts ::ANSI.blue{ "Parallel Indexer: index=#{@idx_name} total=#{@total_expected} nprocs=#{@nprocs} pool_size=#{@pool_size} offsets=#{offsets} " }
  end

  if @force
    @verbose and puts ::ANSI.blue{ "Force creating new index" }
    klass.__elasticsearch__.create_index! force: true, index: idx_name
    klass.__elasticsearch__.refresh_index! index: idx_name
  end

  @current_db_config = ActiveRecord::Base.connection_config

  # IMPORTANT before forks in offsets loop
  ActiveRecord::Base.connection.disconnect!

  child_pids = []
  begin
    offsets.each do |start_at|
      # fork with a block returns the child's pid in the parent
      child_pids << fork { run_child(start_at) }
    end
  ensure
    # reconnect in parent, even when a fork failed part-way through the loop
    ActiveRecord::Base.establish_connection(@current_db_config)
  end

  # Process.waitall seems to hang during tests. Do it manually.
  # wait2 returns the [pid, status] pair directly.
  child_results = child_pids.map { |pid| Process.wait2(pid) }

  process_child_results(child_results)
end
#run_child(start_at) ⇒ Object
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# File 'lib/elasticsearch/rails/ha/parallel_indexer.rb', line 110
# Body of one forked worker: imports records beginning at +start_at+ and
# terminates the process once its pool (or the configured max) is done.
#
# @param start_at [Object] passed to the import call as its +start:+ option
# @return [Object] the import call's result, when neither stop condition was
#   reached; otherwise the process ends via exit!
def run_child(start_at)
  # IMPORTANT after fork
  ActiveRecord::Base.establish_connection(@current_db_config)

  # IMPORTANT for tests to determine whether at_end should run
  ENV["I_AM_HA_CHILD"] = "true"

  completed = 0
  errors = []
  @verbose and puts ::ANSI.blue{ "Start worker #{$$} at offset #{start_at}" }

  # The progress bar is best effort: any failure constructing it falls back
  # to plain checkpoint lines.
  progress_bar = begin
    ::ANSI::Progressbar.new("#{@klass} [#{$$}]", @pool_size, STDOUT)
  rescue StandardError
    nil
  end

  checkpoint = false
  # get_width and show are invoked through __send__, as in the original listing.
  # Guard against a nil bar so the fallback actually works.
  if progress_bar && progress_bar.__send__(:get_width)
    progress_bar.format("#{@klass} [#{$$}]: %3d%% %s %s", :percentage, :bar, :stat)
    progress_bar.__send__ :show
    progress_bar.bar_mark = '='
  else
    checkpoint = true
  end

  # A missing or non-positive max means "no limit".
  limit = @max.to_i

  @klass.__elasticsearch__.import return: 'errors',
    index: @idx_name,
    start: start_at,
    scope: @scope,
    batch_size: @batch_size do |resp|
    # show errors immediately (rather than buffering them)
    errors.concat(resp['items'].select { |item| item.values.first['error'] })
    completed += resp['items'].size

    if progress_bar && @verbose
      progress_bar.inc resp['items'].size
    end
    if checkpoint && @verbose
      puts ::ANSI.blue{ "[#{$$}] #{Time.now.utc.iso8601} : #{completed} records completed" }
    end

    STDERR.flush
    STDOUT.flush

    if errors.size > 0
      STDOUT.puts "ERRORS in #{$$}:"
      STDOUT.puts errors.pretty_inspect
    end

    # >= rather than ==: completed advances a whole batch at a time and can
    # step over max without ever equalling it.
    if completed >= @pool_size || (limit > 0 && completed >= limit)
      progress_bar.finish if progress_bar
      @verbose and puts ::ANSI.blue{ "Worker #{$$} finished #{completed} records" }
      exit!(true) # exit child worker
    end
  end # end do |resp| block
end