Class: ESReindex

Inherits:
Object
  • Object
show all
Defined in:
lib/es-reindex.rb,
lib/es-reindex/railtie.rb,
lib/es-reindex/version.rb,
lib/es-reindex/args_parser.rb

Defined Under Namespace

Classes: ArgsParser, Railtie

Constant Summary collapse

DEFAULT_URL =
'http://10.203.175.32:9200'
VERSION =
'0.2.0'

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(src, dst, options = {}) ⇒ ESReindex

Returns a new instance of ESReindex.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/es-reindex.rb', line 26

# Builds a reindexer for one source/destination pair.
#
# @param src [String, nil] source location; a nil value is stored as ''
# @param dst [String, nil] destination location; a nil value is stored as ''
# @param options [Hash] overrides merged on top of the defaults listed below
def initialize(src, dst, options = {})
  # Install a STDERR logger only when no class-level logger has been set yet.
  ESReindex.logger ||= Logger.new(STDERR)

  defaults = {
    from_cli:      false, # invoked from the command line?
    remove:        false, # remove the index in the new location first
    update:        false, # update existing documents (default: only create non-existing)
    frame:         1000,  # frame size obtained with one fetch during scrolling
    copy_mappings: true   # copy the old mappings/settings
  }

  @src     = src || ''
  @dst     = dst || ''
  @options = defaults.merge!(options)
  @done    = 0
end

Class Attribute Details

.logger ⇒ Object

Returns the value of attribute logger.



187
188
189
# File 'lib/es-reindex.rb', line 187

# Class-level reader for the shared logger (@logger on the class itself).
# #initialize assigns a STDERR Logger when this is still unset.
def logger
  @logger
end

Instance Attribute Details

#dclient ⇒ Object

Returns the value of attribute dclient.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @dclient: the Elasticsearch client built for the destination
# host (durl) by #setup_index_urls.
def dclient
  @dclient
end

#didx ⇒ Object

Returns the value of attribute didx.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @didx: the destination index name that #setup_index_urls
# extracts from dst (the text after the last '/', or all of dst).
def didx
  @didx
end

#done ⇒ Object

Returns the value of attribute done.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @done: starts at 0 in #initialize and is incremented once per
# document inside #copy_docs.
def done
  @done
end

#dst ⇒ Object

Returns the value of attribute dst.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @dst: the destination argument given to #initialize
# ('' when nil was passed).
def dst
  @dst
end

#durl ⇒ Object

Returns the value of attribute durl.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @durl: the destination host URL that #setup_index_urls takes
# from dst (the text before the last '/'), or DEFAULT_URL when dst has no '/'.
def durl
  @durl
end

#mappings ⇒ Object

Returns the value of attribute mappings.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @mappings: filled by #get_mappings (from the source index) or by
# #create_destination (from options[:mappings]) and sent when the
# destination index is created.
def mappings
  @mappings
end

#options ⇒ Object

Returns the value of attribute options.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @options: the default option hash from #initialize merged with
# the caller-supplied options.
def options
  @options
end

#sclient ⇒ Object

Returns the value of attribute sclient.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @sclient: the Elasticsearch client built for the source host
# (surl) by #setup_index_urls.
def sclient
  @sclient
end

#settings ⇒ Object

Returns the value of attribute settings.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @settings: filled by #get_settings (from the source index) or by
# #create_destination (from options[:settings]) and sent when the
# destination index is created.
def settings
  @settings
end

#sidx ⇒ Object

Returns the value of attribute sidx.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @sidx: the source index name that #setup_index_urls extracts
# from src (the text after the last '/', or all of src).
def sidx
  @sidx
end

#src ⇒ Object

Returns the value of attribute src.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @src: the source argument given to #initialize
# ('' when nil was passed).
def src
  @src
end

#start_time ⇒ Object

Returns the value of attribute start_time.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @start_time: the Time recorded at the beginning of #copy_docs,
# used there for elapsed-time and E.T.A. reporting.
def start_time
  @start_time
end

#surl ⇒ Object

Returns the value of attribute surl.



10
11
12
# File 'lib/es-reindex.rb', line 10

# Reader for @surl: the source host URL that #setup_index_urls takes from src
# (the text before the last '/'), or DEFAULT_URL when src has no '/'.
def surl
  @surl
end

Class Method Details

.copy!(src, dst, options = {}) ⇒ Object



12
13
14
15
16
17
# File 'lib/es-reindex.rb', line 12

# Convenience entry point: builds a reindexer, resolves its URLs/clients and
# runs the copy.
#
# @param src [String] source location
# @param dst [String] destination location
# @param options [Hash] options forwarded unchanged to the constructor
# @return [Object] the reindexer instance (not the result of its #copy!)
def self.copy!(src, dst, options = {})
  reindexer = new(src, dst, options)
  reindexer.setup_index_urls
  reindexer.copy!
  reindexer
end

.reindex!(src, dst, options = {}) ⇒ Object



19
20
21
22
23
24
# File 'lib/es-reindex.rb', line 19

# Same flow as .copy!, but always forces copy_mappings: false so the
# destination is created from the caller's options rather than from the
# source index.
#
# @param src [String] source location
# @param dst [String] destination location
# @param options [Hash] options; any :copy_mappings entry is overridden
# @return [Object] the reindexer instance (not the result of its #copy!)
def self.reindex!(src, dst, options = {})
  reindex_options = options.merge(copy_mappings: false)
  reindexer = new(src, dst, reindex_options)
  reindexer.setup_index_urls
  reindexer.copy!
  reindexer
end

Instance Method Details

#check_docs ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/es-reindex.rb', line 167

# Polls both clusters until the source and destination document counts agree,
# giving up after 60 seconds.
#
# @return [Boolean] true when the last counts read were equal
def check_docs
  log 'Checking document count... '
  # Deliberately unequal starting values so the polling loop runs at least once.
  scount = 1
  dcount = 0
  begin
    Timeout::timeout(60) do
      until scount == dcount
        scount = sclient.count(index: sidx)["count"]
        dcount = dclient.count(index: didx)["count"]
        sleep 1 unless scount == dcount
      end
    end
  rescue Timeout::Error
    # Timed out: fall through and report whatever counts were read last.
  end
  verdict = scount == dcount ? 'equal' : 'NOT EQUAL'
  log "Document count: #{scount} = #{dcount} (#{verdict})"

  scount == dcount
end

#clear_destination ⇒ Object



81
82
83
84
85
86
# File 'lib/es-reindex.rb', line 81

# Deletes the destination index when the :remove option is on and the index
# exists; otherwise does nothing.
#
# @return [Boolean] true when nothing had to be removed or removal succeeded,
#   false when any StandardError was raised while checking/deleting
def clear_destination
  return true unless remove?

  indices = dclient.indices
  indices.delete(index: didx) if indices.exists(index: didx)
  true
rescue => e
  # Report the cause instead of swallowing it; callers still just see `false`.
  log "Failed to remove '#{durl}/#{didx}': #{e.class}: #{e.message}", :error
  false
end

#confirm ⇒ Object



76
77
78
79
# File 'lib/es-reindex.rb', line 76

# Prompts on standard output and blocks until a line is read from standard
# input (Ctrl-C aborts the process instead).
#
# @return [String] the line that was read
def confirm
  $stdout.write("Confirm or hit Ctrl-c to abort...\n")
  $stdin.readline
end

#copy! ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/es-reindex.rb', line 58

# Runs the whole pipeline: clear destination, create destination, copy
# documents, verify counts. Each step runs only if the previous one returned
# a truthy value.
#
# When started from the CLI the user is asked to confirm first and the
# process exits with status 0 (success) or 1 (failure).
#
# @return [Object] the result of the step chain when not running from the CLI
def copy!
  mode_note =
    if remove?
      ' with rewriting destination mapping!'
    elsif update?
      ' with updating existing documents!'
    else
      '.'
    end
  log "Copying '#{surl}/#{sidx}' to '#{durl}/#{didx}'#{mode_note}"
  confirm if from_cli?

  success = clear_destination && create_destination && copy_docs && check_docs

  return success unless from_cli?

  exit(success ? 0 : 1)
end

#copy_docs ⇒ Object



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/es-reindex.rb', line 130

# Streams every document of the source index into the destination index.
#
# Opens a scan/scroll search on the source, then for each scrolled page
# builds one bulk request ('index' when updating, otherwise 'create') and
# sends it to the destination. The optional :before_each / :after_each hooks
# receive each document; :after_copy runs once at the end.
#
# @return [true]
def copy_docs
  log "Copying '#{surl}/#{sidx}' to '#{durl}/#{didx}'..."
  @start_time = Time.now

  page  = sclient.search(index: sidx, search_type: "scan", scroll: '10m', size: frame)
  total = page['hits']['total']
  log "Copy progress: %u/%u (%.1f%%) done.\r" % [done, total, 0]

  action = update? ? 'index' : 'create'

  # Each scroll call is keyed by the scroll id of the previous response;
  # an empty page ends the loop.
  while (page = sclient.scroll(scroll_id: page['_scroll_id'], scroll: '10m')) && !page['hits']['hits'].empty?
    operations = page['hits']['hits'].map do |doc|
      options[:before_each].call doc if options[:before_each].present?
      ### === implement possible modifications to the document
      ### === end modifications to the document
      target = {'_index' => didx, '_id' => doc['_id'], '_type' => doc['_type'], data: doc['_source']}
      @done = done + 1
      options[:after_each].call doc if options[:after_each].present?
      {action => target}
    end
    dclient.bulk(body: operations) unless operations.empty?

    elapsed = Time.now - start_time
    eta     = total * elapsed / done
    percent = 100.0 * done / total
    log "Copy progress: #{done}/#{total} (%.1f%%) done in #{tm_len}. E.T.A.: #{start_time + eta}." % percent
  end

  log "Copy progress: %u/%u done in %s.\n" % [done, total, tm_len]

  options[:after_copy].call if options[:after_copy].present?

  true
end

#create_destination ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/es-reindex.rb', line 88

# Creates the destination index unless it already exists.
#
# With copy_mappings enabled, settings and mappings are read from the source
# index; otherwise they come from the optional :mappings / :settings
# callables in the options. The optional :before_create / :after_create
# callables run around the index creation.
#
# @return [Boolean] true when the index exists afterwards; false when the
#   source settings or mappings could not be obtained
def create_destination
  return true if dclient.indices.exists(index: didx)

  if copy_mappings?
    return false unless get_settings
    return false unless get_mappings
    create_msg = " with settings & mappings from '#{surl}/#{sidx}'"
  else
    # Either callable may be omitted (e.g. a bare `reindex!` call); fall back
    # to an empty definition instead of calling nil.
    mappings_builder = options[:mappings]
    settings_builder = options[:settings]
    @mappings = mappings_builder ? mappings_builder.call : {}
    @settings = settings_builder ? settings_builder.call : {}
    create_msg = ""
  end

  # Plain truthiness is enough for nil-or-callable hooks and avoids relying
  # on ActiveSupport's `present?`.
  before_create = options[:before_create]
  before_create.call if before_create

  log "Creating '#{durl}/#{didx}' index#{create_msg}..."
  dclient.indices.create index: didx, body: { settings: settings, mappings: mappings }
  log "Successfully created '#{durl}/#{didx}'#{create_msg}."

  after_create = options[:after_create]
  after_create.call if after_create

  true
end

#get_mappings ⇒ Object



122
123
124
125
126
127
128
# File 'lib/es-reindex.rb', line 122

# Fetches the source index's mappings and stores them in @mappings.
#
# @return [Object, false] the stored mappings, or false (after logging an
#   error) when the client returned nothing
def get_mappings
  response = sclient.indices.get_mapping(index: sidx)
  if response
    @mappings = response[sidx]["mappings"]
  else
    log "Failed to obtain original index '#{surl}/#{sidx}' mappings!", :error
    false
  end
end

#get_settings ⇒ Object



112
113
114
115
116
117
118
119
120
# File 'lib/es-reindex.rb', line 112

# Fetches the source index's settings, stores them in @settings and strips
# the index.version.created entry.
#
# @return [Boolean] true once the settings are stored; false (after logging
#   an error) when the client returned nothing
def get_settings
  response = sclient.indices.get_settings(index: sidx)
  unless response
    log "Failed to obtain original index '#{surl}/#{sidx}' settings!", :error
    return false
  end

  @settings = response[sidx]["settings"]

  # NOTE(review): presumably "created" is dropped because it must not be sent
  # back when creating the new index - confirm against the target ES version.
  # Guarded so settings without an index/version section do not raise.
  version = (@settings["index"] || {})["version"]
  version.delete "created" if version

  # Explicit result: Hash#delete returns nil when the key is absent, which
  # callers would misread as a failure.
  true
end

#setup_index_urls ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/es-reindex.rb', line 42

# Splits src and dst into a host URL and an index name, then builds one
# Elasticsearch client per host.
#
# A location containing '/' is split at its last '/': everything before is
# the URL, everything after is the index. A location without '/' is taken as
# an index name on DEFAULT_URL.
#
# @return [Object] the destination client (value of the last assignment)
def setup_index_urls
  (@surl, @sidx), (@durl, @didx) = [src, dst].map do |location|
    parts = %r{^(.*)/(.*?)$}.match(location)
    if parts
      [parts[1], parts[2]]
    else
      # Fresh copies, so the stored strings never alias DEFAULT_URL or the input.
      [String.new(DEFAULT_URL), String.new(location)]
    end
  end

  @sclient = Elasticsearch::Client.new host: surl
  @dclient = Elasticsearch::Client.new host: durl
end