Class: EachInBatches::Batch

Inherits:
Object
  • Object
show all
Defined in:
lib/each_in_batches.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Batch

Returns a new instance of Batch.



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'lib/each_in_batches.rb', line 129

# Builds a batch plan over an ActiveRecord relation.
#
# Takes a single options hash; the recognised keys are :arel, :verbose,
# :backwards, :batch_size, :last_batch, :first_batch and :show_results.
# The relation is counted once and the SQL offset of every batch is
# precomputed into offset_array.
#
# @param args [Array] argument list whose first entry is the options hash
# @raise [ArgumentError] if :batch_size resolves to zero or a negative number
def initialize(*args)
  # The value returned from #initialize is discarded by .new, so a failed
  # check only stops the setup below; it does not prevent instantiation.
  return false unless Batch.check(*args)

  options = args.first
  @arel = options[:arel]
  @verbose = options[:verbose].blank? ? false : options[:verbose]

  # Anything other than nil, false or the string 'false' turns backwards mode on.
  backwards_option = options[:backwards]
  @backwards = !(backwards_option.nil? || backwards_option == 'false' || backwards_option == false)

  # Integer#to_i returns the receiver, so to_i covers Integer and String input alike.
  @batch_size = options[:batch_size] ? options[:batch_size].to_i : 50
  @last_batch = options[:last_batch] ? options[:last_batch].to_i : false
  @first_batch = options[:first_batch] ? options[:first_batch].to_i : 0

  # A batch size of zero would otherwise surface as a ZeroDivisionError below.
  unless @batch_size > 0
    raise ArgumentError, ":batch_size must be greater than 0 (got #{options[:batch_size].inspect})"
  end

  # An explicit false must win: false.blank? is true, so without the first
  # branch :show_results => false would be overridden whenever verbose is on.
  requested_results = options[:show_results]
  @show_results =
    if requested_results == false
      false
    elsif requested_results.blank? && @verbose.blank?
      false
    elsif requested_results.blank? && @verbose == true
      true
    else
      requested_results
    end

  @total_time = 0
  @skipped_batches = []
  # Start with an empty list so is_first_run? is usable before #run is called.
  @completion_times = []

  puts "Counting Records..." if @verbose
  @total_records = @arel.count
  @num_runs = @total_records / @batch_size
  @size_of_last_run = @total_records.modulo(@batch_size)

  # A remainder means one additional, partially filled batch.
  @extra_run = @size_of_last_run > 0
  @num_runs += 1 if @extra_run

  puts "Records: #{@total_records}, Batches: #{@num_runs}" if @verbose

  # Batch numbers start at 0 like array indexes; only default last_batch when
  # the caller did not supply one and there is at least one batch.
  @last_batch = @num_runs - 1 unless @num_runs == 0 || @last_batch

  if @verbose
    puts "Batch Numbering Begins With 0 (ZERO) and counts up"
    puts "Batch Size (SQL Limit): #{@batch_size}" #This is the SQL Limit
    puts "First Batch # to run: #{@first_batch}" #This is the number of the first batch to run
    puts "Last Batch # to run: #{@last_batch}" # This is the number of the last batch to run
    puts "Batches Before First and After Last will be skipped."
    puts "Creating Batches:\n"
  end

  @offset_array = []
  @num_runs.times do |batch_number|
    @offset_array << (batch_number * @batch_size)
    print "." if @verbose
  end
  puts " #{@num_runs} Batches Created" if @verbose

  # In order to use batching for record deletion, the offsets need to start
  # with the largest first.
  @offset_array.reverse! if @backwards

  if @verbose
    puts(@backwards ? "Backwards Mode:" : "Normal Mode:")
    puts "  First Offset: #{@offset_array.first}"
    puts "  Last Offset: #{@offset_array.last}"
    # The partial run needs no explicit limit when it comes last, because only
    # that many records remain; it does need one when running backwards.
    if @extra_run
      if @backwards
        puts "  Limit of first run: #{@size_of_last_run}"
      else
        puts "  Size of Last Run: #{@size_of_last_run}"
      end
    end
    puts "  Limit of all #{@extra_run ? "other" : ""} runs: #{@batch_size}" #This is the SQL Limit
  end
end

Instance Attribute Details

#arel ⇒ Object

Returns the value of attribute arel.



15
16
17
# File 'lib/each_in_batches.rb', line 15

# Reader for the relation stored from the :arel option in #initialize; it is
# the object that gets counted and then queried with limit/offset in #run.
def arel
  @arel
end

#backwards ⇒ Object

Returns the value of attribute backwards.



18
19
20
# File 'lib/each_in_batches.rb', line 18

# Reader for the backwards flag derived from the :backwards option; when it is
# truthy the offsets are processed largest-first.
def backwards
  @backwards
end

#batch_size ⇒ Object

Returns the value of attribute batch_size.



17
18
19
# File 'lib/each_in_batches.rb', line 17

# Reader for the number of records per batch (used as the SQL limit);
# #initialize defaults it to 50 when :batch_size is not given.
def batch_size
  @batch_size
end

#completion_times ⇒ Object

Returns the value of attribute completion_times.



32
33
34
# File 'lib/each_in_batches.rb', line 32

# Reader for the per-batch timing list that #run fills in: one
# [batch_number, {:elapsed, :begin_time, :end_time}] entry per processed batch.
def completion_times
  @completion_times
end

#elapsed_time ⇒ Object

Returns the value of attribute elapsed_time.



28
29
30
# File 'lib/each_in_batches.rb', line 28

# Reader for the whole-second difference between end_time and start_time,
# computed at the end of #run.
def elapsed_time
  @elapsed_time
end

#end_time ⇒ Object

Returns the value of attribute end_time.



30
31
32
# File 'lib/each_in_batches.rb', line 30

# Reader for the timestamp #run records once all batches have been visited.
def end_time
  @end_time
end

#extra_run ⇒ Object

Returns the value of attribute extra_run.



25
26
27
# File 'lib/each_in_batches.rb', line 25

# Reader for the flag that is true when the record count is not an exact
# multiple of batch_size, i.e. a final partially filled batch exists.
def extra_run
  @extra_run
end

#first_batch ⇒ Object

Returns the value of attribute first_batch.



20
21
22
# File 'lib/each_in_batches.rb', line 20

# Reader for the number of the first batch to process (0 when :first_batch is
# not given); #run skips batches numbered below it.
def first_batch
  @first_batch
end

#last_batch ⇒ Object

Returns the value of attribute last_batch.



19
20
21
# File 'lib/each_in_batches.rb', line 19

# Reader for the number of the last batch to process; #run skips batches
# numbered above it. #initialize defaults it to num_runs - 1 when
# :last_batch is not given and there is at least one batch.
def last_batch
  @last_batch
end

#num_runs ⇒ Object

Returns the value of attribute num_runs.



26
27
28
# File 'lib/each_in_batches.rb', line 26

# Reader for the number of batches computed in #initialize (including the
# partial one, if any). Note that #run decrements it when extra_run is set.
def num_runs
  @num_runs
end

#offset_array ⇒ Object

Returns the value of attribute offset_array.



22
23
24
# File 'lib/each_in_batches.rb', line 22

# Reader for the list of SQL offsets, one per batch, in processing order
# (reversed by #initialize when backwards is set).
def offset_array
  @offset_array
end

#overhead_time ⇒ Object

Returns the value of attribute overhead_time.



31
32
33
# File 'lib/each_in_batches.rb', line 31

# Reader for elapsed_time minus total_time, computed at the end of #run.
def overhead_time
  @overhead_time
end

#show_results ⇒ Object

Returns the value of attribute show_results.



33
34
35
# File 'lib/each_in_batches.rb', line 33

# Reader for the flag that makes #run call print_results when it finishes;
# derived in #initialize from the :show_results and :verbose options.
def show_results
  @show_results
end

#size_of_last_run ⇒ Object

Returns the value of attribute size_of_last_run.



24
25
26
# File 'lib/each_in_batches.rb', line 24

# Reader for total_records modulo batch_size: the number of records in the
# partial batch, or 0 when the records divide evenly.
def size_of_last_run
  @size_of_last_run
end

#skipped_batches ⇒ Object

Returns the value of attribute skipped_batches.



21
22
23
# File 'lib/each_in_batches.rb', line 21

# Reader for the list of batch numbers that #run skipped because they fell
# before first_batch or after last_batch; starts out empty.
def skipped_batches
  @skipped_batches
end

#start_time ⇒ Object

Returns the value of attribute start_time.



29
30
31
# File 'lib/each_in_batches.rb', line 29

# Reader for the timestamp #run records when it begins.
def start_time
  @start_time
end

#total_records ⇒ Object

Returns the value of attribute total_records.



23
24
25
# File 'lib/each_in_batches.rb', line 23

# Reader for the record count taken from arel.count in #initialize.
def total_records
  @total_records
end

#total_time ⇒ Object

Returns the value of attribute total_time.



27
28
29
# File 'lib/each_in_batches.rb', line 27

# Reader for the sum, in whole seconds, of the per-batch processing times
# accumulated by #run; starts at 0.
def total_time
  @total_time
end

#verbose ⇒ Object

Returns the value of attribute verbose.



16
17
18
# File 'lib/each_in_batches.rb', line 16

# Reader for the verbosity setting taken from the :verbose option (false when
# the option is blank); gates the progress output.
def verbose
  @verbose
end

Class Method Details

.check(*args) ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/each_in_batches.rb', line 114

# Validates the constructor arguments, printing the help text when they are
# unusable.
#
# The arguments are rejected when none are given, or when :batch_size,
# :last_batch or :first_batch is supplied but its string form contains no
# digit at all (so it could never yield a meaningful integer).
#
# @param args [Array] argument list whose first entry is the options hash
# @return [Boolean] true when the arguments pass, false otherwise
def self.check(*args)
  options = args.first
  unusable = args.empty? ||
    [:batch_size, :last_batch, :first_batch].any? do |key|
      options[key] && options[key].to_s !~ /\d/
    end

  if unusable
    # puts returns nil, so `puts ... and return false` would never reach the
    # return; print and return as two separate statements instead.
    puts self.help_text
    return false
  end

  true
end

.help_text ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/each_in_batches.rb', line 39

# Returns the usage text describing the options accepted by Batch.new, an
# example session, and the meaning of the progress markers printed by #run.
#
# @return [String] the multi-line help message
def self.help_text
  <<-HEREDOC
      Arguments for the initializer (Batch.new) method are:

    Required:

      :arel         - Usage: :arel => MyClass.some_scope.order("some_column ASC")
                        Required, as this is the class that will be batched

    Optional:

      :backwards     - Usage: :backwards => true or false
                        Whether or not the batches should be processed in reverse order or not.
                        NOTE: deletions must be processed backwards or you eat the set as you process
                              and end the run half way through
                        Default: false (if not provided)

      :verbose       - Usage: :verbose => true or false
                        Sets verbosity of output
                        Default: false (if not provided)

      :batch_size    - Usage: :batch_size => x
                        Where x is some number.
                        How many AR Objects should be processed at once?
                        Default: 50 (if not provided)

      :last_batch    - Usage: :last_batch => x
                        Where x is some number.
                        Only process up to and including batch #x.
                          Batch numbers start at 0 for the first batch.
                        Default: won't be used (no limit if not provided)

      :first_batch   - Usage: :first_batch => x
                        Where x is some number.
                        Begin processing batches beginning at batch #x.
                          Batch numbers start at 0 for the first batch.
                        Default: won't be used (no offset if not provided)

      :show_results  - Usage: :show_results => true or false
                        Prints statistics about the results of Batch#run.
                        Default: true if verbose is set to true and :show_results is not provided, otherwise false

   EXAMPLE:

     To create a new Batch, call Batch.new and pass it the class and any additional arguments (all as a hash).

       batch = EachInBatches::Batch.new(:arel => Payment.canceled.order("transaction_id ASC"), :batch_size => 50)

     To process the batched data, pass a block to Batch#run the same way you would to an object returned by

       Klass.all.each {|x| x.method}

     Batch#run will pass the data to your block, one at a time, in batches set by the :batch_size argument.

       batch.run {|x| puts x.id; puts x.transaction_id}

     Print the results!

       batch.print_results

     Or...

     Consolidate your code if you prefer

       EachInBatches::Batch.new(:arel => Payment.canceled.order("transaction_id ASC"), :batch_size => 50, :show_results => true).run{|x| puts x.id; puts x.transaction_id}

   Interpreting the output:
     '[O]' means the batch was skipped due to an offset.
     '[L]' means the batch was skipped due to a limit.
     '[P]' means the batch is processing.
     '[C]' means the batch is complete.
     and yes... it was a coincidence.  This class is not affiliated with 'one laptop per child'
  HEREDOC
end

Instance Method Details

#is_first_run? ⇒ Boolean

Returns:

  • (Boolean)


202
203
204
205
# File 'lib/each_in_batches.rb', line 202

# Tells whether no batch has been completed yet.
#
# completion_times may still be nil when this is asked before #run has
# populated it; that also counts as "nothing completed" rather than raising.
#
# @return [Boolean] true when no completion time has been recorded
def is_first_run?
  recorded = completion_times
  recorded.nil? || recorded.empty?
end


35
36
37
# File 'lib/each_in_batches.rb', line 35

# Dumps the current value of every attribute, one "name: value" line each,
# to standard output. completion_times and show_results are shown via
# #inspect; all other values are interpolated as-is.
def print_debug
  snapshot = [
    ["verbose", verbose],
    ["batch_size", batch_size],
    ["backwards", backwards],
    ["last_batch", last_batch],
    ["first_batch", first_batch],
    ["offset_array", offset_array],
    ["total_records", total_records],
    ["size_of_last_run", size_of_last_run],
    ["extra_run", extra_run],
    ["num_runs", num_runs],
    ["total_time", total_time],
    ["elapsed_time", elapsed_time],
    ["start_time", start_time],
    ["end_time", end_time],
    ["overhead_time", overhead_time],
    ["completion_times", completion_times.inspect],
    ["show_results", show_results.inspect]
  ]
  print snapshot.map { |label, value| "#{label}: #{value}\n" }.join
end

Allow caller to override verbosity when called from console



255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/each_in_batches.rb', line 255

# Prints a timing summary of the last #run to standard output: the average
# time per batch (skipped when num_runs is below 1), the total elapsed time,
# and, in backwards mode, the record count before and after the run.
#
# @param verbose [Object] lets a console caller override verbosity; currently
#   unused because the per-batch completion listing is deliberately disabled
#   (far too noisy with a large number of batches)
def print_results(verbose = self.verbose)
  printf "Results..."

  unless num_runs < 1
    average_seconds = total_time / Float(num_runs)
    printf "Average time per complete batch was %.1f seconds\n", average_seconds
  end

  summary_format = "Total time elapsed was %.1f seconds, about #{elapsed_time / 60} minute(s)\n"
  printf summary_format, elapsed_time

  # When running backwards the block might be deleting records, so show the
  # count recorded up front next to a fresh count.
  # NOTE(review): this interpolates arel.table itself — presumably a table
  # name was intended; confirm what it renders as.
  return unless backwards

  puts "Total # of #{arel.table} - Before: #{total_records}"
  puts "Total # of #{arel.table} - After : #{arel.count}"
end

#run(&block) ⇒ Object



207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/each_in_batches.rb', line 207

# Processes every planned batch, yielding each record to the given block.
#
# Batches numbered below first_batch or above last_batch are recorded in
# skipped_batches instead of being queried. Timing information is stored in
# start_time, end_time, elapsed_time, total_time, overhead_time and
# completion_times; print_results is called when show_results is truthy.
#
# @yield [obj] each record returned by arel.limit(...).offset(...)
# @return [false] when no block is given or there are no batches to run
# @return [String] "Process Complete" otherwise
def run(&block)
  return false unless block_given?
  self.start_time = Time.current
  # puts returns nil, so `puts ... and return false` never reached the return;
  # print and bail out as two separate statements.
  unless self.num_runs > 0
    puts "There are no batches to run"
    return false
  end
  self.total_time = 0
  self.completion_times = Array.new
  self.offset_array.each_with_index do |offset, current_batch|
    # Running backwards, the partial batch sits at the highest offset and so
    # comes first (index 0); it is the only batch needing the smaller limit.
    # Without a partial batch size_of_last_run is 0, which must never be used
    # as a limit or the first batch would select nothing.
    limite = if self.backwards && self.extra_run && current_batch == 0
      self.size_of_last_run
    else
      self.batch_size
    end
    if self.first_batch > current_batch
      print "[O] #{show_status(current_batch, limite)} skipped" if self.verbose
      self.skipped_batches << current_batch
    elsif self.last_batch && self.last_batch < current_batch
      print "[L] #{show_status(current_batch, limite)} skipped" if self.verbose
      self.skipped_batches << current_batch
    else
      print "[P] #{show_status(current_batch, limite)}" if self.verbose

      #start the timer
      beg_time = Time.current

      self.arel.limit(limite).offset(offset).each {|obj| yield obj}

      #stop the timer
      fin_time = Time.current

      # Times are compared at whole-second resolution.
      this_time = fin_time.to_i - beg_time.to_i
      # NOTE(review): current_batch never equals num_runs (indexes stop at
      # num_runs - 1), so every batch's time is added; left as-is pending
      # confirmation of whether the partial batch was meant to be excluded.
      self.total_time += this_time unless extra_run && current_batch == self.num_runs
      puts "[C] #{show_status(current_batch, limite)} in #{this_time} seconds" if self.verbose
      self.completion_times << [current_batch, {:elapsed => this_time, :begin_time => beg_time, :end_time => fin_time}]
    end
  end
  # Drop the partial batch from the count that print_results averages over.
  # NOTE(review): this mutates num_runs, so calling #run again on the same
  # instance starts from the reduced value.
  self.num_runs -= 1 if self.extra_run
  self.end_time = Time.current
  self.elapsed_time = (self.end_time.to_i - self.start_time.to_i)
  self.overhead_time = self.elapsed_time - self.total_time
  print_results if self.show_results
  return "Process Complete"
end

#show_status(current_batch, limite) ⇒ Object



250
251
252
# File 'lib/each_in_batches.rb', line 250

# Formats the progress marker shown next to each batch line.
#
# @param current_batch [Object] number of the batch being reported
# @param limite [Object] limit applied to that batch
# @return [String] "{current_batch / last_batch / limite}"
def show_status(current_batch, limite)
  format("{%s / %s / %s}", current_batch, last_batch, limite)
end