Class: Hadupils::Commands::Cleanup

Inherits:
SimpleCommand show all
Includes:
Options::DryRun, Extensions::Dfs, Extensions::Runners, Helpers::Dfs, Helpers::TextHelper
Defined in:
lib/hadupils/commands.rb

Instance Attribute Summary collapse

Attributes inherited from SimpleCommand

#params

Instance Method Summary collapse

Methods included from Options::DryRun

#perform_dry_run?

Methods included from Helpers::TextHelper

#pluralize

Methods included from Helpers::Dfs

#all_expired?, #dir_candidates, #dir_empty?, #hadupils_tmpfile?, #hadupils_tmpfiles, #parse_count, #parse_ls

Methods inherited from SimpleCommand

run, #successful?

Constructor Details

#initialize(params) ⇒ Cleanup

Returns a new instance of Cleanup.



203
204
205
206
207
208
209
# File 'lib/hadupils/commands.rb', line 203

# Builds a Cleanup command from the raw command-line parameters.
#
# The tmp path and TTL are read positionally from +params+. When
# #perform_dry_run? is true, both are read one slot later (presumably
# because the dry-run flag occupies the first slot -- confirm against
# Options::DryRun). Missing values fall back to TmpFile.tmp_path and
# TmpFile.tmp_ttl; the TTL is coerced with #to_i.
#
# @param params [Array] positional command parameters
def initialize(params)
  super(params)
  @expired_exitstatuses = []
  @rm_exitstatuses      = []

  # Index of the first positional argument (path); the TTL follows it.
  offset = perform_dry_run? ? 1 : 0

  @tmp_path = params[offset] || TmpFile.tmp_path
  @tmp_ttl  = (params[offset + 1] || TmpFile.tmp_ttl).to_i
end

Instance Attribute Details

#expired_exitstatuses ⇒ Object

Returns the value of attribute expired_exitstatuses.



198
199
200
# File 'lib/hadupils/commands.rb', line 198

# Exit statuses recorded while checking directory candidates for expiry.
# Starts as an empty array in #initialize; #has_expired? appends to it and
# #run treats any non-zero entry as a failure.
def expired_exitstatuses
  @expired_exitstatuses
end

#rm_exitstatuses ⇒ Object

Returns the value of attribute rm_exitstatuses.



199
200
201
# File 'lib/hadupils/commands.rb', line 199

# Exit statuses of the recursive removals performed by #run.
# Starts as an empty array in #initialize; #run appends one entry per
# removal attempt and reports 0 only when every entry is 0.
def rm_exitstatuses
  @rm_exitstatuses
end

#tmp_path ⇒ Object (readonly)

Returns the value of attribute tmp_path.



200
201
202
# File 'lib/hadupils/commands.rb', line 200

# Path that #run lists to find removal candidates. Taken from the command
# parameters in #initialize, falling back to TmpFile.tmp_path.
def tmp_path
  @tmp_path
end

#tmp_ttl ⇒ Object (readonly)

Returns the value of attribute tmp_ttl.



201
202
203
# File 'lib/hadupils/commands.rb', line 201

# Time-to-live used when selecting and checking candidates, as an Integer
# (coerced with #to_i in #initialize). Taken from the command parameters,
# falling back to TmpFile.tmp_ttl.
# NOTE(review): presumably seconds (the comment in #run mentions a default
# of 86400, i.e. 24 hours) -- confirm against TmpFile.
def tmp_ttl
  @tmp_ttl
end

Instance Method Details

#has_expired?(dir_candidate, ttl) ⇒ Boolean

Returns:

  • (Boolean)


250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/hadupils/commands.rb', line 250

# Decides whether a directory candidate may be removed.
#
# Runs `fs -count` on the candidate; an empty directory (per #dir_empty?)
# counts as expired. Otherwise runs `fs -ls` on everything beneath it and
# defers to #all_expired? with the parsed listing. Every Hadoop exit status
# is appended to #expired_exitstatuses, and 255 is appended when the
# `-count` output cannot be parsed, so the caller can detect failures.
#
# Any failure (command error or unparseable output) yields +false+, so a
# directory that could not be inspected is never reported as expired.
#
# @param dir_candidate [String] DFS path of the directory to inspect
# @param ttl [Integer] time-to-live handed to #all_expired?
# @return [Boolean] true when the candidate is empty or all of its contents
#   have expired; false otherwise or on failure
def has_expired?(dir_candidate, ttl)
  puts "Checking directory candidate: #{dir_candidate}"
  stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-count', dir_candidate]
  expired_exitstatuses << exitstatus
  unless successful? exitstatus
    $stderr.puts "Failed to perform dfs -count on path: #{dir_candidate}"
    return false
  end

  parsed_count = parse_count(stdout)
  if parsed_count.empty?
    $stderr.puts "Failed to parse dfs -count for stdout: #{stdout}"
    expired_exitstatuses << 255
    # Return false explicitly: `<<` returns the (truthy) array, which would
    # otherwise make an unparseable directory look expired.
    return false
  end

  return true if dir_empty? parsed_count[:file_count]

  # Glob matching everything beneath the candidate directory.
  contents_glob = File.join(dir_candidate, '**', '*')
  stdout, exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-ls', contents_glob]
  expired_exitstatuses << exitstatus
  unless successful? exitstatus
    $stderr.puts "Failed to perform dfs -ls on path: #{contents_glob}"
    return false
  end

  all_expired? parse_ls(stdout), ttl
end

#run ⇒ Object



211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/hadupils/commands.rb', line 211

# Removes old hadupils tmp files/dirs where all files within a tmpdir are
# also older than the TTL.
#
# The TTL is user configurable via the ENV variable $HADUPILS_TMP_TTL and
# defaults to 86400 (last 24 hours). A dry run (-n / --dry-run) lists what
# would be removed without removing anything.
#
# @return [Array(nil, Integer)] nil paired with the exit status: the status
#   of the initial listing if it failed, otherwise 0 when every expiry check
#   and every removal succeeded, otherwise 255
def run
  # Keep the Runner's shell STDOUT quiet while candidates are inspected
  Shell.silence_stdout = true

  # List the tmp path to obtain candidate directories
  listing, exitstatus = Hadupils::Commands::Hadoop.run ['fs', '-ls', tmp_path]
  return [nil, exitstatus] unless successful? exitstatus

  candidates = dir_candidates(hadupils_tmpfiles(parse_ls(listing)), tmp_ttl)
  doomed     = candidates.select { |candidate| has_expired? candidate, tmp_ttl }

  # Any failed expiry check aborts the cleanup before anything is removed
  exitstatus = expired_exitstatuses.all? { |status| status == 0 } ? 0 : 255
  return [nil, exitstatus] unless successful? exitstatus

  puts "Found #{pluralize(doomed.length, 'item', 'items')} to be removed recursively"
  doomed.each { |item| puts item }

  unless perform_dry_run?
    # From here on the user should see the Runner's shell STDOUT
    Shell.silence_stdout = false

    puts 'Removing...' unless doomed.empty?
    doomed.each do |dir|
      _, rm_status = Hadupils::Commands::RmFile.run ['-r', dir]
      rm_exitstatuses << rm_status
      $stderr.puts "Failed to recursively remove: #{dir}" unless successful? rm_status
    end
  end

  [nil, rm_exitstatuses.all? { |status| status == 0 } ? 0 : 255]
end