Class: ScoutAgent::Assignment::Snapshot

Inherits:
ScoutAgent::Assignment show all
Defined in:
lib/scout_agent/assignment/snapshot.rb

Overview

Invoke with:

scout_agent snap [force]

This command requests that a snapshot be taken of the environment the agent is running on. Snapshots are a collection of commands sent down from the Scout server that can be used to measure the current health of the environment. Their output, exit status, and run time are passed back up to the server in response to this request.

Note that this is just a request. It may not be honored if enough time hasn’t passed since the last snapshot. This is to protect your server from overmuch busy work, but you can choose to override this limitation with the optional “force” parameter.

Instance Attribute Summary

Attributes inherited from ScoutAgent::Assignment

#group, #other_args, #switches, #user

Instance Method Summary collapse

Methods inherited from ScoutAgent::Assignment

choose_group, choose_user, #initialize, plan, #prepare_and_execute

Methods included from Tracked

#clear_status, #force_status_database_reload, #status, #status_database, #status_log

Constructor Details

This class inherits a constructor from ScoutAgent::Assignment

Instance Method Details

#execute ⇒ Object

Runs the snapshot command.



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/scout_agent/assignment/snapshot.rb', line 24

# 
# Runs the snapshot command.
# 
# The work is done while holding an exclusive, non-blocking lock on
# "snapshot_in_progress.lock" so only one process builds a snapshot at a
# time; if the lock is already held this process simply exits.  Each current
# command is run in a forked child with STDOUT and STDERR redirected into a
# pipe.  The captured output, exit status, and run time are recorded with
# <tt>db.complete_run</tt>.  A command that runs longer than its
# <tt>:timeout</tt> is stopped and recorded with an error message as output.
# 
# Passing "force" as the first of +other_args+ resets all command run times
# before the current commands are read.
# 
def execute
  # prepare the log
  log = ScoutAgent.prepare_wire_tap(:snapshot, :skip_stdout)
  
  # load the snapshot database
  db = Database.load(:snapshots, log)
  abort_with_missing_db unless db
  
  # 
  # lock on an external lock file to ensure only one process can run a
  # snapshot at a time
  # 
  (Plan.db_dir + "snapshot_in_progress.lock").open("a") do |lock|
    begin
      lock.chmod(0777)      # make sure this file is shared by all
    rescue SystemCallError  # we didn't create the file
      # do nothing:  the creator already switched the permissions
    end
    unless lock.flock(File::LOCK_EX | File::LOCK_NB)
      exit  # snapshot in progress
    end
  
    # record our status and set removal at_exit()
    log.info("Building snapshot.")
    status_database(log)
    status("Building snapshot", :snapshot)
    at_my_exit do
      clear_status(:snapshot)
    end
    
    # reset commands, if requested (peek only:  don't modify other_args)
    if Array(other_args).first == "force"
      log.info("Clearing command run times to force a full snapshot.")
      db.reset_all_commands
    end

    # read current commands
    commands = db.current_commands
  
    # bail out if there's no commands to run
    if commands.empty?
      if db.have_commands?
        abort_with_too_recent
      else
        log.warn("No commands were found.")
        abort_with_no_commands
      end
    end
    
    # build snapshot
    snapshot_started = Time.now
    commands.each do |command|  # run each command
      log.info("Running `#{command[:code]}`.")
      command_started = Time.now
      reader, writer  = IO.pipe
      # run the command in a child process
      run             = fork do
        reader.close
        STDOUT.reopen(writer)
        STDERR.reopen(writer)
        begin
          exec(command[:code])  # only returns (by raising) on failure
        rescue StandardError => error  # failed to execute
          warn "#{error.message} (#{error.class})"
        end
        # 
        # exec() failed:  leave the child immediately with a non-zero status
        # so the failure isn't recorded as a success, and skip the at_exit()
        # handlers this child inherited from the parent process
        # 
        STDERR.flush
        exit!(127)
      end
      exit_status     = nil
      output          = nil
      writer.close  # the parent only reads; this lets read() see EOF
      # make sure the child process stops in a reasonable time
      begin
        Timeout.timeout(command[:timeout]) do
          output      = reader.read
          exit_status = Process.wait2(run).last
        end
      rescue Timeout::Error
        log.error("`#{command[:code]}` took too long to run.")
        exit_status   = Process.term_or_kill(run)
        output        = "Error:  This command took too long to run"
      ensure
        # don't leak one pipe descriptor per command
        reader.close unless reader.closed?
      end
      run_time        = Time.now - command_started
      # record results
      db.complete_run( command,
                       output,
                       exit_status,
                       snapshot_started,
                       run_time )
      log.debug( "`#{command[:code]}` exited (#{exit_status}) in " +
                 "#{run_time} seconds." )
    end
    
    # maintain the databases
    db.maintain
    status_database.maintain
    # clean out old logs
    ScoutAgent.remove_old_log_files(log)
    
    log.info("Snapshot complete.")
    lock.flock(File::LOCK_UN)  # release our snapshot lock
  end
end