Class: Bosh::Agent::Alert

Inherits:
Object show all
Defined in:
lib/bosh_agent/alert.rb

Constant Summary collapse

ALERT_RETRIES =
3
RETRY_PERIOD =

Retry period, in seconds.

1
SEVERITY_CUTOFF =
5
DEFAULT_SEVERITY =
2
ALERT =

Severity levels use the same numeric values as syslog.

1
CRITICAL =
2
ERROR =
3
WARNING =
4
IGNORED =
-1
# Lookup table from monit event name to alert severity.
# Keys are lower-case: calculate_severity downcases the event name before
# looking it up here, and falls back to DEFAULT_SEVERITY for names that are
# not listed. Events mapped to IGNORED (-1) are never sent, because register
# returns early for any severity <= 0.
SEVERITY_MAP =
{
  "action done" => IGNORED,
  "checksum failed" => CRITICAL,
  "checksum changed" => WARNING,
  "checksum succeeded" => IGNORED,
  "checksum not changed" => IGNORED,
  "connection failed" => ALERT,
  "connection succeeded" => IGNORED,
  "connection changed" => ERROR,
  "connection not changed" => IGNORED,
  "content failed" => ERROR,
  "content succeeded" => IGNORED,
  "content match" => IGNORED,
  "content doesn't match" => ERROR,
  "data access error" => ERROR,
  "data access succeeded" => IGNORED,
  "data access changed" => WARNING,
  "data access not changed" => IGNORED,
  "execution failed" => ALERT,
  "execution succeeded" => IGNORED,
  "execution changed" => WARNING,
  "execution not changed" => IGNORED,
  "filesystem flags failed" => ERROR,
  "filesystem flags succeeded" => IGNORED,
  "filesystem flags changed" => WARNING,
  "filesystem flags not changed" => IGNORED,
  "gid failed" => ERROR,
  "gid succeeded" => IGNORED,
  "gid changed" => WARNING,
  "gid not changed" => IGNORED,
  "heartbeat failed" => ERROR,
  "heartbeat succeeded" => IGNORED,
  "heartbeat changed" => WARNING,
  "heartbeat not changed" => IGNORED,
  "icmp failed" => CRITICAL,
  "icmp succeeded" => IGNORED,
  "icmp changed" => WARNING,
  "icmp not changed" => IGNORED,
  "monit instance failed" => ALERT,
  "monit instance succeeded" => IGNORED,
  "monit instance changed" => IGNORED,
  "monit instance not changed" => IGNORED,
  "invalid type" => ERROR,
  "type succeeded" => IGNORED,
  "type changed" => WARNING,
  "type not changed" => IGNORED,
  "does not exist" => ALERT,
  "exists" => IGNORED,
  "existence changed" => WARNING,
  "existence not changed" => IGNORED,
  "permission failed" => ERROR,
  "permission succeeded" => IGNORED,
  "permission changed" => WARNING,
  "permission not changed" => IGNORED,
  "pid failed" => CRITICAL,
  "pid succeeded" => IGNORED,
  "pid changed" => WARNING,
  "pid not changed" => IGNORED,
  "ppid failed" => CRITICAL,
  "ppid succeeded" => IGNORED,
  "ppid changed" => WARNING,
  "ppid not changed" => IGNORED,
  "resource limit matched" => ERROR,
  "resource limit succeeded" => IGNORED,
  "resource limit changed" => WARNING,
  "resource limit not changed" => IGNORED,
  "size failed" => ERROR,
  "size succeeded" => IGNORED,
  "size changed" => ERROR,
  "size not changed" => IGNORED,
  "timeout" => CRITICAL,
  "timeout recovery" => IGNORED,
  "timeout changed" => WARNING,
  "timeout not changed" => IGNORED,
  "timestamp failed" => ERROR,
  "timestamp succeeded" => IGNORED,
  "timestamp changed" => WARNING,
  "timestamp not changed" => IGNORED,
  "uid failed" => CRITICAL,
  "uid succeeded" => IGNORED,
  "uid changed" => WARNING,
  "uid not changed" => IGNORED
}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(attrs) ⇒ Alert

Returns a new instance of Alert.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/bosh_agent/alert.rb', line 20

# Builds an alert from a Hash of monit alert attributes.
#
# Collaborators (logger, NATS client, agent id and agent state) are read
# from Config. The alert fields are read from the symbol keys :id,
# :service, :event, :action, :date and :description; the severity is
# derived last, because calculate_severity reads @event and @logger.
#
# @param attrs [Hash] monit alert attributes keyed by symbol
# @raise [ArgumentError] if attrs is not a Hash
def initialize(attrs)
  raise ArgumentError, "#{self.class} expects an attributes Hash as a parameter" unless attrs.is_a?(Hash)

  @logger, @nats, @agent_id, @state =
    Config.logger, Config.nats, Config.agent_id, Config.state

  @id, @service, @event, @action, @date, @description =
    attrs.values_at(:id, :service, :event, :action, :date, :description)

  @severity = calculate_severity
end

Instance Attribute Details

#actionObject (readonly)

The main area of responsibility for this class is conversion of Monit alert format to BOSH Health Monitor alert format.



14
15
16
# File 'lib/bosh_agent/alert.rb', line 14

# Read-only accessor for the alert's action.
#
# @return [Object] the value the constructor read from attrs[:action]
def action; @action; end

#dateObject (readonly)

The main area of responsibility for this class is conversion of Monit alert format to BOSH Health Monitor alert format.



14
15
16
# File 'lib/bosh_agent/alert.rb', line 14

# Read-only accessor for the alert's date.
#
# @return [Object] the value the constructor read from attrs[:date]
def date; @date; end

#descriptionObject (readonly)

The main area of responsibility for this class is conversion of Monit alert format to BOSH Health Monitor alert format.



14
15
16
# File 'lib/bosh_agent/alert.rb', line 14

# Read-only accessor for the alert's description.
#
# @return [Object] the value the constructor read from attrs[:description]
def description; @description; end

#eventObject (readonly)

The main area of responsibility for this class is conversion of Monit alert format to BOSH Health Monitor alert format.



14
15
16
# File 'lib/bosh_agent/alert.rb', line 14

# Read-only accessor for the monit event name.
#
# @return [Object] the value the constructor read from attrs[:event]
def event; @event; end

#idObject (readonly)

The main area of responsibility for this class is conversion of Monit alert format to BOSH Health Monitor alert format.



14
15
16
# File 'lib/bosh_agent/alert.rb', line 14

# Read-only accessor for the alert id.
#
# @return [Object] the value the constructor read from attrs[:id]
def id; @id; end

#serviceObject (readonly)

The main area of responsibility for this class is conversion of Monit alert format to BOSH Health Monitor alert format.



14
15
16
# File 'lib/bosh_agent/alert.rb', line 14

# Read-only accessor for the name of the monitored service.
#
# @return [Object] the value the constructor read from attrs[:service]
def service; @service; end

#severityObject (readonly)

The main area of responsibility for this class is conversion of Monit alert format to BOSH Health Monitor alert format.



14
15
16
# File 'lib/bosh_agent/alert.rb', line 14

# Read-only accessor for the alert severity.
#
# @return [Object] the value the constructor stored from calculate_severity
def severity; @severity; end

Class Method Details

.register(attrs) ⇒ Object



16
17
18
# File 'lib/bosh_agent/alert.rb', line 16

# Convenience entry point: builds an alert from +attrs+ and registers it.
#
# @param attrs [Hash] attributes forwarded unchanged to the constructor
# @return [Object] whatever the instance-level #register returns
def self.register(attrs)
  alert = new(attrs)
  alert.register
end

Instance Method Details

#calculate_severityObject



90
91
92
93
94
95
96
97
98
# File 'lib/bosh_agent/alert.rb', line 90

# Resolves the severity for this alert's monit event.
#
# The event name is stringified and lower-cased, then looked up in
# SEVERITY_MAP. An event with no entry is logged as a warning and gets
# DEFAULT_SEVERITY.
#
# @return [Integer] the mapped severity, or DEFAULT_SEVERITY when unknown
def calculate_severity
  mapped = SEVERITY_MAP[@event.to_s.downcase]
  return mapped unless mapped.nil?

  @logger.warn("Unknown monit event name `#{@event}', using default severity #{DEFAULT_SEVERITY}")
  DEFAULT_SEVERITY
end

#converted_alert_dataObject



65
66
67
68
69
70
71
72
73
74
75
# File 'lib/bosh_agent/alert.rb', line 65

# Translates the monit alert fields into the payload published for the
# Health Monitor.
#
# Input fields:  id, service, event, action, date, description
# Output fields: id, severity, title, summary, created_at (unix timestamp)
#
# @return [Hash{String => Object}] the alert payload, keyed by string
def converted_alert_data
  payload = {}
  payload["id"]         = @id
  payload["severity"]   = calculate_severity
  payload["title"]      = title
  payload["summary"]    = @description
  payload["created_at"] = timestamp
  payload
end

#registerObject

As we don’t (currently) require ACKs for alerts, we might need to send alerts several times in case HM temporarily goes down.



41
42
43
44
45
46
47
48
49
# File 'lib/bosh_agent/alert.rb', line 41

# Schedules this alert for delivery over the message bus.
#
# Nothing is scheduled when the severity is at or above SEVERITY_CUTOFF,
# or is zero or negative. Otherwise ALERT_RETRIES timers are added through
# EM.add_timer, the i-th one firing after i * RETRY_PERIOD, and each timer
# calls send_via_mbus once.
def register
  above_cutoff = severity >= SEVERITY_CUTOFF
  return if above_cutoff || severity <= 0

  ALERT_RETRIES.times do |attempt|
    delay = attempt * RETRY_PERIOD
    EM.add_timer(delay) { send_via_mbus }
  end
end

#send_via_mbusObject



51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/bosh_agent/alert.rb', line 51

# Publishes the converted alert on the "hm.agent.alert.<agent id>" subject.
#
# The alert is dropped with a log line, and nil is returned, when the agent
# state is unknown (nil) or when the state has a blank "job" entry.
# Otherwise the payload from converted_alert_data is encoded with
# Yajl::Encoder and handed to the NATS client.
#
# @return [Object, nil] the result of @nats.publish, or nil when dropped
def send_via_mbus
  if @state.nil?
    @logger.warn("Unable to send alert: unknown agent state")
    nil
  elsif @state["job"].blank?
    @logger.info("No job, ignoring alert")
    nil
  else
    subject = "hm.agent.alert.#{@agent_id}"
    body    = Yajl::Encoder.encode(converted_alert_data)
    @nats.publish(subject, body)
  end
end

#timestampObject



83
84
85
86
87
88
# File 'lib/bosh_agent/alert.rb', line 83

# Converts the monit alert date into a Unix timestamp.
#
# @date is parsed as an RFC 822 date string. When it cannot be parsed --
# a malformed string, nil, or a value that is not a String at all -- a
# warning is logged and the current time is returned instead of raising.
#
# @return [Integer] seconds since the Unix epoch
def timestamp
  Time.rfc822(@date).utc.to_i
rescue ArgumentError, TypeError
  # ArgumentError: the value is not an RFC 822 date (this includes nil).
  # TypeError: the value cannot be matched as a String (e.g. an Integer).
  @logger.warn("Cannot parse monit alert date `#{@date}', using current time instead")
  Time.now.utc.to_i
end

#titleObject



77
78
79
80
81
# File 'lib/bosh_agent/alert.rb', line 77

# Builds the alert title in the form "<service> - <event> - <action>".
#
# When the agent state reports at least one IP, the sorted, comma-separated
# list is appended to the service name in parentheses.
#
# @return [String] the alert title
def title
  addresses = @state.ips

  if addresses.size > 0
    label = "#{@service} (#{addresses.sort.join(", ")})"
  else
    label = @service
  end

  "#{label} - #{@event} - #{@action}"
end