Class: EtTools::NatMonitor

Inherits:
Object
  • Object
show all
Defined in:
lib/nat-monitor.rb,
lib/nat-monitor/version.rb

Constant Summary collapse

VERSION =
'4.0.1'

Instance Method Summary collapse

Constructor Details

#initialize(conf_file = nil) ⇒ NatMonitor

Returns a new instance of NatMonitor.



9
10
11
# File 'lib/nat-monitor.rb', line 9

# Builds the monitor's configuration.
#
# The hash returned by #defaults is the base; every key present in the loaded
# configuration overrides the corresponding default.
#
# @param conf_file [String, nil] path handed to #load_conf; nil lets
#   #load_conf choose its own default location
def initialize(conf_file = nil)
  base = defaults
  @conf = base.merge(load_conf(conf_file))
end

Instance Method Details

#am_i_master? ⇒ Boolean

Returns:

  • (Boolean)


151
152
153
# File 'lib/nat-monitor.rb', line 151

# Reports whether this instance is the current master.
#
# @return [Boolean] the result of #master_node? for the id returned by
#   #my_instance_id
def am_i_master?
  own_id = my_instance_id
  master_node?(own_id)
end

#connection ⇒ Object



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/nat-monitor.rb', line 118

# Returns the Fog AWS compute client, building it on first use and caching it
# in @connection afterwards.
#
# Credentials: when 'aws_access_key_id' is configured, it is passed together
# with 'aws_secret_access_key'; otherwise the client is created with
# use_iam_profile: true. 'region' and 'aws_url' (sent as :endpoint) are only
# forwarded when they are set in the configuration.
#
# @return [Fog::Compute::AWS] the cached client
def connection
  return @connection if @connection

  access_key = @conf['aws_access_key_id']
  options =
    if access_key
      { aws_access_key_id: access_key,
        aws_secret_access_key: @conf['aws_secret_access_key'] }
    else
      { use_iam_profile: true }
    end

  region = @conf['region']
  endpoint = @conf['aws_url']
  options[:region] = region if region
  options[:endpoint] = endpoint if endpoint

  @connection = Fog::Compute::AWS.new(options)
end

#current_master ⇒ Object



134
135
136
137
138
139
140
# File 'lib/nat-monitor.rb', line 134

# Looks up which instance currently receives the default route.
#
# Fetches the configured route table through #connection, finds the route
# whose 'destinationCidrBlock' is '0.0.0.0/0' and returns that route's
# 'instanceId'.
#
# @return [Object] the 'instanceId' value of the default route
# @raise [RuntimeError] if the route table lookup returns nil, or the table
#   has no 0.0.0.0/0 route (previously both cases surfaced as a NoMethodError
#   on nil, which hid the real cause)
def current_master
  table_id = @conf['route_table_id']
  table = connection.route_tables.get(table_id)
  raise "Route table #{table_id} not found" unless table

  default_route = table.routes.find do |route|
    route['destinationCidrBlock'] == '0.0.0.0/0'
  end
  raise "No default route (0.0.0.0/0) in route table #{table_id}" unless default_route

  default_route['instanceId']
end

#defaults ⇒ Object



43
44
45
46
47
48
# File 'lib/nat-monitor.rb', line 43

# Baseline configuration that the constructor merges the loaded file over.
#
# A new hash is built on every call, so callers may modify the result.
#
# NOTE(review): none of the methods documented alongside this one read
# 'pings' — confirm whether that setting is still used anywhere.
#
# @return [Hash{String => Object}] default settings keyed by option name
def defaults
  {
    'pings' => 3,
    # Assigned to the pinger's timeout in #pingable?.
    'ping_timeout' => 1,
    # Passed to sleep between heartbeats in #main_loop.
    'heartbeat_interval' => 10,
    # Cronitor notifications are skipped unless this is truthy.
    'monitor_enabled' => false
  }
end

#heartbeat ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/nat-monitor.rb', line 65

# Runs one health check and takes over the default route when warranted.
#
# Decision order:
# 1. If this instance is already the master, nothing else is done.
# 2. If every other node answers pings, nothing is done.
# 3. If no other node answers, this instance assumes it is the one that is
#    cut off and leaves the route alone.
# 4. If the current master is among the unreachable nodes, #steal_route is
#    called; otherwise the situation is only logged.
#
# @return [Object, nil] the result of #steal_route when the route is taken
#   over, nil in every other case
def heartbeat
  if am_i_master?
    output "Looks like I'm the master"
    return
  end

  down = unreachable_nodes
  return if down.empty?

  # Losing every peer at once points at this instance, not at the peers.
  if down.count == other_nodes.keys.count
    output "No nodes are reachable. Seems I'm the unreachable one."
    return
  end

  master_id = current_master
  return steal_route if down.include?(master_id)

  output "Unreachable nodes: #{down.inspect}"
  output "Current master (#{master_id}) is still reachable"
  nil
end

#load_conf(conf_file = nil) ⇒ Object



13
14
15
# File 'lib/nat-monitor.rb', line 13

# Reads the monitor configuration from a YAML file.
#
# An empty YAML document parses to a falsy value; it is replaced by an empty
# hash so the constructor can always merge the result over #defaults instead
# of failing with a TypeError.
#
# @param conf_file [String, nil] path of the YAML file; when nil,
#   '/etc/nat_monitor.yml' is read
# @return [Object] the parsed document (a Hash for a mapping), or {} when the
#   file contains no document
# @raise [SystemCallError] if the file cannot be opened (e.g. Errno::ENOENT)
def load_conf(conf_file = nil)
  YAML.load_file(conf_file || '/etc/nat_monitor.yml') || {}
end

#main_loop ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/nat-monitor.rb', line 50

# Runs #heartbeat forever, pausing 'heartbeat_interval' between rounds.
#
# Each round notifies the monitor with 'run' before and 'complete' after the
# heartbeat. Any StandardError raised during a round is reported to the
# monitor as 'fail', its backtrace is sent to #output, and the loop carries
# on with the next round.
def main_loop
  loop do
    begin
      notify_monitor('run')
      heartbeat
      notify_monitor('complete')
    rescue StandardError => error
      notify_monitor('fail', "Caught #{error.class} exception: #{error.message}")
      output(error.backtrace)
    end
    sleep(@conf['heartbeat_interval'])
  end
end

#master_node?(node_id) ⇒ Boolean

Returns:

  • (Boolean)


155
156
157
# File 'lib/nat-monitor.rb', line 155

# Tells whether the given node currently holds the default route.
#
# @param node_id [Object] instance id to compare against #current_master
# @return [Boolean] true when #current_master equals node_id
def master_node?(node_id)
  master_id = current_master
  master_id == node_id
end

#monitor ⇒ Object



159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/nat-monitor.rb', line 159

# Returns the Cronitor client, creating and caching it on first use.
#
# With 'monitor_token' configured the client is built from the token plus
# 'monitor_opts'; otherwise it is built from 'monitor_code'.
#
# @return [Cronitor, nil] the cached client, or nil when 'monitor_enabled'
#   is not truthy
def monitor
  return unless @conf['monitor_enabled']
  return @monitor if @monitor

  token = @conf['monitor_token']
  @monitor =
    if token
      ::Cronitor.new(token: token, opts: @conf['monitor_opts'])
    else
      ::Cronitor.new(code: @conf['monitor_code'])
    end
end

#my_instance_id ⇒ Object



142
143
144
145
146
147
148
149
# File 'lib/nat-monitor.rb', line 142

# Returns this machine's instance id.
#
# The id is requested over HTTP from 169.254.169.254 at
# /latest/meta-data/instance-id on the first call and cached in
# @my_instance_id for later calls.
#
# @return [String] the response body of the metadata request
def my_instance_id
  return @my_instance_id if @my_instance_id

  @my_instance_id = Net::HTTP.get('169.254.169.254', '/latest/meta-data/instance-id')
end

#notify_monitor(status, msg = nil) ⇒ Object



172
173
174
175
176
177
178
179
# File 'lib/nat-monitor.rb', line 172

# Logs an optional message and, when monitoring is enabled, pings Cronitor.
#
# @param status [String] status forwarded to the monitor's ping (the callers
#   shown here use 'run', 'complete' and 'fail')
# @param msg [Object, nil] message sent to #output when not nil, and
#   forwarded to the ping
# @return [Object, nil] the result of the ping, or nil when
#   'monitor_enabled' is not truthy
def notify_monitor(status, msg = nil)
  output(msg) unless msg.nil?

  if @conf['monitor_enabled']
    output('Notifying Cronitor')
    monitor.ping(status, msg)
  end
end

#other_nodes ⇒ Object



100
101
102
103
104
105
106
# File 'lib/nat-monitor.rb', line 100

# Returns the configured nodes without this instance's own entry.
#
# Works on a copy, so @conf['nodes'] keeps every entry. The result is cached
# in @other_nodes after the first call.
#
# @return [Object] copy of @conf['nodes'] with #my_instance_id removed
def other_nodes
  @other_nodes ||= @conf['nodes'].dup.tap do |peers|
    peers.delete(my_instance_id)
  end
end

#pingable?(ip) ⇒ Boolean

Returns:

  • (Boolean)


108
109
110
111
112
# File 'lib/nat-monitor.rb', line 108

# Checks whether a host answers an external ping.
#
# @param ip [String] address handed to Net::Ping::External
# @return [Boolean] the pinger's verdict, obtained after its timeout has been
#   set to the configured 'ping_timeout'
def pingable?(ip)
  probe = Net::Ping::External.new(ip)
  probe.timeout = @conf['ping_timeout']
  probe.ping?
end

#route_exists?(route_id) ⇒ Boolean

Returns:

  • (Boolean)


114
115
116
# File 'lib/nat-monitor.rb', line 114

# Checks whether a route table with the given id is known to AWS.
#
# Despite the parameter name, the id is compared against the ids of the
# route tables returned by the connection, not against individual routes.
#
# @param route_id [String] route table id to look for
# @return [Boolean] true when one of the route tables has that id
def route_exists?(route_id)
  table_ids = connection.route_tables.map(&:id)
  table_ids.include?(route_id)
end

#run ⇒ Object



17
18
19
20
21
# File 'lib/nat-monitor.rb', line 17

# Entry point: validates the configuration, announces start-up and then
# hands control to #main_loop.
#
# @return [Object] whatever #main_loop returns
def run
  validate!
  output('Starting NAT Monitor')
  main_loop
end

#steal_route ⇒ Object



85
86
87
88
89
90
91
92
93
94
# File 'lib/nat-monitor.rb', line 85

# Points the 0.0.0.0/0 route of the configured route table at this instance.
#
# The takeover is always logged first. When 'mocking' is set in the
# configuration nothing is sent to AWS.
#
# @return [Object, nil] the result of replace_route, or nil when mocking
def steal_route
  table_id = @conf['route_table_id']
  output "Stealing route 0.0.0.0/0 on route table #{table_id}"
  return if @conf['mocking']

  connection.replace_route(table_id, '0.0.0.0/0', 'InstanceId' => my_instance_id)
end

#unreachable_nodes ⇒ Object



96
97
98
# File 'lib/nat-monitor.rb', line 96

# Lists the peers that do not answer pings.
#
# @return [Hash] the entries of #other_nodes whose address fails #pingable?
def unreachable_nodes
  other_nodes.reject { |_node, ip| pingable?(ip) }
end

#validate! ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/nat-monitor.rb', line 23

# Verifies the configuration and terminates the process when it is unusable.
#
# Checks, in order, and exits with the matching status code on the first
# failure after reporting the reason through #notify_monitor:
#   1 - 'route_table_id' is missing
#   2 - the route table is not found via #route_exists?
#   3 - fewer than three nodes are configured
#
# A configuration without a 'nodes' key is treated as having zero nodes, so
# it is reported as error 3 rather than crashing with a NoMethodError on nil.
#
# @return [nil] when the configuration passes every check
def validate!
  route_table_id = @conf['route_table_id']
  node_count = (@conf['nodes'] || {}).count

  exit_code, msg =
    if !route_table_id
      [1, 'route_table_id not specified']
    elsif !route_exists?(route_table_id)
      [2, "Route #{route_table_id} not found"]
    elsif node_count < 3
      [3, '3 or more nodes are required to create a quorum']
    end

  return unless exit_code

  notify_monitor 'fail', msg
  exit exit_code
end