Class: EY::Stonith

Inherits:
Object
  • Object
show all
Includes:
EyApi
Defined in:
lib/stonith.rb

Instance Method Summary collapse

Methods included from EyApi

#call_api, #get_envs, #get_json

Constructor Details

#initialize(opts = {}) ⇒ Stonith

Returns a new instance of Stonith.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/stonith.rb', line 26

# Boots the monitor: stores the options, builds the REST and EC2 clients,
# resets the health-check bookkeeping, loads the local JSON, derives the
# current master URL, installs the signal traps, starts the timers and
# finally runs the am_i_master? check.
#
# @param opts [Hash] options; this method reads :api, :aws_secret_id and
#   :aws_secret_key (other methods read further keys from @opts)
def initialize(opts={})
  Log.write "Starting up"

  @opts = opts
  @rest = RestClient::Resource.new(opts[:api])

  secret_id  = @opts[:aws_secret_id]
  secret_key = @opts[:aws_secret_key]
  @keys = {:aws_secret_id => secret_id, :aws_secret_key => secret_key}

  # Health-check bookkeeping used by check_master / take_over_as_master.
  @bad_checks      = 0
  @seen_good_check = false

  @ec2 = RightAws::Ec2.new(secret_id, secret_key)
  @taking_over = false

  get_local_json
  get_master_from_json
  setup_traps
  start
  am_i_master?
end

Instance Method Details

#am_i_master? ⇒ Boolean

Returns:

  • (Boolean)


126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/stonith.rb', line 126

# Reports whether EC2 currently pairs public_ip with this instance and, when
# it does, records this node as master in the locks table.
#
# On a match the method opens a transaction, locks the master_lock row with
# SELECT ... FOR UPDATE, writes self_monitor_url into it, sets @master to the
# same URL, cancels the master check timer and commits.
#
# @return [Boolean] true when one of the described addresses has both this
#   node's public_ip and instance_id, false otherwise
def am_i_master?
  holds_ip = @ec2.describe_addresses.any? { |desc| desc[:public_ip] == public_ip && desc[:instance_id] == instance_id }
  return false unless holds_ip

  db = get_mysql_handle
  begin
    db.execute("begin")
    # Issued only for its FOR UPDATE row lock; the selected value is not used.
    db.execute("select master_lock from locks for update").finish
    db.do("update locks set master_lock = '#{self_monitor_url}'")
    @master = self_monitor_url
    cancel_master_check_timer
    db.do("commit")
  ensure
    # This method runs repeatedly from timers, so the handle must be released
    # on every path (success or exception) instead of being left open.
    db.disconnect
  end
  true
end

#cancel_master_check_timer ⇒ Object



88
89
90
# File 'lib/stonith.rb', line 88

# Cancels the master check timer if one has been created.
#
# @return [Object] the falsy @check_master_timer value when there is no timer,
#   otherwise whatever the timer's #cancel returns
def cancel_master_check_timer
  timer = @check_master_timer
  return timer unless timer

  timer.cancel
end

#check_master ⇒ Object



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/stonith.rb', line 92

# Fires an asynchronous GET (10 second timeout) at the @master URL and feeds
# the outcome into the failover bookkeeping:
# * status 200 marks a good check and resets the bad-check counter;
# * any other status, or a request error, calls take_over_as_master.
#
# @return [Object] whatever registering the errback returns
def check_master
  request = EventMachine::HttpRequest.new(@master).get(:timeout => 10)

  request.callback do
    status = request.response_header.status
    if status == 200
      @seen_good_check = true
      @bad_checks = 0
    else
      take_over_as_master
    end
    status
  end

  request.errback { |_msg, _err| take_over_as_master }
end

#get_local_json ⇒ Object



182
183
184
# File 'lib/stonith.rb', line 182

# Reads /etc/chef/dna.json from disk, parses it and stores the result in @json.
#
# @return [Object] the freshly parsed JSON document
def get_local_json
  raw_dna = IO.read("/etc/chef/dna.json")
  @json = JSON.parse(raw_dna)
end

#get_master_from_json ⇒ Object



186
187
188
189
190
# File 'lib/stonith.rb', line 186

# Derives the master's haproxy monitor URL from the loaded JSON and stores it
# in @master. Leaves @master untouched when the JSON carries no
# private_dns_name for the master app server.
#
# @return [String, nil] the new @master URL, or nil when nothing was set
def get_master_from_json
  host = @json['master_app_server']['private_dns_name']
  return unless host

  @master = "http://#{host}/haproxy/monitor"
end

#get_mysql_handle ⇒ Object



47
48
49
# File 'lib/stonith.rb', line 47

# Opens a new DBI connection to the "engineyard" MySQL database on the host
# named by @json['db_host'], as user root with the password in @opts[:dbpass].
#
# @return [Object] whatever DBI.connect returns
def get_mysql_handle
  dsn = "DBI:Mysql:engineyard:#{@json['db_host']}"
  DBI.connect(dsn, 'root', @opts[:dbpass])
end

#instance_id ⇒ Object



160
161
162
# File 'lib/stonith.rb', line 160

# Returns this machine's instance id, fetched from the
# 169.254.169.254 metadata endpoint on first use and cached in @instance_id.
#
# @return [Object] the body read from the instance-id metadata URL
def instance_id
  return @instance_id if @instance_id

  @instance_id = open("http://169.254.169.254/latest/meta-data/instance-id").read
end

#notify_awsm ⇒ Object



145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/stonith.rb', line 145

# Calls the "promote_instance_to_master" API for this instance and interprets
# the 'status' field of the response.
#
# * 'ok'               -> true
# * 'already_promoted' -> runs am_i_master? now, schedules a 600 second timer
#                         that restarts master monitoring unless am_i_master?
#                         holds at that point, then returns true
# * anything else      -> false
#
# @return [Boolean]
def notify_awsm
  Log.write "Notifying awsm that I won"
  status = call_api("promote_instance_to_master", :instance_id => instance_id)['status']

  return true if status == 'ok'
  return false unless status == 'already_promoted'

  am_i_master?
  EM.add_timer(600) do
    setup_master_check_timer unless am_i_master?
  end
  true
end

#private_dns_name ⇒ Object



164
165
166
# File 'lib/stonith.rb', line 164

# Returns this machine's local hostname, fetched from the
# 169.254.169.254 metadata endpoint on first use and cached in
# @private_dns_name.
#
# @return [Object] the body read from the local-hostname metadata URL
def private_dns_name
  return @private_dns_name if @private_dns_name

  @private_dns_name = open("http://169.254.169.254/latest/meta-data/local-hostname").read
end

#public_ip ⇒ Object



178
179
180
# File 'lib/stonith.rb', line 178

# Returns the master app server's public IP as listed in the loaded JSON,
# caching the first truthy value in @public_ip.
#
# @return [Object, nil] the cached or freshly looked-up 'public_ip' entry
def public_ip
  return @public_ip if @public_ip

  @public_ip = @json['master_app_server']['public_ip']
end

#self_monitor_url ⇒ Object



168
169
170
# File 'lib/stonith.rb', line 168

# Builds the haproxy monitor URL for this node from private_dns_name.
#
# @return [String] "http://<private_dns_name>/haproxy/monitor"
def self_monitor_url
  host = private_dns_name
  "http://#{host}/haproxy/monitor"
end

#setup_master_check_timer ⇒ Object



81
82
83
84
85
86
# File 'lib/stonith.rb', line 81

# (Re)starts periodic monitoring of the master. Any existing timer is
# cancelled first; a new one running check_master every @opts[:heartbeat]
# is created only when @master is not this node's own monitor URL.
#
# @return [Object, nil] the new timer, or nil when this node is the master
def setup_master_check_timer
  cancel_master_check_timer
  return if self_monitor_url == @master

  @check_master_timer = EventMachine::PeriodicTimer.new(@opts[:heartbeat]) do
    check_master
  end
end

#setup_traps ⇒ Object



42
43
44
45
# File 'lib/stonith.rb', line 42

# Installs the signal handlers that pause and resume master monitoring:
# * HUP  cancels the master check timer and logs that monitoring is paused;
# * USR1 schedules a 600 second timer that restarts monitoring unless
#        am_i_master? holds at that point, and logs the wake-up.
#
# @return [Object] whatever the final trap call returns
def setup_traps
  trap("HUP") do
    cancel_master_check_timer
    Log.write "timer canceled, not monitoring until you wake me up again"
  end

  trap("USR1") do
    EM.add_timer(600) { setup_master_check_timer unless am_i_master? }
    Log.write "woke up, starting monitoring again in 10 minutes"
  end
end

#start ⇒ Object



76
77
78
79
# File 'lib/stonith.rb', line 76

# Starts master monitoring and schedules a reload of the local JSON every
# 300 seconds.
#
# @return [Object] whatever EM.add_periodic_timer returns
def start
  setup_master_check_timer

  EM.add_periodic_timer(300) do
    get_local_json
  end
end

#steal_ip ⇒ Object



172
173
174
175
176
# File 'lib/stonith.rb', line 172

# Moves the master public IP to this instance: disassociates public_ip and,
# only if that call returns a truthy value, associates it with instance_id.
#
# @return [Object, nil] the associate_address result, or nil when the
#   disassociation did not succeed
def steal_ip
  return unless @ec2.disassociate_address(public_ip)

  @ec2.associate_address(instance_id, public_ip)
end

#take_over_as_master ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/stonith.rb', line 109

# Records one failed master check and, once the threshold is crossed,
# attempts the failover.
#
# A takeover is attempted only when more than 5 bad checks have accumulated,
# a good check has been seen before, and no takeover is already in progress.
# The attempt stops monitoring, tries to grab the lock and, on success,
# steals the IP and notifies awsm, retrying the notification every 5 seconds
# until it succeeds.
#
# @return [Object, nil] the retry timer when the first notification failed,
#   nil in every other case
def take_over_as_master
  Log.write("Got a bad check: seen good check is #{@seen_good_check.inspect}")
  @bad_checks += 1
  return unless @bad_checks > 5 && @seen_good_check && !@taking_over

  Log.write "I'm trying to take over!"
  @taking_over = true
  cancel_master_check_timer
  return unless try_lock(private_dns_name)

  Log.write("I got the lock!")
  steal_ip
  return if notify_awsm

  # The block runs later, once retry_timer has been assigned.
  retry_timer = EventMachine::PeriodicTimer.new(5) { retry_timer.cancel if notify_awsm }
end

#try_lock(nodename) ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/stonith.rb', line 51

# Tries to claim the master lock row for this node.
#
# Inside a transaction the current master_lock value is read with
# SELECT ... FOR UPDATE. If it still equals the @master this node was
# monitoring, the lock is rewritten to this node's monitor URL and the claim
# succeeds. Otherwise another node is already master: the failover state is
# reset, @master is pointed at that node and monitoring is restarted after
# 600 seconds.
#
# @param nodename [String] name used in the log messages only
# @return [Boolean] true when this node took the lock, false otherwise
def try_lock(nodename)
  Log.write("Trying to grab the lock for: #{nodename}")
  db = get_mysql_handle
  begin
    db.execute("begin")
    res = db.execute("select master_lock from locks for update")
    master = res.fetch[0]
    res.finish

    got_lock = (master == @master)
    if got_lock
      # Same URL that am_i_master? writes into the lock row.
      @master = self_monitor_url
      db.do("update locks set master_lock = '#{@master}'")
    else
      # new master, don't start monitoring till it comes up
      @seen_good_check = false
      @taking_over = false
      @master = master
      Log.write("Failed to grab lock, relenting: #{nodename}\nmaster is: #{@master}")
      EM.add_timer(600) { Log.write "restarting monitoring"; setup_master_check_timer }
    end
    db.do("commit")
    got_lock
  ensure
    # Release the handle even when a statement raises, so a failed attempt
    # does not leave the connection and its open transaction behind.
    db.disconnect
  end
end