Class: Reptile::ReplicationMonitor

Inherits:
Object
  • Object
show all
Defined in:
lib/reptile/replication_monitor.rb

Class Method Summary collapse

Class Method Details

.check_slaves ⇒ Object

Checks the status of each slave.



86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/reptile/replication_monitor.rb', line 86

# Checks the status of each slave and reports every slave that is not running.
#
# Walks +databases.slaves+, asks +Status.check_slave_status+ for each entry,
# logs the result, and hands any status other than +Status::RUNNING+ to
# +log_replication_error+.
#
# @return [Object] whatever +databases.slaves.each+ returns
def self.check_slaves
  Log.info "Checking slave status."
  databases.slaves.each do |slave_name, slave_configs|
    status = Status.check_slave_status(slave_name, slave_configs)
    Log.info "'#{slave_name}' is '#{status}'"
    next if status == Status::RUNNING

    # Report the slave's own database. The file-wide +configs+ hash is read
    # with string keys elsewhere ('delay_threshold_secs'), so the previous
    # configs[:database] lookup presumably always produced nil.
    # NOTE(review): assumes per-slave settings use string keys, as
    # diff_tables does with master["database"] -- confirm against Databases.
    log_replication_error :host => slave_name,
                          :database => slave_configs['database'],
                          :status_error => Status.get_error_message(status),
                          :noticed_at => Time.now
  end
end

.configs ⇒ Object

Returns the configs from the replication.yml file



25
26
27
# File 'lib/reptile/replication_monitor.rb', line 25

# Reader for the @configs value stored by load_config_file
# (the 'config' section of the loaded YAML).
#
# @return [Object, nil] the stored value, or nil when nothing was loaded
def self.configs
  instance_variable_get(:@configs)
end

.databases ⇒ Object

Returns the databases from the yml file.



30
31
32
# File 'lib/reptile/replication_monitor.rb', line 30

# Reader for the @databases value stored by load_config_file
# (a Databases instance built from the loaded YAML).
#
# @return [Object, nil] the stored value, or nil when nothing was loaded
def self.databases
  instance_variable_get(:@databases)
end

.diff_tables ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/reptile/replication_monitor.rb', line 39

# Compares master and slave row counts for every configured database.
#
# For each entry of +databases.databases+ the per-table differences come from
# +DeltaMonitor.diff+. Tables whose difference exceeds
# +configs['row_difference_threshold']+ are passed to +log_replication_error+
# together with the master's host and database.
#
# @return [Boolean] true when no database had a table over the threshold
def self.diff_tables
  Log.info "Checking row counts."
  failing = 0

  databases.databases.each_pair do |db_name, roles|
    primary = roles['master']
    replica = roles['slave']

    over_limit = DeltaMonitor.diff(db_name, primary, replica).select do |_table, delta|
      delta > configs['row_difference_threshold']
    end
    next if over_limit.empty?

    log_replication_error(
      :host => primary["host"],
      :database => primary["database"],
      :deltas => over_limit,
      :noticed_at => Time.now
    )
    failing += 1
  end

  failing.zero?
end

.heartbeat ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/reptile/replication_monitor.rb', line 57

# Writes a heartbeat to every master, then checks how far each slave lags.
#
# A slave is reported through +log_replication_error+ when +Heartbeat.read+
# returns nil for it, or when the returned delay is greater than
# +configs['delay_threshold_secs']+.
#
# @return [Boolean] true when no slave was reported
def self.heartbeat
  Log.info "Checking heartbeats."
  # Block parameter deliberately not called +configs+, so it cannot be
  # confused with the class-level +configs+ method used further down.
  databases.masters.each_pair do |name, master_configs|
    Heartbeat.write(name, master_configs)
  end

  overdue_slaves = 0

  databases.slaves.each_pair do |name, db_configs|
    delay = Heartbeat.read(name, db_configs)
    # Report the slave's own database. The file-wide +configs+ hash is read
    # with string keys ('delay_threshold_secs'), so the previous
    # configs[:database] lookup presumably always produced nil.
    # NOTE(review): assumes per-slave settings use string keys, as
    # diff_tables does with master["database"] -- confirm against Databases.
    if delay.nil?
      log_replication_error :host => name,
                            :database => db_configs['database'],
                            :general_error => "Error: No Heartbeats found.",
                            :noticed_at => Time.now
      overdue_slaves += 1
    elsif delay > configs['delay_threshold_secs']
      log_replication_error :host => name,
                            :database => db_configs['database'],
                            :delay => Heartbeat.strfdelay(delay),
                            :noticed_at => Time.now
      overdue_slaves += 1
    end
  end

  overdue_slaves.zero?
end

.load_config_file(databases_file) ⇒ Object

Attempts to load the replication.yml configuration file.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/reptile/replication_monitor.rb', line 5

# Attempts to load the replication.yml configuration file.
#
# Stores the 'config' section in @configs, wraps the 'users' section in a
# Users object, hands the remaining entries to Databases, and wires the
# resulting users and databases into Heartbeat, Runner, Status and
# DeltaMonitor.
#
# @param databases_file [String] path of the YAML file to read
# @return [Object] nil after a successful load; otherwise the result of the
#   Log.error call made when the file cannot be read for permission reasons
# @raise [RuntimeError] when 'delay_threshold_secs' or
#   'row_difference_threshold' is not configured
def self.load_config_file(databases_file)
  @databases_file = databases_file
  # An empty file parses to a falsy value; treat it as "nothing configured".
  yaml = YAML.load(File.read(@databases_file)) || {}
  # Fall back to an empty hash so that a missing 'config' section reaches the
  # descriptive threshold errors below instead of a NoMethodError on nil.
  @configs = yaml.delete('config') || {}
  @users = Users.new(yaml.delete('users'))
  @databases = Databases.new(yaml)

  Heartbeat.user = users.heartbeat_user
  Runner.user = users.replication_user
  Status.user = users.replication_user
  DeltaMonitor.user = users.ro_user
  Runner.databases = databases

  raise "Please specify a delay threshold 'delay_threshold_secs: 360'" if @configs['delay_threshold_secs'].nil?
  raise "Please specify a row delta threshold 'row_difference_threshold: 10'" if @configs['row_difference_threshold'].nil?
rescue Errno::EACCES
  Log.error "Unable to open config file: Permission Denied"
end

.log_replication_error(options) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/reptile/replication_monitor.rb', line 100

# Writes a replication problem to the error log.
#
# Exactly one detail section is logged, chosen in this order: :delay,
# :deltas, :status_error, :general_error. The server line is always logged;
# the database line only when :database is not blank.
#
# @param options [Hash] description of the problem; keys read here are
#   :host, :database, :noticed_at, :delay, :deltas, :status_error and
#   :general_error
# @return [Object] result of the last logging statement
def self.log_replication_error(options)
  # Prefer the time the caller noticed the problem; fall back to the current
  # time for callers that do not pass :noticed_at.
  noticed_at = options[:noticed_at] || Time.now
  Log.error "A replication error occurred on #{options[:host]} at #{noticed_at}"

  if options[:delay]
    Log.error "There was a #{options[:delay]} second replication latency, which is greater than the allowed latency of #{configs['delay_threshold_secs']} seconds"
  elsif options[:deltas]
    Log.error "The following tables have master/slave row count difference greater than the allowed #{configs['row_difference_threshold']}"
    options[:deltas].each do |table, delta|
      Log.error "   table '#{table}' was off by #{delta} rows"
    end
  elsif options[:status_error]
    Log.error "   MySQL Status message: #{options[:status_error]}"
  elsif options[:general_error]
    Log.error "   Error: #{options[:general_error]}"
  end

  Log.error "  Server: #{options[:host]}\n"
  Log.error "  Database: #{options[:database]}\n" unless options[:database].blank?
end

.report ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/reptile/replication_monitor.rb', line 120

# Builds the daily replication report and prints it in one piece.
#
# While the three checks run, $stdout is pointed at an in-memory buffer so
# that anything they print ends up inside the report. The original $stdout
# is restored even when a check raises, and the buffered text is printed
# afterwards.
#
# @return [nil]
def self.report
  Log.info "Generating report"

  buffer = StringIO.new
  original_stdout, $stdout = $stdout, buffer
  begin
    puts "Daily Replication Report for #{Time.now.strftime('%D')}",
         "                       Checking slave status",
         ""
    check_slaves

    # Thresholds are read only after the preceding check has finished.
    puts "", "",
         "                       Checking table row counts",
         "",
         "The row count difference threshold is #{configs['row_difference_threshold']} rows",
         ""
    diff_tables

    puts "", "",
         "                       Checking replication heartbeat",
         "",
         "The heartbeat latency threshold is #{configs['delay_threshold_secs']} seconds",
         ""
    heartbeat
  ensure
    $stdout = original_stdout
  end
  puts buffer.string
end

.users ⇒ Object

Returns the Users loaded from the replication.yml file



35
36
37
# File 'lib/reptile/replication_monitor.rb', line 35

# Reader for the @users value stored by load_config_file
# (a Users instance built from the 'users' section of the loaded YAML).
#
# @return [Object, nil] the stored value, or nil when nothing was loaded
def self.users
  instance_variable_get(:@users)
end