Class: BackupUtility::Postgres

Inherits:
Base
  • Object
show all
Defined in:
lib/backup_utility/postgres.rb

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#backup, #cleanup_backups, #cleanup_dir, #cleanup_shared, #log, #log_and_time, #send_dir_to_s3, #send_file_to_s3, #shared_dst, #store_backup

Constructor Details

#initialize(db_info) ⇒ Postgres

Returns a new instance of Postgres.



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/backup_utility/postgres.rb', line 52

# Builds a Postgres backup helper from a settings hash.
#
# db_info keys read here:
#   :db_name       - required (KeyError when missing)
#   :pg_user       - defaults to 'postgres'
#   :pg_host       - defaults to nil
#   :pg_port       - defaults to the 'port' entry of the ActiveRecord
#                    configuration for the current Rails env (nil when there is none)
#   :backup_user   - defaults to 'backup'
#   :backup_format - 'plain' (default), 'custom' or 'tar'
#   :save          - defaults to true
#
# Raises RuntimeError when :backup_format is not one of the supported formats.
def initialize(db_info)
  super(db_info)
  @db_name = db_info.fetch(:db_name)
  @pg_user = db_info.fetch(:pg_user, 'postgres')
  # fall back to the port configured for the current environment, if any
  env_config = ActiveRecord::Base.configurations[Rails.env]
  configured_port = env_config ? env_config['port'] : nil
  @pg_host = db_info.fetch(:pg_host, nil)
  @pg_port = db_info.fetch(:pg_port, configured_port)
  @backup_user = db_info[:backup_user] || 'backup'
  @backup_format = db_info[:backup_format] || 'plain'
  unless ['plain', 'custom', 'tar'].include?(@backup_format)
    raise "invalid format #{@backup_format}"
  end
  @save = db_info.fetch(:save, true)
  @last_dst = nil
end

Class Method Details

.init(append = 'twist', include_env = true) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/backup_utility/postgres.rb', line 7

# Creates a BackupUtility::Postgres configured for the current Rails environment.
#
# append      - prefix used to derive the database name (and the production user)
# include_env - when true, shared_dir, backup_user, backup_machine, pg_host and
#               pg_port are copied from ENV whenever they are set
#
# Only the staging, production and development environments are recognised; in
# any other environment the database name, user and send_to_* flags stay nil.
def self.init(append = 'twist', include_env = true)
  keep = ENV['save'] != 'false'
  relocate = true
  scp = code = false
  user = database = to_s3 = to_shared = nil

  env = ::Rails.env
  if env == 'staging'
    user, database = 'admin', "#{append}_stage"
    to_s3, to_shared = false, true
  elsif env == 'production'
    stats = append == 'stats'
    user, database = "#{append}_db", "#{append}_prod"
    to_s3 = to_shared = true
    scp = code = stats
    # stats handles its own files, so only move the backups to shared for the others
    relocate = !stats
  elsif env == 'development'
    user = ENV['pg_user'] || 'admin'
    database = ENV['db_name'] || "#{append}_dev"
    # development never saves or ships backups anywhere
    keep = to_s3 = to_shared = false
  end

  db_info = {
    :db_name => database,
    :pg_user => user,
    :backup_dir => ENV['backup_dir'],
    :save => keep,
    :send_to_s3 => to_s3,
    :send_to_shared => to_shared,
    :backup_format => ENV['backup_format'],
    :use_code => code,
    :move_backups => relocate,
    :use_scp => scp
  }

  if include_env
    # optional overrides taken straight from the environment
    %w[shared_dir backup_user backup_machine pg_host pg_port].each do |name|
      value = ENV[name]
      db_info[name.to_sym] = value if value
    end
  end

  BackupUtility::Postgres.new(db_info)
end

Instance Method Details

#_dump(working_dir, append_string = '', label = '') ⇒ Object



113
114
115
116
117
118
119
120
121
122
# File 'lib/backup_utility/postgres.rb', line 113

# Runs pg_dump for @db_name and pipes the output through gzip into working_dir.
#
# working_dir   - directory that receives the dump file
# append_string - extra command-line options handed to pg_dump
# label         - suffix appended to the database name in the file name
#
# The file is named "<db_name><label>.dump.gz" for the custom format and
# "<db_name><label>.sql.gz" otherwise; its path is remembered in @last_dst.
#
# Returns true when the destination file exists after the command has run.
def _dump(working_dir, append_string = '', label='')
  ext = custom? ? 'dump' : 'sql'
  dst = File.join(working_dir, "#{@db_name}#{label}.#{ext}.gz")
  log_and_time("dumping data (format #{@backup_format}) to #{dst}") do
    cmd = "#{pg_dump_command} #{append_string} #{@db_name} | gzip > #{dst}"
    @last_dst = dst
    `#{cmd}`
  end
  # File.exists? was deprecated and removed in Ruby 3.2; File.exist? works on every version
  File.exist?(dst)
end

#custom? ⇒ Boolean

Returns:

  • (Boolean)


92
93
94
# File 'lib/backup_utility/postgres.rb', line 92

# Tells whether the configured backup format is pg_dump's 'custom' format.
def custom?
  chosen_format = @backup_format
  chosen_format == 'custom'
end

#determine_label(table) ⇒ Object



195
196
197
198
199
200
201
202
203
# File 'lib/backup_utility/postgres.rb', line 195

# Picks the time column name for a table from the table's name.
#
# Names containing "hour" map to 'hour', names containing "month" map to
# 'month' (hour wins when both appear), anything else maps to 'day'.
def determine_label(table)
  return 'hour' if table =~ /hour/
  return 'month' if table =~ /month/

  'day'
end

#dump_daily_data(table, day, dump_dir) ⇒ Object



205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# File 'lib/backup_utility/postgres.rb', line 205

# Dumps one day's worth of rows from table into a gzipped file in dump_dir.
#
# table    - table to read from; its name also selects the time column
#            (see determine_label)
# day      - start of the day to dump; rows with day <= column < day + 1 day are selected
# dump_dir - directory that receives "<mm-dd-yyyy>.sql.gz"
#
# Returns false when the dump file already exists (nothing is run) or when the
# file is still missing after the command has run, true otherwise.
def dump_daily_data(table, day, dump_dir)
  format = '%m-%d-%Y'
  start_str = day.strftime(format)
  # determine the label by the table
  label = determine_label(table)
  end_str = (day + 1.day).strftime(format)
  cmd = select_cmd(table, "#{label} >= '#{start_str}' AND #{label} < '#{end_str}'", label)
  dump_file = File.join(dump_dir, "#{start_str}.sql.gz")
  # if the file exists, skip it as it appears to be already dumped
  # (File.exist? because File.exists? was removed in Ruby 3.2)
  if File.exist?(dump_file)
    log("dump file #{dump_file} already created, skipping")
    return false
  end
  log("dumping stats to #{dump_file}")
  `#{psql_dump_cmd(cmd, dump_file)}`
  unless File.exist?(dump_file)
    log("could not create dump #{dump_file}")
    # an explicit return is required, otherwise the trailing `true` wins
    return false
  end
  true
end

#dump_data(working_dir, ignore = []) ⇒ Object



100
101
102
103
104
# File 'lib/backup_utility/postgres.rb', line 100

# Dumps the database into working_dir, leaving out the tables listed in ignore.
#
# For non-custom formats only the data is dumped (--data-only); every ignored
# table is passed to pg_dump as a -T option.
#
# Returns the result of _dump.
def dump_data(working_dir, ignore = [])
  base_options = custom? ? '' : '--data-only'
  exclusions = ''
  if ignore.size > 0
    exclusions = ignore.map { |table| ' -T ' + table }.join
  end
  _dump(working_dir, base_options + exclusions)
end

#dump_data_only(working_dir, only = []) ⇒ Object



106
107
108
109
110
111
# File 'lib/backup_utility/postgres.rb', line 106

# Dumps only the tables listed in only into working_dir.
#
# For non-custom formats only the data is dumped (--data-only); every table is
# passed to pg_dump as a -t option and the file name carries a
# "-table-<names joined by '-'>" label.
#
# Returns the result of _dump.
def dump_data_only(working_dir, only = [])
  options = custom? ? '' : '--data-only'
  if only.size > 0
    options += only.map { |table| ' -t ' + table }.join
  end
  _dump(working_dir, options, "-table-#{only.join('-')}")
end

#dump_monthly_data(table, end_date = nil) ⇒ Object



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# File 'lib/backup_utility/postgres.rb', line 230

# Dumps a full month of rows from table, one file per day, then tars the
# directory, sends the tarball to S3 and deletes the local tarball.
#
# table    - table to dump
# end_date - any time inside the month FOLLOWING the one to dump; defaults to
#            now. It is normalized to the first day of its month and the month
#            before it is dumped.
#
# The sql directory structure will look like this
# BACKUP_DIR
#  -> monthly-12-2009
#     -> hits_by_hour_video_geo
#        -> 12-01-2009.sql.gz
#        -> 12-02-2009.sql.gz
#        -> 12-03-2009.sql.gz
# then the table directory is tarballed and uploaded to s3 with the name
# monthly-12-2009-hits_by_hour_video_geo.tar.gz
def dump_monthly_data(table, end_date = nil)
  end_date = Time.now if end_date.blank?
  # normalize to beginning of month
  end_date = Time.utc(end_date.year, end_date.month, 1)
  format = '%m-%Y'
  start_day = (end_date - 1.month)
  # normalize to the beginning of month
  start_day = Time.utc(start_day.year, start_day.month, 1)
  start_str = start_day.strftime(format)
  table_dir = File.join(@backup_dir, "monthly-#{start_str}", table)
  FileUtils.makedirs(table_dir)
  # now iterate through all the days up till the end_date, and dump each file
  while start_day < end_date
    dump_daily_data(table, start_day, table_dir)
    start_day += 1.day
  end
  # once we have the dir, tar ball the entire table dir
  tar_ball = File.join(@backup_dir, "monthly-#{start_str}-#{table}.tar.gz")
  `tar czvf #{tar_ball} #{table_dir}`
  # File.exist? because File.exists? was removed in Ruby 3.2
  if File.exist?(tar_ball)
    send_file_to_s3(tar_ball)
    # now delete the tarball
    File.delete(tar_ball)
  else
    log("tar ball does not exist #{tar_ball}")
  end
end

#dump_schema(working_dir) ⇒ Object



96
97
98
# File 'lib/backup_utility/postgres.rb', line 96

# Dumps only the schema of the database into working_dir; the file name gets
# a "-schema" label. Returns the result of _dump.
def dump_schema(working_dir)
  pg_dump_options = '--schema-only'
  file_label = '-schema'
  _dump(working_dir, pg_dump_options, file_label)
end

#dump_table_by_id(table, id, working_dir) ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/backup_utility/postgres.rb', line 180

# Dumps every row of table whose id is greater than id into
# "<working_dir>/dump_<table>_<id>.sql.gz", ordered by id.
#
# NOTE(review): id is interpolated into the SQL text as-is, so callers are
# expected to pass a trusted numeric value.
#
# Returns true when the dump file exists after the command has run, false otherwise.
def dump_table_by_id(table, id, working_dir)
  cmd = select_cmd(table, "id > #{id}", 'id')
  dump_file = File.join(working_dir, "dump_#{table}_#{id}.sql.gz")
  dump_cmd = psql_dump_cmd(cmd, dump_file)
  log("dumping table by id to #{dump_file}")
  `#{dump_cmd}`
  # File.exist? because File.exists? was removed in Ruby 3.2
  unless File.exist?(dump_file)
    log("could not create dump #{dump_file}")
    # an explicit return is required, otherwise the trailing `true` wins
    return false
  end
  true
end

#get_paths(settings = {}, append = nil) ⇒ Object



71
72
73
74
75
76
77
78
79
80
# File 'lib/backup_utility/postgres.rb', line 71

# Delegates to the parent get_paths with a backup name derived from settings.
#
# The name is "partial" when settings has a non-empty :ignore or :only list
# (in which case settings[:date_format] is also set to a minute-resolution
# format) and "full" otherwise; "_<append>" is added when append is given.
def get_paths(settings = {}, append = nil)
  partial = [:ignore, :only].any? { |key| !settings.fetch(key, []).empty? }
  if partial
    # partial backups are stamped down to the minute
    settings[:date_format] = '%Y-%m-%d-%H-%M'
    name = 'partial'
  else
    name = 'full'
  end
  name = "#{name}_#{append}" if append
  super(settings, name)
end

#perform_dump(settings, working_dir) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/backup_utility/postgres.rb', line 124

# Runs the dumps requested by settings into working_dir.
#
# settings keys read here:
#   :ignore      - tables to leave out of the main dump (default [])
#   :only        - when non-empty, dump just these tables
#   :skip_schema - when truthy, the schema dump is skipped
#   :separate    - tables that are left out of the main dump and then dumped
#                  one by one into their own files
#
# With the custom format a single dump is made (pg_restore can pick it apart),
# so no schema or per-table dumps are produced.
#
# Returns a falsy value as soon as any dump fails, a truthy value otherwise.
def perform_dump(settings, working_dir)
  ignore = settings[:ignore] || []
  only = settings[:only]
  only_given = !only.nil? && !only.empty?

  # if the backup format is custom, do a full dump since we can do a bunch
  # of stuff with pg_restore, which means we don't need the individual dumps
  if custom?
    return only_given ? dump_data_only(working_dir, only) : dump_data(working_dir, ignore)
  end

  if settings[:skip_schema]
    log("skipping schema dump")
  elsif !dump_schema(working_dir)
    log("failed to dump schema")
    # stop here: a bare `false` would be discarded and the failure ignored
    return false
  end

  separate = settings[:separate]
  if separate
    # first dump the database without any of the separate tables
    log("dumping database without separate tables")
    return false unless dump_data(working_dir, separate + ignore)

    separate.each do |table|
      log("dumping data only for table #{table}")
      unless dump_data_only(working_dir, [table])
        log("failed to dump individual table #{table}")
        return false
      end
    end
    true
  elsif only_given
    dump_data_only(working_dir, only)
  else
    dump_data(working_dir, ignore)
  end
end

#pg_dump_command ⇒ Object



82
83
84
85
86
87
88
89
90
# File 'lib/backup_utility/postgres.rb', line 82

# Builds the shell prefix that invokes pg_dump with the configured format,
# user, host and port. The password comes from the database configuration of
# the current Rails env and is exported as PGPASSWORD.
#
# The host is @pg_host when one was supplied, otherwise the configured 'host';
# -h is omitted entirely when neither is set (a dangling "-h" would swallow the
# next option). -p is only added when @pg_port is set.
#
# Returns the command as a String (without the database name).
def pg_dump_command
  config = Rails.configuration.database_configuration[Rails.env]
  password = config['password']
  host = @pg_host.to_s.empty? ? config['host'] : @pg_host
  cmd = "export PGPASSWORD=#{password};#{BIN}/pg_dump -F#{@backup_format} -U #{@pg_user}"
  cmd += " -h #{host}" unless host.to_s.empty?
  cmd += " -p #{@pg_port}" if @pg_port
  cmd
end

#psql_dump_cmd(cmd, dump_file) ⇒ Object



170
171
172
173
174
175
176
177
178
# File 'lib/backup_utility/postgres.rb', line 170

# Builds the shell command that runs the SQL in cmd through psql and gzips the
# tab-separated, unaligned, tuples-only output into dump_file.
#
# cmd       - SQL statement to execute (placed inside double quotes)
# dump_file - path of the gzipped output file
#
# Connection settings mirror pg_dump_command: @pg_host wins over the configured
# 'host', -h is omitted when neither is set, and -p is added when @pg_port is set.
# The password comes from the database configuration of the current Rails env.
#
# Returns the full command as a String.
def psql_dump_cmd(cmd, dump_file)
  config = Rails.configuration.database_configuration[Rails.env]
  password = config['password']
  host = @pg_host.to_s.empty? ? config['host'] : @pg_host
  connection = "-d #{@db_name} -U #{@pg_user}"
  connection += " -h #{host}" unless host.to_s.empty?
  connection += " -p #{@pg_port}" if @pg_port
  # 9.chr is a literal tab, used as the field separator
  "export PGPASSWORD=#{password};#{BIN}/psql -c \"#{cmd}\" -F '#{9.chr}' -A #{connection} -t | gzip > #{dump_file}"
end

#select_cmd(table, where, label) ⇒ Object



166
167
168
# File 'lib/backup_utility/postgres.rb', line 166

# Builds a SELECT statement returning every column of table for the rows
# matching where, ordered ascending by the label column.
def select_cmd(table, where, label)
  clauses = ["select * from #{table}", "where #{where}", "ORDER BY #{label} ASC"]
  clauses.join(' ')
end