Class: Backup

Inherits:
Object
  • Object
show all
Includes:
Validations
Defined in:
lib/backup.rb

Overview

Used to back up a MogileFS domain using a backup profile.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Validations

#check_backup_path, #check_mogile_domain, #check_settings_file, #connect_sqlite, #create_sqlite_db, #migrate_sqlite, #mogile_db_connect, #mogile_tracker_connect

Constructor Details

#initialize(o = {}) ⇒ Backup

Run validations and prepare the object for a backup

Parameters:

  • o (Hash) (defaults to: {})

    hash containing the settings for the backup



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/backup.rb', line 8

# Run validations and prepare the object for a backup.
#
# @param o [Hash] hash containing the settings for the backup
#   (:workers => number of worker processes to fork, optional)
def initialize(o={})

  #Load up the settings file (check_settings_file validates its presence first).
  #YAML.load_file closes the file handle; YAML::load(File.open(...)) leaked it.
  check_settings_file
  settings = YAML.load_file("#{$backup_path}/settings.yml")
  @db = settings['db']
  @db_host = settings['db_host']
  @db_port = settings['db_port']
  @db_pass = settings['db_pass']
  @db_user = settings['db_user']
  @domain = settings['domain']
  #NOTE(review): this assigns @tracker_ip, but the class exposes a tracker_host
  #reader that returns @tracker_host -- verify which name the tracker connection
  #code actually reads.
  @tracker_ip = settings['tracker_ip']
  @tracker_port = settings['tracker_port']
  @workers = o[:workers] if o[:workers]


  #Run validations and set up the sqlite, MogileFS DB, and tracker connections.
  raise "Backup path validation failed: #{$backup_path}" unless check_backup_path
  create_sqlite_db
  connect_sqlite
  migrate_sqlite
  mogile_db_connect
  mogile_tracker_connect
  check_mogile_domain(domain)

  #Model classes are loaded only after the database connections exist.
  require('domain')
  require('file')
  require('bakfile')
  require('fileclass')
end

Instance Attribute Details

#dbObject

Returns the value of attribute db.



3
4
5
# File 'lib/backup.rb', line 3

# @return [Object] name of the MogileFS database, as read from settings.yml ('db')
def db
  @db
end

#db_hostObject

Returns the value of attribute db_host.



3
4
5
# File 'lib/backup.rb', line 3

# @return [Object] MogileFS database host, as read from settings.yml ('db_host')
def db_host
  @db_host
end

#db_passObject

Returns the value of attribute db_pass.



3
4
5
# File 'lib/backup.rb', line 3

# @return [Object] MogileFS database password, as read from settings.yml ('db_pass')
def db_pass
  @db_pass
end

#db_portObject

Returns the value of attribute db_port.



3
4
5
# File 'lib/backup.rb', line 3

# @return [Object] MogileFS database port, as read from settings.yml ('db_port')
def db_port
  @db_port
end

#db_userObject

Returns the value of attribute db_user.



3
4
5
# File 'lib/backup.rb', line 3

# @return [Object] MogileFS database user, as read from settings.yml ('db_user')
def db_user
  @db_user
end

#domainObject

Returns the value of attribute domain.



3
4
5
# File 'lib/backup.rb', line 3

# @return [Object] MogileFS domain being backed up, as read from settings.yml ('domain')
def domain
  @domain
end

#tracker_hostObject

Returns the value of attribute tracker_host.



3
4
5
# File 'lib/backup.rb', line 3

# NOTE(review): @tracker_host is never assigned -- #initialize stores the
# settings value in @tracker_ip instead, so this reader appears to always
# return nil.  Verify the intended attribute name.
def tracker_host
  @tracker_host
end

#tracker_portObject

Returns the value of attribute tracker_port.



3
4
5
# File 'lib/backup.rb', line 3

# @return [Object] MogileFS tracker port, as read from settings.yml ('tracker_port')
def tracker_port
  @tracker_port
end

#workersObject

Returns the value of attribute workers.



3
4
5
# File 'lib/backup.rb', line 3

# @return [Object] number of worker processes to fork (from the :workers option, if given)
def workers
  @workers
end

Instance Method Details

#backup(o = {}) ⇒ Object

The real logic for backing the domain up. It is pretty careful about making sure that it doesn’t report a file as backed up unless it actually was. Supports the ability to remove deleted files from the backup as well. We grab files from the mogilefs mysql server in groups of 500 * number of workers (default is 1 worker)

Parameters:

  • o (Hash) (defaults to: {})

    if :no_delete then don’t remove deleted files from the backup (intensive process)



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/backup.rb', line 138

# The real logic for backing the domain up.  It is careful not to report a file
# as backed up unless it actually was, and can also remove deleted files from
# the backup.  Files are pulled from the MogileFS MySQL server in groups of
# 500 * number of workers (default is 1 worker).
#
# @param o [Hash] if :no_delete then don't remove deleted files from the backup
#   (intensive process); if :non_stop then loop forever instead of returning
def backup(o = {})

  #Loop over the main backup logic.  We'll break out at the end unless o[:non_stop] is set
  loop do
    files = []
    #First we retry files that we haven't been able to back up successfully, if any.
    BakFile.find_each(:conditions => ['saved = ?', false]) do |bak_file|
      files << bak_file
    end

    launch_backup_workers(files)

    #Now back up any new files.  If they fail to be backed up we'll retry them the next time
    #the backup command is run.
    #NOTE(review): find_by_namespace returns a Domain record, not a raw dmid value --
    #presumably ActiveRecord substitutes the record's id into the bind condition; confirm.
    dmid = Domain.find_by_namespace(self.domain)
    Fid.find_in_batches(:conditions => ['dmid = ? AND fid > ?', dmid, BakFile.max_fid], :batch_size => 500 * self.workers.to_i, :include => [:domain, :fileclass]) do |batch|

      #Insert all the files into our bak db with :saved false so that we don't think we backed up something that crashed
      files = []
      batch.each do |file|
        files << BakFile.new(:fid => file.fid,
                             :domain => file.domain.namespace,
                             :dkey => file.dkey,
                             :length => file.length,
                             :classname => file.classname,
                             :saved => false)
      end

      #There is no way to do a bulk insert in sqlite so this generates a lot of inserts.  Wrapping all of the
      #inserts inside a single transaction makes it much much faster.
      BakFile.transaction do
        BakFile.import files, :validate => false
      end

      #Fire up the workers now that we have work for them to do
      launch_backup_workers(files)

      #Terminate program if the signal handler says so and this is a clean place to do it
      return true if SignalHandler.instance.should_quit
    end

    #Delete files from the backup that no longer exist in the mogilefs domain.  Unfortunately there is no
    #easy way to detect which files have been deleted from the MogileFS domain; our only option is to brute
    #force our way through.  This is a bulk query that checks a batch of files per query against the MogileFS
    #database server.  The query is built as a chain of SELECT ... UNION clauses because we only have SELECT
    #privileges on the MogileFS database (a temporary table would require CREATE TEMPORARY TABLE and INSERT
    #privileges).  You might want to only run this operation occasionally if you have a very large domain.
    #In testing, it gets through domains with millions of files in a matter of a second, so all in all it's
    #not so bad.
    if !o[:no_delete]
      Log.instance.info("Start: Search for files to delete")
      BakFile.find_in_batches { |bak_files|
        union = "SELECT #{bak_files.first.fid} as fid"
        bak_files.shift
        bak_files.each do |bakfile|
          union = "#{union} UNION SELECT #{bakfile.fid}"
        end
        connection = ActiveRecord::Base.connection
        files = connection.select_values("SELECT t1.fid FROM (#{union}) as t1 LEFT JOIN file on t1.fid = file.fid WHERE file.fid IS NULL")
        launch_delete_workers(files)

        #Terminate program if the signal handler says so and this is a clean place to do it
        return true if SignalHandler.instance.should_quit
      }
      Log.instance.info("End: Search for files to delete")
    end

    #Break out of infinite loop unless o[:non_stop] is set
    break unless o[:non_stop]
    sleep 1
  end

end

#bak_file(file) ⇒ Bool

Create a backup of a file using a BakFile object

Parameters:

  • file (BakFile)

    file that needs to be backed up

Returns:

  • (Bool)

    file save result



43
44
45
46
47
48
49
50
51
52
# File 'lib/backup.rb', line 43

# Create a backup of a file using a BakFile object.
#
# @param file [BakFile] file that needs to be backed up
# @return [Bool] file save result
def bak_file(file)
  result = file.bak_it
  message = result ? "Backed up: FID #{file.fid}" : "Error - will try again on next run: FID #{file.fid}"
  Log.instance.info(message)
  result
end

#launch_backup_workers(files) ⇒ Object

Launch workers to backup an array of BakFiles

Parameters:

  • files (Array)

    must be an array of BakFiles



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/backup.rb', line 56

# Launch workers to back up an array of BakFiles.
#
# @param files [Array] must be an array of BakFiles
def launch_backup_workers(files)

  #This proc will process the results of the child proc: collect the fids that were
  #saved successfully and mark them saved in one bulk update.
  parent = Proc.new { |results|
    fids = []

    results.each do |result|
      file = result[:file]
      saved = result[:saved]
      fids << file.fid if saved
    end

    #Bulk update all the fids.  Much faster than doing it one at a time.
    BakFile.update_all({:saved => true}, {:fid => fids})

    #Release the connection back to the connection pool.
    SqliteActiveRecord.clear_active_connections!
  }

  #This proc receives an array of BakFiles, processes them, and returns a result array
  #to the parent proc.  We break out early if the signal handler says so.
  #(Parameter renamed to batch so it no longer shadows the outer `files` local.)
  child = Proc.new { |batch|
    result = []
    batch.each do |file|
      break if file.nil?
      break if SignalHandler.instance.should_quit
      saved = bak_file(file)
      result << {:saved => saved, :file => file}
    end
    result
  }

  #Launch workers using the above procs and files
  Forkinator.hybrid_fork(self.workers.to_i, files, parent, child)
end

#launch_delete_workers(fids) ⇒ Object

Launch workers to delete files from the backup. param [Array] fids must be an array of fids that need to be deleted from the backup



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/backup.rb', line 94

# Launch workers to delete files from the backup.
#
# @param fids [Array] array of fids that need to be deleted from the backup
def launch_delete_workers(fids)

  #This proc receives an array of fids, deletes each file from the backup filesystem,
  #and returns the processed fids to the parent.  We break out early if the signal
  #handler says so.
  #(Parameter renamed to batch so it no longer shadows the outer `fids` local.)
  child = Proc.new { |batch|
    result = []
    batch.each do |fid|
      break if fid.nil?
      break if SignalHandler.instance.should_quit
      deleted = BakFile.delete_from_fs(fid)
      if deleted
        Log.instance.info("Deleting from backup: FID #{fid}")
      else
        Log.instance.info("Failed to delete from backup: FID #{fid}")
      end

      result << fid
    end
    result
  }

  #This proc will process the results of the child proc: gather the fids the children
  #handled and remove their rows from the backup database in one bulk delete.
  #(Local renamed to deleted_fids so the closure no longer rebinds the method's
  #`fids` parameter.)
  parent = Proc.new { |results|
    deleted_fids = []

    results.each do |result|
      deleted_fids << result
    end

    BakFile.delete_all({:fid => deleted_fids})

    #Release the connection back to the connection pool.
    SqliteActiveRecord.clear_active_connections!
  }

  #Launch workers using the above procs and fids
  Forkinator.hybrid_fork(self.workers.to_i, fids, parent, child)

end