Class: BackupTool
- Inherits:
-
Object
- Object
- BackupTool
- Defined in:
- lib/backuptool.rb
Instance Method Summary collapse
-
#buffered_download(remote, local) ⇒ Object
Download a file from HDFS in buffered chunks * Args : -
remote-> HDFS path -local-> local path. - #delete_snapshots(node: @cassandra.node_name, date: 'ALL') ⇒ Object
- #get_snapshot_metadata(node, date) ⇒ Object
- #get_snapshots_node(node, date) ⇒ Object
-
#initialize(cassandra, hadoop, logger) ⇒ BackupTool
constructor
Create a new BackupTool instance * Args : -
cassandra-> Cassandra instance -hadoop-> HDFS instance -logger-> Logger. - #list_snapshots(node: @cassandra.node_name) ⇒ Object
- #new_snapshot ⇒ Object
-
#restore_snapshot(node, date, destination) ⇒ Object
Restore a snapshot from HDFS * Args : -
node-> node where the snapshot comes from -date-> snapshot date -destination-> local directory where to restore. -
#search_snapshots(node: 'ALL', date: 'ALL') ⇒ Object
Look for snapshots * Args : -
node-> Cassandra node name ('ALL' searches every node of the cluster) -date-> snapshot date ('ALL' matches every date).
Constructor Details
#initialize(cassandra, hadoop, logger) ⇒ BackupTool
Create a new BackupTool instance
-
Args :
-
cassandra-> Cassandra instance -
hadoop-> HDFS instance -
logger-> Logger
-
16 17 18 19 20 21 22 |
# File 'lib/backuptool.rb', line 16
#
# Create a new BackupTool instance
# * *Args* :
#   - +cassandra+ -> Cassandra instance
#   - +hadoop+ -> HDFS instance
#   - +logger+ -> Logger
def initialize(cassandra, hadoop, logger)
  @cassandra = cassandra
  @hadoop = hadoop
  @logger = logger

  # Path segment inserted after the HDFS base dir when building
  # snapshot metadata paths (see the "#{@hadoop.base_dir}/#{@metadir}/..." lookups).
  @metadir = META_DIR
end
Instance Method Details
#buffered_download(remote, local) ⇒ Object
Download a file from HDFS in buffered chunks
-
Args :
-
remote-> HDFS path -
local-> local path
-
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/backuptool.rb', line 148
#
# Download a file from HDFS in chunks of BUFFER_SIZE bytes, printing a
# simple progress bar ('[', one '#' per chunk, ']') on standard output.
# * *Args* :
#   - +remote+ -> HDFS path
#   - +local+ -> local path
def buffered_download(remote, local)
  @logger.debug("#{remote} => #{local}")

  # Create the destination directory (no-op when it already exists)
  FileUtils.mkdir_p(File.dirname(local))

  # The block form closes the handle even when an HDFS read raises;
  # File.open (rather than Kernel#open) never interprets the path as a command.
  File.open(local, 'wb') do |file|
    offset = 0
    print '['
    loop do
      print '#'
      content = @hadoop.read(remote, offset: offset, length: BUFFER_SIZE)
      file.write(content)
      offset += content.length
      # Any chunk that is not exactly BUFFER_SIZE long ends the download
      break unless content.length == BUFFER_SIZE
    end
    print "]\n"
  end
end
#delete_snapshots(node: @cassandra.node_name, date: 'ALL') ⇒ Object
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/backuptool.rb', line 116
#
# Delete the snapshots matching node and date from HDFS. Data files that
# are still listed by another snapshot of the same node are kept.
# * *Args* :
#   - +node+ -> node whose snapshots are deleted (defaults to the local Cassandra node)
#   - +date+ -> snapshot date, or 'ALL' for every date
# * *Raises* :
#   - +RuntimeError+ -> when no snapshot matches
def delete_snapshots(node: @cassandra.node_name, date: 'ALL')
  snapshots = search_snapshots(node: node, date: date)
  raise('No snapshot found for deletion') if snapshots.empty?

  snapshots.each do |snapshot|
    @logger.info("Deleting snapshot #{snapshot}")

    # Re-query on every iteration so snapshots removed earlier in this loop
    # no longer protect their files.
    node_snapshots = search_snapshots(node: snapshot.node)
    merged_metadata = Set.new
    node_snapshots.each do |s|
      merged_metadata.merge(s.metadata) if s != snapshot
    end

    # Only files referenced by no other snapshot can be removed
    files = snapshot.metadata - merged_metadata
    @logger.info("#{files.length} files to delete")
    files.each do |file|
      @logger.info("Deleting file #{file}")
      remote = "#{@hadoop.base_dir}/#{snapshot.cluster}/#{snapshot.node}/#{file}"
      @logger.debug("DELETE => #{remote}")
      @hadoop.delete(remote)
    end

    @logger.info('Deleting metadata in Hadoop')
    remote = "#{@hadoop.base_dir}/#{@metadir}/#{snapshot.cluster}/#{snapshot.node}/cass_snap_#{snapshot.date}"
    @logger.debug("DELETE => #{remote}")
    @hadoop.delete(remote)
  end
end
#get_snapshot_metadata(node, date) ⇒ Object
31 32 33 34 35 36 |
# File 'lib/backuptool.rb', line 31
#
# Read the metadata file of one snapshot from HDFS and return its lines
# (one entry per line) as a Set.
# * *Args* :
#   - +node+ -> Cassandra node name
#   - +date+ -> snapshot date
# * *Raises* :
#   - +RuntimeError+ -> when the metadata file cannot be read
def get_snapshot_metadata(node, date)
  remote = "#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}/#{node}/cass_snap_#{date}"
  @hadoop.read(remote).split("\n").to_set
rescue StandardError => e
  # StandardError only: signals and exit requests must not be turned into
  # a "could not read" error.
  raise("Could not read metadata : #{e.message}")
end
#get_snapshots_node(node, date) ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/backuptool.rb', line 38
#
# Build the list of snapshots stored in HDFS for one node.
# Errors are logged as warnings; the snapshots collected before the
# failure (possibly none) are still returned.
# * *Args* :
#   - +node+ -> Cassandra node name
#   - +date+ -> snapshot date, or 'ALL' to list every snapshot of the node
# * *Returns* :
#   - Array of CassandraSnapshot
def get_snapshots_node(node, date)
  result = []
  begin
    if date == 'ALL'
      ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}/#{node}")
      ls.each do |item|
        # Dedicated local so the +date+ argument is not overwritten
        snapshot_date = item['pathSuffix'].gsub('cass_snap_', '')
        metadata = get_snapshot_metadata(node, snapshot_date)
        result.push(CassandraSnapshot.new(@cassandra.cluster_name, node, snapshot_date, metadata))
      end
    else
      metadata = get_snapshot_metadata(node, date)
      result.push(CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata))
    end
  rescue StandardError => e
    @logger.warn("Could not get snapshots for node #{node} : #{e.message}")
  end
  result
end
#list_snapshots(node: @cassandra.node_name) ⇒ Object
77 78 79 80 81 |
# File 'lib/backuptool.rb', line 77
#
# Log an informational message, look up the snapshots of the given node
# and hand them to +tp+ with the 'cluster', 'node' and 'date' columns.
# * *Args* :
#   - +node+ -> node to list (defaults to the local Cassandra node)
def list_snapshots(node: @cassandra.node_name)
  @logger.info('Listing available snapshots')
  tp(search_snapshots(node: node), 'cluster', 'node', 'date')
end
#new_snapshot ⇒ Object
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/backuptool.rb', line 83
#
# Take a new Cassandra snapshot and push it to HDFS. Only the files that
# the most recent stored snapshot of the node does not list are uploaded;
# the metadata file is then written and the local snapshot is deleted.
def new_snapshot
  @logger.info('Starting a new snapshot')
  snapshot = @cassandra.new_snapshot

  # Baseline for the incremental upload: the last stored snapshot of this
  # node, or a placeholder snapshot dated 'never' when none exists yet.
  existing = search_snapshots(node: snapshot.node)
  last = if existing.empty?
           CassandraSnapshot.new(snapshot.cluster, snapshot.node, 'never')
         else
           existing.last
         end

  @logger.info('Uploading tables to Hadoop')
  files = snapshot.metadata - last.metadata
  @logger.info("#{files.length} files to upload")
  files.each do |file|
    @logger.info("Sending file #{file} to Hadoop")
    local = "#{@cassandra.data_path}/#{file}"
    remote = "#{@hadoop.base_dir}/#{snapshot.cluster}/#{snapshot.node}/#{file}"
    @logger.debug("#{local} => #{remote}")
    # Block form closes the local file even when the upload raises
    File.open(local, 'r') { |f| @hadoop.create(remote, f, overwrite: true) }
  end

  @logger.info('Sending metadata to Hadoop')
  remote = "#{@hadoop.base_dir}/#{@metadir}/#{snapshot.cluster}/#{snapshot.node}/cass_snap_#{snapshot.date}"
  @logger.debug("metadata => #{remote}")
  @hadoop.create(remote, snapshot.metadata.to_a.join("\n"), overwrite: true)

  @cassandra.delete_snapshot(snapshot)
  @logger.info('Success !')
end
#restore_snapshot(node, date, destination) ⇒ Object
Restore a snapshot from HDFS
-
Args :
-
node-> node where the snapshot comes from -
date-> snapshot date -
destination-> local directory where to restore
-
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
# File 'lib/backuptool.rb', line 177
#
# Restore a snapshot from HDFS
# * *Args* :
#   - +node+ -> node where the snapshot comes from
#   - +date+ -> snapshot date
#   - +destination+ -> local directory where to restore
# * *Raises* :
#   - +RuntimeError+ -> when zero or several snapshots match node and date
def restore_snapshot(node, date, destination)
  # Search the snapshot matching node and date; exactly one must match
  snapshots = search_snapshots(node: node, date: date)
  raise('No snapshot found for restore') if snapshots.empty?
  raise('More than one candidate snapshot to restore') if snapshots.length > 1

  snapshot = snapshots.first
  @logger.info("Restoring snapshot #{snapshot}")
  @logger.info("#{snapshot.metadata.length} files to restore")

  # Each metadata entry is a file path relative to both the HDFS node
  # directory and the local destination
  snapshot.metadata.each do |file|
    @logger.info("Restoring file #{file}")
    local = "#{destination}/#{file}"
    remote = "#{@hadoop.base_dir}/#{snapshot.cluster}/#{snapshot.node}/#{file}"
    # Download the file from hdfs
    buffered_download(remote, local)
  end
  @logger.info('Success !')
end
#search_snapshots(node: 'ALL', date: 'ALL') ⇒ Object
Look for snapshots
-
Args :
-
node-> Cassandra node name -
date-> snapshot date ('ALL' matches every date)
-
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/backuptool.rb', line 28
#
# Look for snapshots
# * *Args* :
#   - +node+ -> Cassandra node name, or 'ALL' to search every node listed for the cluster
#   - +date+ -> snapshot date, or 'ALL' to match every date
# * *Returns* :
#   - sorted Array of CassandraSnapshot (empty when nothing matches or HDFS cannot be read)
def search_snapshots(node: 'ALL', date: 'ALL')
  return get_snapshots_node(node, date).sort unless node == 'ALL'

  result = []
  begin
    ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}")
    ls.each do |item|
      # Each entry of the cluster metadata directory is a node name
      result.concat(get_snapshots_node(item['pathSuffix'], date))
    end
  rescue StandardError => e
    @logger.warn("Could not get snapshots for cluster #{@cassandra.cluster_name} : #{e.message}")
  end
  result.sort
end

# Read the metadata file of one snapshot from HDFS and return its lines
# as a Set. Defined beside search_snapshots instead of inside it, so the
# method is not redefined on every search.
# * *Args* :
#   - +node+ -> Cassandra node name
#   - +date+ -> snapshot date
# * *Raises* :
#   - +RuntimeError+ -> when the metadata file cannot be read
def get_snapshot_metadata(node, date)
  remote = "#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}/#{node}/cass_snap_#{date}"
  @hadoop.read(remote).split("\n").to_set
rescue StandardError => e
  raise("Could not read metadata : #{e.message}")
end

# Build the list of snapshots stored in HDFS for one node. Errors are
# logged as warnings and whatever was collected so far is returned.
# * *Args* :
#   - +node+ -> Cassandra node name
#   - +date+ -> snapshot date, or 'ALL' to list every snapshot of the node
def get_snapshots_node(node, date)
  result = []
  begin
    if date == 'ALL'
      ls = @hadoop.list("#{@hadoop.base_dir}/#{@metadir}/#{@cassandra.cluster_name}/#{node}")
      ls.each do |item|
        snapshot_date = item['pathSuffix'].gsub('cass_snap_', '')
        metadata = get_snapshot_metadata(node, snapshot_date)
        result.push(CassandraSnapshot.new(@cassandra.cluster_name, node, snapshot_date, metadata))
      end
    else
      metadata = get_snapshot_metadata(node, date)
      result.push(CassandraSnapshot.new(@cassandra.cluster_name, node, date, metadata))
    end
  rescue StandardError => e
    @logger.warn("Could not get snapshots for node #{node} : #{e.message}")
  end
  result
end