Class: BC3::Snapshot

Inherits:
Object
  • Object
show all
Includes:
Helper
Defined in:
lib/bc3/snapshot.rb

Overview

Container for a snapshot.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Helper

#crc32, #fixnum2int16, #fixnum2int32, #fixnum2int64, #shortstring2bcss

Constructor Details

#initialize(path, timestamp = Time.now) ⇒ Snapshot



27
28
29
30
31
32
33
34
35
# File 'lib/bc3/snapshot.rb', line 27

# Create an empty snapshot.
#
# path::      home path of the snapshot; stored as given.
# timestamp:: time stamp of the snapshot. Defaults to the current time;
#             a +nil+ value also falls back to the current time.
def initialize(path, timestamp = Time.now)
  $log.debug("Create Snapshot #{path}")
  @path = path
  @timestamp = timestamp || Time.now

  # The root folder is created with the snapshot's time stamp.
  $log.debug("Create base folder for snapshot #{path}")
  @basefolder = Folder.new('SnapshotRoot', @timestamp)

  # No index yet.
  @index = nil
end

Instance Attribute Details

#basefolder ⇒ Object (readonly)

Content of the snapshot



175
176
177
# File 'lib/bc3/snapshot.rb', line 175

# Reader for the root folder that holds the content of the snapshot.
def basefolder
  @basefolder
end

#path ⇒ Object (readonly)

homepath of the snapshot



173
174
175
# File 'lib/bc3/snapshot.rb', line 173

# Reader for the home path of the snapshot.
def path
  @path
end

#timestamp ⇒ Object (readonly)

Time stamp from snapshot. Default ‘now’



177
178
179
# File 'lib/bc3/snapshot.rb', line 177

# Reader for the time stamp of the snapshot.
def timestamp
  @timestamp
end

#utf ⇒ Object

UTF-Flag. Is set by SnapshotParser#read_bcss



179
180
181
# File 'lib/bc3/snapshot.rb', line 179

# Reader for the UTF flag of the snapshot.
def utf
  @utf
end

Class Method Details

.new_filelist(startpath, filelist) ⇒ Object

Simple interface to create a snapshot.

Snapshot structure is a list with a filename (including path) on a line.

Empty folders end with a slash.

file
folder/file1
folder/subfolder/subfile1
folder/subfolder2/


99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/bc3/snapshot.rb', line 99

# Build a snapshot from a plain-text file list.
#
# startpath:: path handed to +new+ for the snapshot.
# filelist::  String with one entry per line. An entry ending with '/'
#             becomes an (empty) folder, every other entry a file with
#             size 0. Leading/trailing whitespace of a line is ignored and
#             blank lines are skipped.
#
# Entries without a directory part are added to the snapshot itself, all
# others are handed to basefolder.add_with_path together with their
# directory name.
#
# Returns the new snapshot.
def self.new_filelist(startpath, filelist)
  snap = new(startpath)
  filelist.each_line do |raw|
    line = raw.strip
    # A blank line would otherwise become a file with an empty name.
    next if line.empty?

    basename = ::File.basename(line)
    # Decide whether we add an (empty) folder or a file.
    entry = if line.end_with?('/')
              Folder.new(basename)
            else
              File.new(filename: basename, filesize: 0)
            end

    path = ::File.dirname(line)
    if path == '.'
      snap << entry
    else
      snap.basefolder.add_with_path(path, entry)
    end
  end
  snap
end

.new_filesystem(dirname) ⇒ Object Also known as: newd

Create a snapshot from a directory.



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/bc3/snapshot.rb', line 68

# Build a snapshot from the entries of a directory.
#
# dirname:: directory to read, absolute or relative to the current
#           working directory.
#
# Each entry matched by Dir['*'] inside +dirname+ is added to the snapshot:
# directories via Folder.new_by_dirname, other existing entries via
# File.new_by_filename.
#
# Returns the new snapshot; its path is the absolute form of +dirname+.
# Raises ArgumentError if a listed entry is neither a directory nor an
# existing file.
def self.new_filesystem(dirname)
  $log.info("Build Snapshot from directory #{dirname}")

  # absolute_path leaves an already absolute dirname untouched, so the
  # stored path names the same directory that Dir.chdir enters below.
  snapshot = new(::File.absolute_path(dirname))
  Dir.chdir(dirname) do
    Dir['*'].each do |entry|
      if ::File.directory?(entry)
        snapshot << Folder.new_by_dirname(entry)
      elsif ::File.exist?(entry)
        snapshot << File.new_by_filename(entry)
      else
        raise ArgumentError, "#{entry} not found in #{dirname}"
      end
    end
  end
  snapshot
end

.new_hash(data) ⇒ Object Also known as: newh

Create a snapshot from a hash.

A snapshot hash must contain:

  • snapshot - dirname of the snapshot

  • content - array of folders (see Folder.newh) and files (File.new)

  • timestamp (optional)

Raises:

  • (ArgumentError)


44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/bc3/snapshot.rb', line 44

# Build a snapshot from a hash description.
#
# data:: Hash with the keys
#        :snapshot  - value handed to +new+ as path (required)
#        :content   - Array of element hashes (required); an element with
#                     :dirname is handed to Folder.newh, one with
#                     :filename to File.new
#        :timestamp - handed to +new+ (optional)
#        :utf       - assigned to the snapshot's utf attribute (optional)
#
# Returns the new snapshot.
# Raises ArgumentError when +data+ is no Hash, a required key is missing,
# :content is no Array, or an element has neither :dirname nor :filename.
def self.new_hash(data)
  $log.info("Build Snapshot from hash")
  raise ArgumentError, "No hash given" unless data.is_a?(Hash)
  raise ArgumentError, "snapshot name missing" unless data.key?(:snapshot)
  raise ArgumentError, "content missing" unless data.key?(:content)

  elements = data[:content]
  raise ArgumentError, "Content is no array" unless elements.is_a?(Array)

  snap = new(data[:snapshot], data[:timestamp])
  snap.utf = data[:utf]

  elements.each do |element|
    node =
      if element.has_key?(:dirname)
        Folder.newh(element)
      elsif element.has_key?(:filename)
        File.new(element)
      else
        raise ArgumentError, "element without dir/filename"
      end
    snap << node
  end
  snap
end

.uncompress(filename, filename2 = nil) ⇒ Object

Uncompress a snapshot and save it again.



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/bc3/snapshot.rb', line 128

# Read a bcss file and return it in uncompressed form.
#
# filename::  snapshot file to read.
# filename2:: optional target file; when given, the uncompressed snapshot
#             is also written there (binary mode).
#
# The first 20 bytes are read as: 16 fixed header bytes, the flag bytes
# (16..17, bit 0 of byte 16 = compressed) and the path length (18..19).
# Only path lengths that fit into byte 18 are supported.
#
# Returns the uncompressed snapshot (header followed by body) as a String.
# Raises RuntimeError ("Path > 255 not supported") when byte 19 is not 0.
def self.uncompress(filename, filename2 = nil)
  rawdata = ::File.open(filename, 'rb') { |f| f.read }

  flags = rawdata.getbyte(16)
  compressed = (flags & 1) != 0

  pathlength = rawdata.getbyte(18)
  if rawdata.getbyte(19) != 0
    $log.warn("Path > 255 not supported")
    raise "Path > 255 not supported"
  end
  path = rawdata[20, pathlength]

  header = rawdata[0, 16]
  # Clear the 'compressed' bit: the result is never compressed.
  header << (compressed ? flags - 1 : flags)
  header << rawdata[17, 3] # second flag byte and the original path length
  header << path

  body = rawdata[20 + pathlength..-1]
  if compressed
    begin
      body = Zlib::Inflate.inflate(body)
    rescue Zlib::DataError
      $log.debug("Zlib::DataError occured - try with raw  deflate")
      # No zlib wrapper around the data - retry as a raw deflate stream.
      inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
      begin
        body = inflater.inflate(body)
      ensure
        inflater.close
      end
    end
  end

  uncompressed = header + body
  ::File.open(filename2, 'wb') { |f| f << uncompressed } if filename2
  uncompressed
end

Instance Method Details

#<<(content) ⇒ Object

Add content (folders/files) to snapshot.



184
185
186
187
# File 'lib/bc3/snapshot.rb', line 184

# Add content (a folder or a file) to the snapshot.
#
# The index is reset first, then +content+ is appended to the base folder.
# Returns whatever the base folder's << returns.
def <<(content)
  reset_index
  @basefolder << content
end

#[](path) ⇒ Object

Get content of the folder.

If you request a folder, the path must end with a ‘/’



193
194
195
196
# File 'lib/bc3/snapshot.rb', line 193

# Look up an entry of the snapshot by its key in the index.
#
# path:: key of the requested entry.
#
# The index is built first if it does not exist yet.
# Returns the indexed entry, or whatever the index yields for an unknown key.
def [](path)
  @index || build_index
  @index[path]
end

#bcss(compressed = false) ⇒ Object

Prepare a snapshot (bcss-file).

Only uncompressed structure.

Beyond Compare Snapshot Format Version 1.1

Beyond Compare snapshots (.bcss) are binary files containing the file metadata (names, sizes, last modified times) of a directory structure without storing any of the file content. They are designed to be read sequentially. File record sizes are variable, so there’s no way to seek to arbitrary records without reading all of the records before it.



341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
# File 'lib/bc3/snapshot.rb', line 341

# Build the complete content of a bcss file.
#
# compressed:: when truthy, the data part is deflated. A fatal log entry is
#              written in that case: compressed output is only meant for
#              test purposes.
#
# The result is the header (bcss_header), followed by the data part
# (bcss_data, deflated if requested) and a final byte 255.
#
# Returns the file content as a String in BINARY encoding.
def bcss(compressed = false)
  bcss = ''.dup.force_encoding('BINARY')
  bcss << bcss_header(compressed)
  if compressed
    $log.debug("Compress bcss-data")
    $log.fatal("Compress bcss-data not supported - only for test purposes")
    # The format asks for a raw deflate stream (RFC 1951).
    # Zlib::Deflate.deflate wraps the stream in a zlib container, so the
    # 2-byte zlib header and the 4-byte checksum trailer are cut off.
    bcss << Zlib::Deflate.deflate(bcss_data)[2..-5]
  else # uncompressed
    bcss << bcss_data
  end
  bcss << 255

  bcss
end

#bcss_data ⇒ Object

Return the data part of the snapshot. This part may be packed.



463
464
465
466
467
468
469
# File 'lib/bc3/snapshot.rb', line 463

# Collect the data part of the snapshot.
#
# Concatenates the +bcss+ result of every entry the base folder's +each+
# yields, in that order.
#
# Returns the data as a String in BINARY encoding.
def bcss_data
  buffer = ''.dup.force_encoding('BINARY')
  @basefolder.each do |_name, entry|
    buffer << entry.bcss
  end
  buffer
end

#bcss_header(compressed) ⇒ Object

Create the header data for bcss-file

Snapshots start with a fixed size header that contains an ID value, version information, a creation date, and various flags, optionally followed by the source folder’s path:

- HEADER STRUCTURE -
   [0..3]   = 'BCSS'
   [4]      = Major version (UByte) 
   [5]      = Minor version (UByte)
   [6]	     = Minimum Supported Major Version (UByte)
   [7]	     = Minimum Supported Minor Version (UByte)
   [8..F]   = Creation Time (FileTime)
   [10..11] = Flags         (UWord)

           Bit : Meaning
             0 : Compressed
             1 : Source Path included
             2 : Reserved
             3 : UTF-8
          4-15 : Reserved

   [12..13] = Path Length (UWord)   | Optional
   [14..N]  = Path        (char[])  |

Version Information:

The first two version bytes represent the actual major and minor versions

of the file, and reference a specific version of this specification. The second pair of version bytes represent the minimum snapshot version which must be supported in order to read the snapshot file. Version 1.1 can be read by Version 1.0 applications, so currently Major/Minor should be set to 1.1 and Minimum should be 1.0.

Flags:

Compressed: If set everything following the header is compressed as a raw

deflate stream, as defined by RFC 1951. It is the same compression used by .zip and .gz archives.

Source Path included: If set the original folder's path is included

immediately after the header. This is the only part of the file besides the fixed header that is not compressed.

UTF-8: If set the snapshot was compressed on a system where the default

character encoding is UTF-8 (Linux, OS X). Filenames, paths, and link targets will all be stored as UTF-8. If this isn’t set the paths are stored using the original OS’s ANSI codepage (Windows). In that case any paths may be stored a second time as UTF-8 in extended headers.



419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
# File 'lib/bc3/snapshot.rb', line 419

# Build the header of a bcss file.
#
# Bytes written, in order:
#   0..3    'BCSS'
#   4..7    version 1.1, minimum supported version 1.0
#   8..15   creation time: fixnum2int64(@timestamp.time2ad)
#   16..17  flags - bit 0 compressed, bit 1 source path included (always
#           set), bit 3 UTF-8 (set when @utf is truthy); second byte is 0
#   18..19  path length in bytes; only the first byte is used, the second
#           byte is always 0
#   20..    the path
#
# compressed:: sets the 'compressed' flag when truthy.
#
# Returns the header as a String in BINARY encoding.
# Raises RuntimeError ("too long path") when the path is longer than
# 255 bytes and therefore does not fit into the single length byte.
def bcss_header(compressed)
  # Check first, and in bytes: the length byte written below is the byte
  # size of the path. fixme: support the full 16 bit path length.
  raise "too long path" if @path.bytesize > 255

  header = ''.dup.force_encoding('BINARY')
  header << 'BCSS'
  header << 1 # Major version (UByte)
  header << 1 # Minor version (UByte)
  header << 1 # Minimum Supported Major Version (UByte)
  header << 0 # Minimum Supported Minor Version (UByte)

  # [8..F] = Creation Time (FileTime)
  # Windows FILETIME structure. 64-bit value representing the number of
  # 100-nanosecond intervals since January 1, 1601 UTC. Stored in local time.
  header << fixnum2int64(@timestamp.time2ad)

  # [10..11] = Flags (UWord)
  #   Bit/Value : Meaning
  #         0/1 : Compressed
  #         1/2 : Source Path included
  #         2/4 : Reserved
  #         3/8 : UTF-8
  #        4-15 : Reserved
  flag = 2 # Source Path included
  flag |= 1 if compressed
  flag |= 8 if @utf
  header << flag
  header << 0

  # [12..13] = Path Length (UWord)
  header << @path.bytesize
  header << 0 # high byte, unused while the path is limited to 255 bytes
  # [14..N] = Path (char[])
  header << @path.dup.force_encoding('binary')

  header
end

#build_index ⇒ Object

Build an index.

The index allows faster processing in #each.

When the content changes (or content of one of the folders in the snapshot) then the index is deleted. See BC3::Snapshot#reset_index



256
257
258
259
260
261
262
263
264
# File 'lib/bc3/snapshot.rb', line 256

# (Re)build the index of the snapshot.
#
# Every folder and file the base folder yields recursively is stored in the
# index under its key, and the key is recorded in the entry's
# +snapshotpath+ for this snapshot.
#
# Returns the index (a Hash from key to entry).
def build_index
  $log.debug("Build index")
  @index = {}
  @basefolder.each(:recursive, :folders, :files) do |name, entry|
    @index[name] = entry
    entry.snapshotpath[self] = name
  end
  @index
end

#each(*options) ⇒ Object

Loop on content of the folder.

Options:

  • :files (default)

  • :folders

  • :rootonly - only root, no subdirectories

See also BC3::Folder#each (in combination with BC3::Snapshot#basefolder).



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/bc3/snapshot.rb', line 206

# Select entries of the snapshot from the index.
#
# options:: any combination of
#           :files    - include files (used alone when no option is given)
#           :folders  - include folders
#           :rootonly - skip keys with a '/' followed by further characters
#           :flat     - old name of :rootonly; logs a warning
#
# The index is built first if it does not exist yet.
#
# With a block, yields key and entry of every selected element.
# Returns the Hash of selected elements (key => entry) in both cases.
# Raises RuntimeError ("Internal error") for an indexed entry that is
# neither a File nor a Folder.
def each(*options)
  build_index unless @index
  options = [:files] if options.empty?
  if options.include?(:flat)
    $log.warn("each-option :flat should be rootonly")
    options << :rootonly
  end

  root_only    = options.include?(:rootonly)
  want_files   = options.include?(:files)
  want_folders = options.include?(:folders)

  selected = @index.each_with_object({}) do |(name, entry), chosen|
    next if root_only && name =~ %r{/.} # entry of a subdirectory

    wanted = case entry
             when File then want_files
             when Folder then want_folders
             else raise "Internal error"
             end
    chosen[name] = entry if wanted
  end

  return selected unless block_given?

  selected.each { |name, entry| yield name, entry }
end

#reset_index(rebuild = false) ⇒ Object

Reset the index. The index will be regenerated when needed (See BC3::Snapshot#build_index)

Called from folders of the snapshot, when they get new content.



240
241
242
243
244
# File 'lib/bc3/snapshot.rb', line 240

# Drop the index of the snapshot.
#
# rebuild:: when truthy, the index is rebuilt immediately.
#
# Returns the new index when rebuilt, otherwise nil.
def reset_index(rebuild = false)
  $log.debug("Delete index")
  @index = nil
  return unless rebuild

  build_index
end

#save(filename, compressed = nil) ⇒ Object

Save the snapshot.

Depending on extension you get different results:

  • *.bcss Beyond compare snapshot format

  • *.yaml Yaml file.

You may save the snapshot as a yaml file, modify the data and create a new bcss-file.

bc3 = BC3::Snapshot.new()
...
bc3.save('my_snapshot.yaml')

... modify the yaml file

bc3 = BC3::Snapshot.new_hash(YAML.load(File.read('my_snapshot.yaml')))
bc3.save('my_snapshot.bcss')


286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# File 'lib/bc3/snapshot.rb', line 286

# Write the snapshot to a file; the format is chosen by the file name.
#
# filename::   target file. A name containing '.yaml' or '.yml' is written
#              as YAML (to_hash.to_yaml), a name containing '.bcss' as a
#              bcss file.
# compressed:: for bcss files only, handed to +bcss+. When nil, it is
#              derived from the file name: set when the name contains
#              '.bcssx'.
#
# Raises ArgumentError for any other file name.
def save(filename, compressed = nil)
  $log.debug("Prepare snapshot for #{filename}")

  case filename
  when /\.ya?ml/
    ::File.open(filename, 'w') do |io|
      io << to_hash.to_yaml
    end
    $log.info("Saved snapshot as #{filename}")
  when /\.bcss/
    # Decide between compressed and uncompressed output.
    compressed = (filename =~ /\.bcssx/) if compressed.nil?
    # Binary mode keeps "\n" from being written as "\r\n" on Windows.
    ::File.open(filename, 'wb') do |io|
      io << bcss(compressed)
    end
    $log.info("Saved snapshot as #{filename}")
  else
    raise ArgumentError, "Undefined filetype #{::File.extname(filename)}"
  end
end

#statistic ⇒ Object

Create a little statistic.

  • Count of files and folders

  • Collection of duplicates, grouped by crc



476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
# File 'lib/bc3/snapshot.rb', line 476

# Create a small statistic of the snapshot.
#
# Walks all folders and files of the base folder recursively.
#
# Returns a Hash (default value 0) with
#   'folders'           - number of folders (only set if there are any)
#   'files'             - number of files (only set if there are any)
#   'duplicates_by_crc' - Hash crc => keys of the files sharing that crc;
#                         only set if at least two files share a crc.
#                         Files without a crc are ignored.
# Raises RuntimeError for an element that is neither Folder nor File.
def statistic
  totals = Hash.new(0)
  keys_by_crc = {}
  #feature idea: duplicates_by_name . Better: duplicates_by_filename , duplicates_by_dirname ?

  @basefolder.each(:recursive, :folders, :files) do |name, item|
    case item
    when Folder
      totals['folders'] += 1
    when File
      totals['files'] += 1
      (keys_by_crc[item.crc] ||= []) << name unless item.crc.nil?
    else
      raise "Internal error #{item}"
    end
  end

  shared = keys_by_crc.select { |_crc, names| names.size > 1 }
  totals['duplicates_by_crc'] = shared unless shared.empty?
  totals
end

#to_hash ⇒ Object

Collect the data in a hash.

Useful in combination with yaml:

require 'bc3'
require 'yaml'
#...
snapshot = BC3::Snapshot.new(...)
#...
puts snapshot.to_hash.to_yaml


318
319
320
321
322
323
324
325
326
327
# File 'lib/bc3/snapshot.rb', line 318

# Collect the data of the snapshot in a hash.
#
# Returns a Hash with the keys
#   :snapshot  - path of the snapshot
#   :timestamp - time stamp of the snapshot
#   :content   - Array with the +to_hash+ result of every entry returned by
#                the base folder's +each+
#   :utf       - UTF flag; only present when the flag is not nil
def to_hash
  entries = @basefolder.each.values.map(&:to_hash)
  hash = {
    snapshot: @path,
    timestamp: @timestamp,
    content: entries
  }
  hash[:utf] = @utf unless @utf.nil?

  hash
end