Class: BC3::SnapshotParser

Inherits:

Object

Object
BC3::SnapshotParser

show all

Defined in: lib/bc3/parse.rb

Overview

Parser for a given bcss-file.

Instance Attribute Summary collapse

#snapshot ⇒ Object readonly

Snapshot-object, result of the parsing.
#timestamp ⇒ Object readonly

Returns the value of attribute timestamp.

Instance Method Summary collapse

#initialize(filename) ⇒ SnapshotParser constructor

A new instance of SnapshotParser.
#parse_body(body) ⇒ Object

Parse the body data.
#parse_dosattributes(string) ⇒ Object

Get DOS-attributes.
#parse_extended_header_subtypes(data, last_folder) ⇒ Object

Parse the extended header subtypes.
#parse_file_extended_headers(extradata_string) ⇒ Object

Parse the file extended headers.
#parse_filetime(string) ⇒ Object

Get a “filetime”.
#parse_longstring(string) ⇒ Object

Get a “longstring”.
#parse_shortstring(string) ⇒ Object

Get a “shortstring”.
#parse_uint32(string) ⇒ Object

Get Unsigned 32-bit number.
#read_bcss(filename) ⇒ Object

Constructor Details

#initialize(filename) ⇒ `SnapshotParser`

Returns a new instance of SnapshotParser.

# File 'lib/bc3/parse.rb', line 20

# Create a parser and parse +filename+ right away.
#
# The file type is picked by matching the name against two patterns: names
# containing ".yml"/".yaml" are read as YAML, names containing ".bcss" are
# handed to #read_bcss. The patterns are not anchored, so they match anywhere
# in the name, and the YAML pattern is tried first.
#
# @param filename [String] path of the snapshot file to read
# @raise [ArgumentError] when the name matches neither pattern
def initialize(filename)
  @log = $log # FIXME: replace with sublogger
  @log.info("Read and parse #{filename}")
  @timestamp = Time.now

  case filename
  when /\.ya?ml/
    # NOTE(review): YAML.read is not a method I know from the standard YAML
    # module - confirm that it is provided elsewhere in the project.
    @snapshot = ::File.open(filename) { |io| Snapshot.new_hash(YAML.read(io)) }
  when /\.bcssx?/
    read_bcss(filename)
  else
    raise ArgumentError, "Undefined filetype #{::File.extname(filename)}"
  end
end

Instance Attribute Details

#snapshot ⇒ `Object` (readonly)

Snapshot-object, result of the parsing.



109
110
111

# File 'lib/bc3/parse.rb', line 109

# Reader for the snapshot object that the parser stores in @snapshot.
#
# @return [Object] the current value of @snapshot
def snapshot
  @snapshot
end

#timestamp ⇒ `Object` (readonly)

Returns the value of attribute timestamp.



110
111
112

# File 'lib/bc3/parse.rb', line 110

# Reader for @timestamp. The constructor sets it to the time of parsing;
# reading a bcss file replaces it with the creation time from the file header.
#
# @return [Object] the current value of @timestamp
def timestamp
  @timestamp
end

Instance Method Details

#parse_body(body) ⇒ `Object`

Parse the body data.

This method will change the given parameter.

# File 'lib/bc3/parse.rb', line 117

# Parse the body data (the record stream that follows the snapshot header)
# and attach every folder and file record to @snapshot.
#
# This method will change the given parameter: +body+ is consumed in place
# and is empty when the method returns.
#
# Record IDs are compared as integer byte values. Comparing against string
# literals such as "\xff" depends on the encoding of the source file: in a
# UTF-8 source that literal never equals a binary 0xFF byte, so the
# end-of-folder marker would not be recognised.
#
# @param body [String] binary record stream; emptied as a side effect
# @return [Object, nil] nil, or the result of the error log call when folders
#   were left open at the end of the stream
def parse_body(body)
  folderstack = [@snapshot]
  until body.empty?
    # Each record starts with a single UByte ID value and then the data
    # described at the matching branch.
    last_flag = body.slice!(0)
    case last_flag.getbyte(0)
    # ID_DIRECTORY (0x01)
    # Represents a directory on the system, or an expanded archive file.
    #
    #   Name           : ShortString
    #   Last Modified  : FileTime
    #   DOS Attributes : UInt32
    when 0x01
      dirname, tail = parse_shortstring(body)
      filetime, tail = parse_filetime(tail)
      attributes, tail = parse_dosattributes(tail)
      folder = Folder.newh(
        dirname: dirname,
        timestamp: filetime,
        attributes: attributes
      )
      folderstack.last << folder
      folderstack << folder
    # ID_FILE (0x02)
    # Represents a file on the system.
    #
    #   Name           : ShortString
    #   Last Modified  : FileTime
    #   DOS Attributes : UInt32
    #   Size           : Int32[+Int64]
    #      If Size > 2GB, store as Int32(-1) followed by Int64
    #   CRC32          : UInt32
    when 0x02
      filename, tail = parse_shortstring(body)
      filetime, tail = parse_filetime(tail)
      attributes, tail = parse_dosattributes(tail)
      filesize, tail = parse_filesize(tail)
      crc32, tail = parse_uint32(tail)
      folderstack.last << File.new(
        filename: filename,
        timestamp: filetime,
        attributes: attributes,
        filesize: filesize,
        crc: crc32
      )
    # ID_FILE_EX (0x03)
    # Represents a file on the system, with extended headers.
    #
    #   Name..CRC32 is the same as ID_FILE
    #   ExtraLen       : UInt16
    #   ExtraData      : Byte[ExtraLen]
    when 0x03
      filename, tail = parse_shortstring(body)
      filetime, tail = parse_filetime(tail)
      attributes, tail = parse_dosattributes(tail)
      filesize, tail = parse_filesize(tail)
      crc32, tail = parse_uint32(tail)
      extradata, tail = parse_longstring(tail)
      extradata = parse_file_extended_headers(extradata)
      unless extradata # skip the record if the extended header could not be read
        @log.warn("Skip #{filename} because of unsupported extended header")
        next
      end
      folderstack.last << File.new({
        filename: filename,
        timestamp: filetime,
        attributes: attributes,
        filesize: filesize,
        crc: crc32,
        }.merge(extradata)
      )
    # ID_EXTENDED (0x04)
    # Extended headers
    #
    #   SubType        : UByte
    #   Length         : UWord
    #   Data           : Byte[Length]
    when 0x04
      parse_extended_header_subtypes(body, folderstack.last)
    # ID_DIRECTORY_END (0xFF)
    # Represents the end of a directory listing.  No data.
    when 0xFF
      if folderstack.size > 1
        folderstack.pop
      else
        # Never pop the snapshot root: later records would be appended to nil.
        @log.error("End-of-folder marker without an open folder - ignored")
      end
    else
      @log.fatal("Undefined body-parse element #{last_flag.inspect}")
      @log.debug("Unparsed rest of body: #{body.inspect}")
      body.clear # stop further parsing
    end
  end
  if folderstack.size > 1
    @log.error("Folders in Folderstack not closed correct - #{folderstack.size} levels open")
  end
end

# Read the size field of a file record: a 32-bit value, or - when that value
# is Int32(-1), i.e. 0xFFFFFFFF - the 64-bit little-endian value that follows.
#
# @param string [String] binary data; the consumed bytes are removed in place
# @return [Array] the size and the rest of the string
def parse_filesize(string)
  size, string = parse_uint32(string)
  size = string.slice!(0, 8).unpack('Q<').first if size == 0xFFFFFFFF
  [size, string]
end

#parse_dosattributes(string) ⇒ `Object`

Get DOS-attributes.

# File 'lib/bc3/parse.rb', line 287

# Get DOS-attributes.
#
# The field occupies four positions at the front of +string+. Only the byte
# value of the first one is returned; the following three are removed and
# discarded.
#
# Side effect: +string+ is shortened in place.
#
# @param string [String] data starting with the attribute field
# @return [Array] the attribute byte and the rest of the string
def parse_dosattributes(string)
  first_char = string.slice!(0)
  attributes = first_char.bytes.first
  string.slice!(0, 3) # skip the remaining three bytes of the field
  [attributes, string]
end

#parse_extended_header_subtypes(data, last_folder) ⇒ `Object`

Extended Header Subtypes
========================

Extended headers should be written in ascending numeric order.  Once BC sees
an extended subtype that it doesn't understand it stops processing ID_EXTENDED
headers until it finds one of ID_DIRECTORY/ID_DIRECTORY_END/ID_FILE/ID_FILE_EX.

Side effect: the parameter will be shortened.

# File 'lib/bc3/parse.rb', line 353

# Consume extended header subtype records from the front of +data+ until a
# leading byte is found that is not one of the four handled subtypes.
#
# Side effect: +data+ is shortened in place. The byte that stops the loop is
# removed as well.
#
# NOTE(review): the format notes quoted below describe Length as a UWord, while
# subtypes 1 and 4 are read here with the one-byte-length shortstring reader,
# and subtype 3 removes a single character at index +size+ rather than +size+
# bytes. Behaviour is left as found - confirm against real snapshot files.
#
# @param data [String] record data, positioned at the subtype byte
# @param last_folder [Object] receives ex_utf= / ex_link_path=
# @return [nil]
def parse_extended_header_subtypes(data, last_folder)
  finished = false
  until finished
    flag = data.slice!(0)
    case flag
    # EX_UTF8 (0x01)
    # UTF-8 encoded filename for the ID_DIRECTORY that immediately preceded
    # this header.  The length is given in the ID_EXTENDED header and the
    # data is a char[].
    # If the .bcss header flags indicate that the data is not UTF-8 and the
    # source path is included this can be included as the first record in the
    # file in order to give a UTF-8 version of the source path.
    when "\x01"
      utf_name, data = parse_shortstring(data)
      last_folder.ex_utf = utf_name.force_encoding('utf-8')
    # EX_DIRECTORY_EX (0x02)
    # Extended directory header for the ID_DIRECTORY that immediately
    # preceded this header.  Data is the record below, but Length may be
    # larger to support future expansion.
    #
    #   Flags         : UByte
    #     Bit : Meaning
    #       0 : Error - Contents not available.  Flag as a load error in BC.
    when "\x02"
      @log.fatal("Undefined extended_header_subtypes 2")
      data.slice!(0) # value is read and dropped
    # EX_RESYNC (0x03)
    # Works around a bug in Beyond Compare's parser in versions prior to
    # 3.2.2.  If an ID_DIRECTORY is followed by any ID_EXTENDED headers
    # besides EX_UTF8 or EX_DIRECTORY_EX include one copy of this header
    # before them.
    #
    #   Length : UWord   = 0x0001
    #   Data   : Byte[1] = 0
    when "\x03"
      @log.fatal("Undefined extended_header_subtypes 3")
      size = data.slice!(0).bytes.first
      data.slice!(size) # removes the single character at index +size+
    # EX_LINK_PATH (0x04)
    # UTF-8 encoded symbolic link path for the ID_DIRECTORY that immediately
    # preceded this header.  The length is given in the ID_EXTENDED header
    # and the data is a char[].
    when "\x04"
      @log.fatal("Undefined extended_header_subtypes 4")
      link_path, data = parse_shortstring(data)
      last_folder.ex_link_path = link_path.force_encoding('utf-8')
    else
      @log.debug("Stop extended header subtype handling #{flag.inspect} <#{data.inspect}>")
      finished = true
    end
  end
end

#parse_file_extended_headers(extradata_string) ⇒ `Object`

File Extended Headers

Like extended headers, file extended headers should be written in ascending numeric order.

FILE_EX_VERSION (0x01)

String representation of an executable file's Major/Minor/Maint/Build

version (e.g., “2.11.28.3542”).

Length : UByte
Data   : char[Length]

FILE_EX_UTF8 (0x02)

UTF-8 encoded filename.  Stored as a FileExString.  Only used if the UTF-8

name doesn’t match the ANSI encoded one or if the filename is longer than 255 characters.

FILE_EX_LINK_PATH (0x03)

UTF-8 encoded symbolic link path.  Stored as a FileExString.

# File 'lib/bc3/parse.rb', line 318

# Decode the extended headers attached to a file record.
#
# Each entry is a one-byte subtype followed by a shortstring:
#   0x01 -> :version     (kept in the encoding the shortstring reader returns)
#   0x02 -> :utfpath     (tagged as UTF-8)
#   0x03 -> :utfsymlink  (tagged as UTF-8)
# An unknown subtype is logged as an error and ends the loop; whatever was
# decoded up to that point is still returned.
#
# Side effect: the consumed part of +extradata_string+ is removed in place.
#
# @param extradata_string [String] raw extended header data
# @return [Hash] decoded values keyed by :version, :utfpath, :utfsymlink
def parse_file_extended_headers(extradata_string)
  keys = { "\x01" => :version, "\x02" => :utfpath, "\x03" => :utfsymlink }
  extradata = {}
  until extradata_string.empty?
    flag = extradata_string.slice!(0)
    key = keys[flag]
    unless key
      @log.error("Undefined extra data handling #{flag.inspect} <#{extradata_string.inspect}>")
      break # stop evaluation
    end
    value, extradata_string = parse_shortstring(extradata_string)
    extradata[key] = value
    value.force_encoding('utf-8') unless key == :version
  end
  extradata
end

#parse_filetime(string) ⇒ `Object`

Get a “filetime”.

FileTime:

Windows FILETIME structure.  64-bit value representing the number of

100-nanosecond intervals since January 1, 1601 UTC. Stored in local time.

Return time and rest of string.

# File 'lib/bc3/parse.rb', line 278

# Get a "filetime".
#
# Removes up to eight bytes from the front of +string+, reads them as a
# little-endian unsigned integer and converts that number with Time.ad2time.
#
# Side effect: +string+ is shortened in place.
#
# @param string [String] data starting with the filetime field
# @return [Array] the converted time and the rest of the string
def parse_filetime(string)
  raw = string.slice!(0, 8)
  # Walk the reversed bytes so the last stored byte becomes the most
  # significant one.
  ticks = raw.reverse.each_byte.inject(0) { |acc, byte| (acc << 8) | byte }
  [Time.ad2time(ticks), string]
end

#parse_longstring(string) ⇒ `Object`

Get a “longstring”.

2 bytes with the length, then the string. The length includes the 2 bytes of the length field itself.

Return longstring and rest of string.

# File 'lib/bc3/parse.rb', line 248

# Get a "longstring".
#
# The field starts with a 2-byte little-endian length, followed by the data.
# The stored length counts the two length bytes themselves, so the payload is
# two bytes shorter than the stored value.
#
# The full 16-bit length is honoured; payloads longer than 255 bytes are read
# instead of being rejected.
#
# Side effect: +string+ is shortened in place.
#
# NOTE(review): a stored length below 2 yields a negative payload size, for
# which String#slice! returns nil - confirm whether such records can occur.
#
# @param string [String] data starting with the length field
# @return [Array] the payload (tagged as UTF-8 when @utf is set) and the rest
#   of the string
def parse_longstring(string)
  stored_length = string.slice!(0, 2).unpack('v').first
  stringsize = stored_length - 2
  payload = string.slice!(0, stringsize)
  payload.force_encoding('utf-8') if @utf
  [payload, string]
end

#parse_shortstring(string) ⇒ `Object`

Get a “shortstring”.

1 Byte with length, then the string.

Return shortstring and rest of string.

# File 'lib/bc3/parse.rb', line 229

# Get a "shortstring".
#
# The first position of +string+ holds the length; that many positions are
# then removed from the front as the value. When @utf is set the value is
# tagged as UTF-8.
#
# Side effect: +string+ is shortened in place.
#
# @param string [String] data starting with the length byte
# @return [Array] the shortstring and the rest of the string
def parse_shortstring(string)
  length = string.slice!(0).bytes.first
  value = string.slice!(0, length)
  value.force_encoding('utf-8') if @utf
  [value, string]
end

#parse_uint32(string) ⇒ `Object`

Get Unsigned 32-bit number

# File 'lib/bc3/parse.rb', line 264

# Get an unsigned 32-bit number.
#
# Removes up to four bytes from the front of +string+ and reads them as a
# little-endian unsigned integer.
#
# Side effect: +string+ is shortened in place.
#
# @param string [String] data starting with the number
# @return [Array] the number and the rest of the string
def parse_uint32(string)
  raw = string.slice!(0, 4)
  # Walk the reversed bytes so the last stored byte becomes the most
  # significant one.
  number = raw.reverse.each_byte.inject(0) { |acc, byte| (acc << 8) | byte }
  [number, string]
end

#read_bcss(filename) ⇒ `Object`

# File 'lib/bc3/parse.rb', line 37

# Read a bcss file, decode its header, build @snapshot and parse the body.
#
#  - HEADER STRUCTURE -
#  [0..3]   = 'BCSS'
#  [4]      = Major version (UByte)
#  [5]      = Minor version (UByte)
#  [6]      = Minimum Supported Major Version (UByte)
#  [7]      = Minimum Supported Minor Version (UByte)
#  [8..F]   = Creation Time (FileTime)
#  [10..11] = Flags         (UWord)
#
#          Bit : Meaning
#            0 : Compressed
#            1 : Source Path included
#            2 : Reserved
#            3 : UTF-8
#         4-15 : Reserved
#
#  [12..13] = Path Length (UWord)   | Optional
#  [14..N]  = Path        (char[])  |
#
# The source path is only read when flag bit 1 announces it, and its full
# 16-bit little-endian length is honoured. Without a source path the snapshot
# is created with an empty path and the body starts right after the flags.
#
# Sets @timestamp, @compressed, @sourcepath, @reserved, @utf and @snapshot.
#
# @param filename [String] path of the bcss file
# @return [Object] whatever #parse_body returns
# @raise [ArgumentError] when the file is shorter than the fixed header
def read_bcss(filename)
  rawdata = ::File.binread(filename)
  if rawdata.bytesize < 18
    raise ArgumentError, "#{filename} is too short for a bcss header (#{rawdata.bytesize} bytes)"
  end

  @timestamp, = parse_filetime(rawdata[8, 8])

  # Analyse flags - byte position 16/hex10
  flags = rawdata.getbyte(16)
  @compressed = (flags & 1) != 0
  @sourcepath = (flags & 2) != 0
  @reserved   = (flags & 4) != 0
  @utf        = (flags & 8) != 0
  @log.warn("UTF-data") if @utf
  @log.warn("2nd flag byte is filled") if rawdata.getbyte(17) != 0

  # Analyse source path (optional, announced by flag bit 1)
  path = String.new
  body_start = 18
  if @sourcepath
    if rawdata.bytesize < 20
      raise ArgumentError, "#{filename} announces a source path but ends after the flags"
    end
    path_length = rawdata[18, 2].unpack('v').first
    path = rawdata[20, path_length]
    body_start = 20 + path_length
  end
  path.force_encoding('utf-8') if @utf
  # A truncated file leaves nothing after the path; treat that as an empty body.
  body = rawdata[body_start..-1] || String.new

  if @compressed
    # Compressed: if set, everything following the header is compressed as a
    # raw deflate stream, as defined by RFC 1951.  It is the same compression
    # used by .zip and .gz archives.
    #
    # Code from http://www.ruby-forum.com/topic/136825
    @log.debug("uncompress body data")
    begin
      body = Zlib::Inflate.inflate(body)
    rescue Zlib::DataError
      @log.debug("Zlib::DataError occured - try with raw  deflate")
      # No luck with zlib-wrapped data; retry as a raw deflate stream.
      inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
      begin
        body = inflater.inflate(body)
      ensure
        inflater.close
      end
    end
  end

  @snapshot = Snapshot.new(path, @timestamp)
  @snapshot.utf = @utf

  parse_body(body)
end

Class: BC3::SnapshotParser

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Extended Header Subtypes ========================.

File Extended Headers =====================.

Constructor Details

#initialize(filename) ⇒ SnapshotParser

Instance Attribute Details

#snapshot ⇒ Object (readonly)

#timestamp ⇒ Object (readonly)

Instance Method Details

#parse_body(body) ⇒ Object

#parse_dosattributes(string) ⇒ Object

#parse_extended_header_subtypes(data, last_folder) ⇒ Object

#parse_file_extended_headers(extradata_string) ⇒ Object

#parse_filetime(string) ⇒ Object

#parse_longstring(string) ⇒ Object

#parse_shortstring(string) ⇒ Object

#parse_uint32(string) ⇒ Object

#read_bcss(filename) ⇒ Object