Class: BC3::SnapshotParser

Inherits:
Object
  • Object
show all
Defined in:
lib/bc3/parse.rb

Overview

Parser for a given bcss-file.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(filename) ⇒ SnapshotParser

Returns a new instance of SnapshotParser.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/bc3/parse.rb', line 20

# Create a parser and immediately load the snapshot stored in +filename+.
#
# The loader is chosen by matching the file name: names containing ".yml" or
# ".yaml" are read as YAML, names containing ".bcss"/".bcssx" are read as
# binary snapshots via #read_bcss.
#
# @param filename [String] path of the snapshot file
# @raise [ArgumentError] if the file name matches neither pattern
def initialize(filename)
  @log = $log # FIXME: replace with sublogger
  @log.info("Read and parse #{filename}")

  # Preliminary timestamp; #read_bcss replaces it with the time stored in the file.
  @timestamp = Time.now

  case filename
  when /\.ya?ml/
    # NOTE(review): YAML.read does not appear to be part of Ruby's standard
    # YAML (Psych) API - confirm it is defined elsewhere in the project.
    ::File.open(filename) do |io|
      @snapshot = Snapshot.new_hash(YAML.read(io))
    end
  when /\.bcssx?/
    read_bcss(filename)
  else
    raise ArgumentError, "Undefined filetype #{::File.extname(filename)}"
  end
end

Instance Attribute Details

#snapshot ⇒ Object (readonly)

Snapshot-object, result of the parsing.



109
110
111
# File 'lib/bc3/parse.rb', line 109

# Read-only accessor for the Snapshot object produced by parsing.
def snapshot
  return @snapshot
end

#timestamp ⇒ Object (readonly)

Returns the value of attribute timestamp.



110
111
112
# File 'lib/bc3/parse.rb', line 110

# Read-only accessor for the timestamp held in @timestamp.
def timestamp
  return @timestamp
end

Instance Method Details

#parse_body(body) ⇒ Object

Parse the body data.

This method will change the given parameter.



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/bc3/parse.rb', line 117

# Parse the record stream that follows the snapshot header.
#
# Each record starts with a single UByte ID, followed by ID-specific data.
# Parsed folders and files are appended to the innermost open folder; the
# root of the tree is @snapshot.
#
# The record ID is dispatched on its integer byte value. This keeps the
# comparison independent of the source file's encoding: a string literal
# such as "\xff" in a UTF-8 encoded source does not compare equal to the
# binary byte 0xFF read from a snapshot.
#
# Side effect: +body+ is consumed (shortened in place) while parsing.
#
# @param body [String] binary record data
def parse_body(body)
  folderstack = [@snapshot]
  until body.empty?
    last_flag = body.slice!(0)
    case last_flag.getbyte(0)
    when 0x01
      # ID_DIRECTORY (0x01)
      # Represents a directory on the system, or an expanded archive file.
      #
      # Name           : ShortString
      # Last Modified  : FileTime
      # DOS Attributes : UInt32
      dirname, tail = parse_shortstring(body)
      filetime, tail = parse_filetime(tail)
      attributes, tail = parse_dosattributes(tail)
      folder = Folder.newh(
        dirname: dirname,
        timestamp: filetime,
        attributes: attributes
      )
      folderstack.last << folder
      folderstack << folder # following records belong to this folder
    when 0x02
      # ID_FILE (0x02)
      # Represents a file on the system.
      #
      # Name           : ShortString
      # Last Modified  : FileTime
      # DOS Attributes : UInt32
      # Size           : Int32[+Int64]
      #    If Size > 2GB, store as Int32(-1) followed by Int64
      # CRC32          : UInt32
      filename, tail = parse_shortstring(body)
      filetime, tail = parse_filetime(tail)
      attributes, tail = parse_dosattributes(tail)
      filesize, tail = parse_filesize(tail)
      crc32, tail = parse_uint32(tail)
      folderstack.last << File.new(
        filename: filename,
        timestamp: filetime,
        attributes: attributes,
        filesize: filesize,
        crc: crc32
      )
    when 0x03
      # ID_FILE_EX (0x03)
      # Represents a file on the system, with extended headers.
      #
      # Name..CRC32 is the same as ID_FILE
      # ExtraLen       : UInt16
      # ExtraData      : Byte[ExtraLen]
      filename, tail = parse_shortstring(body)
      filetime, tail = parse_filetime(tail)
      attributes, tail = parse_dosattributes(tail)
      filesize, tail = parse_filesize(tail)
      crc32, tail = parse_uint32(tail)
      extradata, tail = parse_longstring(tail)
      extradata = parse_file_extended_headers(extradata)
      unless extradata # skip the record if the extended headers could not be read
        @log.warn("Skip #{filename} because of unsupported extended header")
        next
      end
      folderstack.last << File.new({
        filename: filename,
        timestamp: filetime,
        attributes: attributes,
        filesize: filesize,
        crc: crc32,
        }.merge(extradata)
      )
    when 0x04
      # ID_EXTENDED (0x04)
      # Extended headers
      #
      # SubType        : UByte
      # Length         : UWord
      # Data           : Byte[Length]
      parse_extended_header_subtypes(body, folderstack.last)
    when 0xFF
      # ID_DIRECTORY_END (0xFF)
      # Represents the end of a directory listing.  No data.
      folderstack.pop
    else
      @log.fatal("Undefined body-parse element #{last_flag.inspect}")
      body.clear # the stream cannot be resynchronised: stop further parsing
    end
  end
  if folderstack.size > 1
    @log.error("Folders in Folderstack not closed correct - #{folderstack.size} levels open")
  end
end

# Read the Size field of an ID_FILE / ID_FILE_EX record.
#
# The size is stored as Int32; for files larger than 2GB the Int32 is -1
# (0xFFFFFFFF when read unsigned) and the real size follows as Int64.
#
# Returns the size and the rest of the string (which is shortened in place).
def parse_filesize(string)
  filesize, tail = parse_uint32(string)
  if filesize == 0xFFFF_FFFF
    filesize = tail.slice!(0, 8).unpack('q<').first
  end
  [filesize, tail]
end
private :parse_filesize

#parse_dosattributes(string) ⇒ Object

Get DOS-attributes.



287
288
289
290
291
292
# File 'lib/bc3/parse.rb', line 287

# Read a four byte DOS attributes field from the front of +string+.
#
# Only the first byte is returned as the attribute value; the following
# three bytes are removed from +string+ and discarded.
#
# Returns the attribute byte (Integer) and the rest of the string.
# Side effect: +string+ is shortened in place.
def parse_dosattributes(string)
  first_byte = string.slice!(0).getbyte(0)
  string.slice!(0, 3) # drop the remaining three bytes of the field
  [first_byte, string]
end

#parse_extended_header_subtypes(data, last_folder) ⇒ Object

Extended Header Subtypes
========================

Extended headers should be written in ascending numeric order.  Once BC sees
an extended subtype that it doesn't understand, it stops processing ID_EXTENDED
headers until it finds one of ID_DIRECTORY/ID_DIRECTORY_END/ID_FILE/ID_FILE_EX.

Side effect: the parameter will be shortened.



353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
# File 'lib/bc3/parse.rb', line 353

# Process extended header subtypes found at the front of +data+.
#
# Repeatedly removes one subtype byte from +data+ and handles the subtypes
# 0x01..0x04. Any other value (including nil, which slice! returns once
# +data+ is exhausted) ends the loop.
#
# Side effect: +data+ is shortened in place. Results are stored on
# +last_folder+ through its ex_utf= / ex_link_path= writers.
#
# NOTE(review): the ID_EXTENDED layout quoted in this file is
# "SubType : UByte, Length : UWord, Data", whereas the branches below read
# either a single length byte (via parse_shortstring) or a single byte.
# Confirm against real snapshot files before relying on the decoded values.
def parse_extended_header_subtypes(data, last_folder)
    continue = true
    while continue
      # The subtype byte is removed from data before it is inspected.
      case flag = data.slice!(0)
=begin
EX_UTF8 (0x01)
UTF-8 encoded filename for the ID_DIRECTORY that immediately preceeded
this header.  The length is given in the ID_EXTENDED header and the data is a
char[].
If the .bcss header flags indicate that the data is not UTF-8 and the
source path is included this can be included as the first record in the file
in order to give a UTF-8 version of the source path.
=end

        when "\x01" # EX_UTF8: UTF-8 name, stored on the folder
          ex_utf, data = parse_shortstring( data )
          last_folder.ex_utf = ex_utf.force_encoding('utf-8')
=begin
EX_DIRECTORY_EX (0x02)
Extended directory header for the ID_DIRECTORY that immediately preceeded
this header.  Data is the record below, but Length may be larger to support
future expansion.

Flags         : UByte
  Bit : Meaning
    0 : Error - Contents not available.  Flag as a load error in BC.
=end

      when "\x02" # EX_DIRECTORY_EX: not supported, logged as fatal

          @log.fatal("Undefined extended_header_subtypes 2")
          # One byte is removed; the value is not used afterwards.
          ex_directory_ex = data.slice!(0)
=begin
EX_RESYNC (0x03)
Works around a bug in Beyond Compare's parser in versions prior to 3.2.2.
If an ID_DIRECTORY is followed by any ID_EXTENDED headers besides EX_UTF8 or
EX_DIRECTORY_EX include one copy of this header before them.

Length : UWord   = 0x0001
Data   : Byte[1] = 0
=end

      when "\x03" # EX_RESYNC: not supported, logged as fatal

          @log.fatal("Undefined extended_header_subtypes 3")
          size = data.slice!(0).bytes.first
          # NOTE(review): slice! with a single Integer removes only the one
          # character at index `size`, not `size` bytes - confirm this is
          # intended. The removed value is not used afterwards.
          ex_resync = data.slice!(size)
=begin
EX_LINK_PATH (0x04)
UTF-8 encoded symbolic link path for the ID_DIRECTORY that immediately
preceeded this header.  The length is given in the ID_EXTENDED header and the
data is a char[].
=end

      when "\x04" # EX_LINK_PATH: symbolic link path, stored on the folder (also logged as fatal)

          @log.fatal("Undefined extended_header_subtypes 4")
          ex_link_path, data = parse_shortstring( data )
          last_folder.ex_link_path = ex_link_path.force_encoding('utf-8')
      else
          # The byte in `flag` has already been removed from data at this
          # point; it is only reported in the debug message.
          @log.debug("Stop extended header subtype handling #{flag.inspect} <#{data.inspect}>")
          continue = false #stop evaluation

        end
      end #while continue

end

#parse_file_extended_headers(extradata_string) ⇒ Object

File Extended Headers

Like extended headers, file extended headers should be written in ascending numeric order.

FILE_EX_VERSION (0x01)

String representation of an executable file's Major/Minor/Maint/Build

version (e.g., “2.11.28.3542”).

Length : UByte
Data   : char[Length]

FILE_EX_UTF8 (0x02)

UTF-8 encoded filename.  Stored as a FileExString.  Only used if the UTF-8

name doesn’t match the ANSI encoded one or if the filename is longer than 255 characters.

FILE_EX_LINK_PATH (0x03)

UTF-8 encoded symbolic link path.  Stored as a FileExString.


318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/bc3/parse.rb', line 318

# Decode the file extended headers contained in +extradata_string+.
#
# Each header starts with a subtype byte followed by a short string:
#   0x01 -> :version    (stored as read)
#   0x02 -> :utfpath    (tagged as UTF-8)
#   0x03 -> :utfsymlink (tagged as UTF-8)
# An unknown subtype is logged as error and ends the evaluation.
#
# Side effect: the bytes that were read are removed from +extradata_string+.
#
# Returns a Hash with the headers found (possibly empty).
def parse_file_extended_headers(extradata_string)
  headers = {}
  remaining = extradata_string
  until remaining.empty?
    subtype = remaining.slice!(0)
    key = case subtype
          when "\x01" then :version
          when "\x02" then :utfpath
          when "\x03" then :utfsymlink
          end
    if key.nil?
      @log.error("Undefined extra data handling #{subtype.inspect} <#{remaining.inspect}>")
      break # stop evaluation; the unread rest stays in the caller's string
    end

    value, remaining = parse_shortstring(remaining)
    value.force_encoding('utf-8') unless key == :version
    headers[key] = value
  end
  headers
end

#parse_filetime(string) ⇒ Object

Get a “filetime”.

FileTime:

Windows FILETIME structure.  64-bit value representing the number of

100-nanosecond intervals since January 1, 1601 UTC. Stored in local time.

Return time and rest of string.



278
279
280
281
282
# File 'lib/bc3/parse.rb', line 278

# Read a FileTime (eight bytes, least significant byte first) from the
# front of +string+ and convert it with Time.ad2time.
#
# Returns the converted time and the rest of the string.
# Side effect: +string+ is shortened in place.
def parse_filetime(string)
  raw = string.slice!(0, 8)
  # Fold the bytes, most significant first, into one integer.
  ticks = raw.reverse.each_byte.inject(0) { |sum, byte| (sum << 8) | byte }
  [Time.ad2time(ticks), string]
end

#parse_longstring(string) ⇒ Object

Get a “longstring”.

Two bytes with the length, then the string. The length includes the two length bytes themselves.

Return longstring and rest of string.



248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/bc3/parse.rb', line 248

# Read a "longstring" from the front of +string+.
#
# A longstring starts with a two byte length (least significant byte first)
# which counts the two length bytes as well, followed by the content.
# The full 16 bit length is evaluated, so contents longer than 255 bytes
# are supported. A declared length below 2 (malformed) yields an empty
# string.
#
# The content is tagged as UTF-8 if the snapshot is flagged as UTF-8 (@utf).
#
# Returns the longstring and the rest of the string.
# Side effect: +string+ is shortened in place.
def parse_longstring(string)
  total_length = string.slice!(0, 2).unpack('v').first
  content_length = [total_length - 2, 0].max # the length includes its own two bytes
  content = string.slice!(0, content_length)
  content.force_encoding('utf-8') if @utf
  [content, string]
end

#parse_shortstring(string) ⇒ Object

Get a “shortstring”.

1 Byte with length, then the string.

Return shortstring and rest of string.



229
230
231
232
233
234
235
236
237
238
# File 'lib/bc3/parse.rb', line 229

# Read a "shortstring" from the front of +string+: one length byte followed
# by that many bytes of content.
#
# The content is tagged as UTF-8 if the snapshot is flagged as UTF-8 (@utf).
#
# Returns the shortstring and the rest of the string.
# Side effect: +string+ is shortened in place.
def parse_shortstring(string)
  length = string.slice!(0).getbyte(0)
  content = string.slice!(0, length)
  content.force_encoding('utf-8') if @utf
  [content, string]
end

#parse_uint32(string) ⇒ Object

Get Unsigned 32-bit number



264
265
266
267
# File 'lib/bc3/parse.rb', line 264

# Read an unsigned 32-bit number (four bytes, least significant byte first)
# from the front of +string+.
#
# Returns the number and the rest of the string.
# Side effect: +string+ is shortened in place.
def parse_uint32(string)
  raw = string.slice!(0, 4)
  # Fold the bytes, most significant first, into one integer.
  value = raw.reverse.each_byte.inject(0) { |sum, byte| (sum << 8) | byte }
  [value, string]
end

#read_bcss(filename) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/bc3/parse.rb', line 37

# Read a binary snapshot file, evaluate its header and pass the record
# data to #parse_body. Sets @timestamp, the flag variables (@compressed,
# @sourcepath, @reserved, @utf) and @snapshot.
#
#  - HEADER STRUCTURE - (offsets in hex)
# [0..3]   = 'BCSS'
# [4]      = Major version (UByte)
# [5]      = Minor version (UByte)
# [6]      = Minimum Supported Major Version (UByte)
# [7]      = Minimum Supported Minor Version (UByte)
# [8..F]   = Creation Time (FileTime)
# [10..11] = Flags         (UWord)
#
#         Bit : Meaning
#           0 : Compressed
#           1 : Source Path included
#           2 : Reserved
#           3 : UTF-8
#        4-15 : Reserved
#
# [12..13] = Path Length (UWord)   | Optional
# [14..N]  = Path        (char[])  |
#
# The source path is only read when the "Source Path included" flag is set;
# otherwise the record data starts directly after the flags and the
# snapshot gets an empty path. The path length is evaluated as a full
# 16 bit value.
#
# @param filename [String] path of the snapshot file
def read_bcss(filename)
  rawdata = ::File.open(filename, 'rb') { |f| f.read }

  @timestamp, = parse_filetime(rawdata[8, 8])

  # Analyse flags - byte position 16/hex10
  flags = rawdata.getbyte(16)
  @compressed = (flags & 1) != 0
  @sourcepath = (flags & 2) != 0
  @reserved   = (flags & 4) != 0
  @utf        = (flags & 8) != 0
  @log.warn("UTF-data") if @utf
  @log.warn("2nd flag byte is filled") if rawdata.getbyte(17) != 0

  # Analyse source path (optional, see header structure)
  if @sourcepath
    path_length = rawdata[18, 2].unpack('v').first
    path = rawdata[20, path_length].to_s
    body = rawdata[(20 + path_length)..-1].to_s
  else
    path = String.new
    body = rawdata[18..-1].to_s
  end
  path.force_encoding('utf-8') if @utf

  if @compressed
    # Compressed: everything following the header is compressed as a raw
    # deflate stream, as defined by RFC 1951. It is the same compression
    # used by .zip and .gz archives.
    #
    # Code from http://www.ruby-forum.com/topic/136825
    @log.debug("uncompress body data")
    begin
      body = Zlib::Inflate.inflate(body)
    rescue Zlib::DataError
      @log.debug("Zlib::DataError occured - try with raw  deflate")
      # No luck with zlib-wrapped decompression: retry as raw deflate stream.
      body = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body)
    end
  end

  @snapshot = Snapshot.new(path, @timestamp)
  @snapshot.utf = @utf

  parse_body(body)
end