Class: MD5deep

Inherits:
Object
  • Object
show all
Defined in:
lib/metadata/util/md5deep.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(fs = nil, options = {}) ⇒ MD5deep

Returns a new instance of MD5deep.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/metadata/util/md5deep.rb', line 14

# Builds a scanner with zeroed counters, an empty XML document and the
# effective option set.
#
# fs      - filesystem object to scan through; it is kept only when it is a
#           MiqFS, otherwise @fs is nil and the local File/Dir APIs are used.
# options - Hash of string-keyed overrides merged over the defaults below
#           ('versioninfo', 'imports', 'contents', 'exclude', 'digest',
#           'winVerList'). 'digest' may be an Array of names, a single name
#           or nil; "md5" is always added.
def initialize(fs = nil, options = {})
  @fullFileCount = 0
  @fullDirCount = 0
  @drive_letter = nil

  # Create XML document
  @xml = XmlHash.createDoc(:filesystem)
  @fs = fs.kind_of?(MiqFS) ? fs : nil

  # Read optional parameters if they exist in the options hash
  @opts = {'versioninfo' => true, 'imports' => true, 'contents' => false,
    'exclude' => ["pagefile.sys", "hiberfil.sys", ".", ".."],
    'digest' => ["md5"], "winVerList" => %w(.exe .dll .ocx .scr)
  }.merge(options)

  # Make sure md5 is part of our digest array. Hash#merge is shallow, so the
  # list is copied first: appending in place would modify the caller's array.
  digests = Array(@opts['digest']).dup
  digests.push("md5") unless digests.include?("md5")
  @opts['digest'] = digests

  # Convert hash to an OpenStruct for cleaner referencing
  @opts = OpenStruct.new(@opts)

  # Conditionally load digest libraries as needed.
  @opts.digest.each do |h|
    begin
      require 'digest/' + h.downcase
    rescue LoadError
      # This load error is not a concern since the standard digests are already included
      # in ruby by default, and the non-standard ones will be loaded by their name above.
      # $log.debug "Unable to load module for [#{h}]"
    end
  end
end

Instance Attribute Details

#fullDirCount ⇒ Object (readonly)

Returns the value of attribute fullDirCount.



11
12
13
# File 'lib/metadata/util/md5deep.rb', line 11

# Reader for @fullDirCount, the running directory counter. It starts at 0 in
# #initialize and is incremented by #processDir.
def fullDirCount
  @fullDirCount
end

#fullFileCount ⇒ Object (readonly)

Returns the value of attribute fullFileCount.



11
12
13
# File 'lib/metadata/util/md5deep.rb', line 11

# Reader for @fullFileCount, the running file counter. It starts at 0 in
# #initialize and is incremented by #processFile for every non-excluded,
# non-directory entry it attempts to open.
def fullFileCount
  @fullFileCount
end

#imports ⇒ Object

Returns the value of attribute imports.



12
13
14
# File 'lib/metadata/util/md5deep.rb', line 12

# Returns @imports.
# NOTE(review): none of the methods shown on this page assign @imports (the
# 'imports' option is stored in @opts instead), so this presumably returns nil
# unless a writer defined elsewhere sets it -- confirm.
def imports
  @imports
end

#versioninfo ⇒ Object

Returns the value of attribute versioninfo.



12
13
14
# File 'lib/metadata/util/md5deep.rb', line 12

# Returns @versioninfo.
# NOTE(review): none of the methods shown on this page assign @versioninfo
# (the 'versioninfo' option is stored in @opts instead), so this presumably
# returns nil unless a writer defined elsewhere sets it -- confirm.
def versioninfo
  @versioninfo
end

Class Method Details

.scan_glob(fs, filename, options = {}) ⇒ Object



53
54
55
56
# File 'lib/metadata/util/md5deep.rb', line 53

# Convenience entry point: builds a scanner for +fs+ with +options+ and
# returns the result of its instance-level #scan_glob for +filename+.
def self.scan_glob(fs, filename, options = {})
  MD5deep.new(fs, options).scan_glob(filename)
end

Instance Method Details

#calculate_digest(fileName) ⇒ Object



224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/metadata/util/md5deep.rb', line 224

# Computes every configured digest over the whole content of an open file.
#
# fileName - despite the name, an open, readable handle: it must respond to
#            seek and read(length).
#
# Returns a Hash of digest name => digest string (the digest object's to_s).
# The Hash is empty when no digest is configured. Read errors are logged and
# whatever was accumulated up to that point is returned.
def calculate_digest(fileName)
  # Built up front so the conversion at the bottom always has a Hash to work
  # on, even when the digest list is empty.
  digest = create_digest_hash

  unless @opts.digest.empty?
    begin
      fileName.seek(0, IO::SEEK_SET)
      # Feed the file to every digest in fixed-size chunks; read returns nil
      # at EOF, which ends the loop.
      while (buf = fileName.read(10_240_000))
        digest.each_value { |d| d << buf }
      end
    rescue => err
      $log.error "Error #{err} reading file to calculate digest"
      $log.debug err.backtrace.join("\n")
    end
  end

  digest.transform_values(&:to_s)
end

#calculate_sums(xmlNode) ⇒ Object



205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'lib/metadata/util/md5deep.rb', line 205

# Rolls the attributes of xmlNode's child elements up into one summary.
#
# For each configured digest, the children's attribute values of that name
# are fed into a fresh digest object. Children's "size" attributes are added
# up as integers. Children lacking an attribute are skipped for that key.
#
# Returns a Hash of digest name => digest string, plus "size" => Integer.
def calculate_sums(xmlNode)
  totals = create_digest_hash
  totals['size'] = 0

  xmlNode.each_element do |child|
    totals.each_key do |name|
      next unless child.attributes[name]

      if name == "size"
        totals[name] += child.attributes[name].to_i
      else
        totals[name] << child.attributes[name]
      end
    end
  end

  # Replace each digest object by its string form; the size stays numeric.
  totals.each_key do |name|
    totals[name] = totals[name].to_s unless name == 'size'
  end
  totals
end

#create_digest_hash ⇒ Object



249
250
251
252
253
254
255
256
257
258
259
# File 'lib/metadata/util/md5deep.rb', line 249

# Builds one fresh digest object per configured digest name.
#
# Returns a Hash of downcased digest name => new Digest::<NAME> instance.
# Names that cannot be resolved to a Digest class are left out.
def create_digest_hash
  @opts.digest.each_with_object({}) do |name, digests|
    begin
      digests[name.downcase] = Digest.const_get(name.upcase).new
    rescue NameError, LoadError
      # If we are unable to load a digest, skip it. Digest tries to require a
      # library for an unknown constant and reports a missing one as
      # LoadError, which is not a NameError, so both are handled here.
    end
  end
end

#fileOpen(currFile) ⇒ Object



183
184
185
186
187
188
189
# File 'lib/metadata/util/md5deep.rb', line 183

# Opens currFile for reading and returns the handle.
#
# When a filesystem object was accepted by the constructor the open is
# delegated to it; otherwise the local file is opened in binary mode so the
# bytes fed to the digests and the PE parser are not subject to text-mode
# newline translation on platforms that perform it.
def fileOpen(currFile)
  return @fs.fileOpen(currFile) if @fs

  File.open(currFile, "rb")
end

#get_dir_stats(dir) ⇒ Object



197
198
199
200
201
202
203
# File 'lib/metadata/util/md5deep.rb', line 197

# Collects size and timestamps for a directory path.
#
# The values come from @fs when one is set, otherwise from the local
# filesystem. Timestamps are converted to UTC and formatted with iso8601.
#
# Returns a Hash with the keys "size", "atime", "ctime" and "mtime".
def get_dir_stats(dir)
  size, accessed, changed, modified =
    if @fs
      [@fs.fileSize(dir), @fs.fileAtime(dir), @fs.fileCtime(dir), @fs.fileMtime(dir)]
    else
      [File.size(dir), File.stat(dir).atime, File.stat(dir).ctime, File.stat(dir).mtime]
    end

  {
    "size"  => size,
    "atime" => accessed.getutc.iso8601,
    "ctime" => changed.getutc.iso8601,
    "mtime" => modified.getutc.iso8601
  }
end

#getFileContents(fh, xml_node) ⇒ Object



261
262
263
264
265
# File 'lib/metadata/util/md5deep.rb', line 261

# Stores the leading part of a file's content under xml_node.
#
# The handle is rewound, at most 1024000 bytes are read (an empty string is
# used when read reports EOF with nil), and the result of MIQEncode.encode
# becomes the text of a new "contents" child element flagged as compressed
# and encoded.
def getFileContents(fh, xml_node)
  fh.seek(0, IO::SEEK_SET)
  data = fh.read(1024000)
  data = "" unless data # read will return nil when at EOF.

  contents_node = xml_node.add_element("contents", "compressed" => "true", "encoded" => "true")
  contents_node.text = MIQEncode.encode(data)
end

#getFileStats(fh) ⇒ Object



191
192
193
194
195
# File 'lib/metadata/util/md5deep.rb', line 191

# Collects size and timestamps for an open file handle.
#
# A plain File is swapped for its File::Stat object first; any other handle
# is asked for size/atime/ctime/mtime directly. Timestamps are converted to
# UTC and formatted with iso8601.
#
# Returns a Hash with the keys "size", "atime", "ctime" and "mtime".
def getFileStats(fh)
  source = fh.instance_of?(File) ? fh.stat : fh

  stats = {"size" => source.size}
  stats["atime"] = source.atime.getutc.iso8601
  stats["ctime"] = source.ctime.getutc.iso8601
  stats["mtime"] = source.mtime.getutc.iso8601
  stats
end

#isDir?(currFile) ⇒ Boolean

Returns:

  • (Boolean)


175
176
177
178
179
180
181
# File 'lib/metadata/util/md5deep.rb', line 175

# Tells whether currFile is a directory, asking @fs when one is set and the
# local filesystem otherwise.
def isDir?(currFile)
  @fs ? @fs.fileDirectory?(currFile) : File.directory?(currFile)
end

#process_dir_as_file(path, x, xml_node) ⇒ Object



122
123
124
125
126
127
128
129
130
131
132
# File 'lib/metadata/util/md5deep.rb', line 122

# Records a directory as a single "file" element without descending into it.
#
# path     - parent directory of the entry
# x        - entry name; "." and ".." are ignored
# xml_node - element that receives the new child
#
# Nothing is added unless path/x is a directory. When it is, the new element
# carries the name, the joined path as "fqname" and the directory's stats.
def process_dir_as_file(path, x, xml_node)
  return if x == "." || x == ".."

  curr_dir = File.join(path, x)
  return unless isDir?(curr_dir)

  xml_file_node = xml_node.add_element("file", "name" => x, "fqname" => curr_dir)
  stat_hash = {}.merge(get_dir_stats(curr_dir))
  xml_file_node.add_attributes(stat_hash)
end

#process_each_glob_file(file_name) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/metadata/util/md5deep.rb', line 76

# Treats file_name as a glob pattern and runs #processFile on every match,
# attaching the results to the XML root. Any StandardError raised along the
# way is logged and swallowed.
def process_each_glob_file(file_name)
  begin
    FindClassMethods.glob(file_name, @fs) do |match|
      # Prepending @drive_letter to the file is a work-around for issues
      # when scanning Win VMs from Linux where the path returned from glob
      # does not include the drive letter.
      full_path = File.join(@drive_letter, match)
      processFile(File.dirname(full_path), File.basename(full_path), @xml.root)
    end
  rescue => err
    $log.error "process_each_glob_file: Exception #{err} rescued"
    $log.debug err.backtrace.join("\n")
  end
end

#process_pe_header(pe_hdr, xml_file_node) ⇒ Object



167
168
169
170
171
172
173
# File 'lib/metadata/util/md5deep.rb', line 167

# Copies PE header details onto a file's XML element.
#
# A "versioninfo" element is added when the versioninfo option is on and the
# header reports version info. A "libraries" element carrying the import list
# is added when the imports option is on and the header reports imports.
# A TypeError raised while doing so is logged and the header is skipped.
def process_pe_header(pe_hdr, xml_file_node)
  begin
    if @opts.versioninfo && pe_hdr.versioninfo.present?
      xml_file_node.add_element("versioninfo", pe_hdr.versioninfo)
    end

    if @opts.imports && pe_hdr.imports.present?
      xml_file_node.add_element("libraries", "imports" => pe_hdr.getImportList)
    end
  rescue TypeError => err
    $log.info "process_pe_header: TypeError handling PEheader; skipping PEheader info"
    $log.debug err.backtrace.join("\n")
  end
end

#processDir(path, x, xmlNode) ⇒ Object



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/metadata/util/md5deep.rb', line 103

# Handles one directory entry during a recursive scan.
#
# path    - directory currently being listed
# x       - entry name inside path
# xmlNode - element that receives a "dir" child when the entry is a directory
#
# For a real sub-directory the counter is bumped, a "dir" element with the
# entry's timestamps is added and #read_fs recurses into it. The directory
# test and the timestamps go through @fs when one is set, matching how
# #read_fs lists entries and how #processFile opens them; otherwise the local
# filesystem is used.
#
# The "." and ".." entries are not descended into, but each one still
# increments @fullDirCount.
def processDir(path, x, xmlNode)
  if x != "." && x != ".."
    currFile = File.join(path, x)

    begin
      if isDir?(currFile)
        @fullDirCount += 1
        # $log.debug "DIR : #{currFile}"
        xmlSubNode = xmlNode.add_element("dir", "name" => x, "fqname" => currFile)
        atime, ctime, mtime =
          if @fs
            [@fs.fileAtime(currFile), @fs.fileCtime(currFile), @fs.fileMtime(currFile)]
          else
            [File.atime(currFile), File.ctime(currFile), File.mtime(currFile)]
          end
        xmlSubNode.add_attributes({"atime" => atime.getutc.iso8601, "ctime" => ctime.getutc.iso8601, "mtime" => mtime.getutc.iso8601})
        read_fs(currFile, xmlSubNode)
      end
    rescue Errno::EACCES, RuntimeError => err
      # Best effort: an unreadable directory is skipped, not fatal.
      $log.debug "processDir: skipping [#{currFile}]: #{err}" if $log
    end
  else
    @fullDirCount += 1
  end
end

#processFile(path, x, xmlNode) ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/metadata/util/md5deep.rb', line 134

# Handles one directory entry as a file: adds a "file" element with stats and
# digests, plus PE header details and contents when configured.
#
# path    - directory containing the entry
# x       - entry name; names in the exclude list or starting with "$" are
#           skipped
# xmlNode - element that receives the new "file" child
#
# Directories are ignored (the method returns early). Access and system-call
# errors are swallowed so one unreadable file does not abort the scan. The
# handle is closed on every path, whatever kind of handle #fileOpen returned.
def processFile(path, x, xmlNode)
  if !@opts.exclude.include?(x) && x[0..0] != "$"
    currFile = File.join(path, x)
    fh = nil

    begin
      return if isDir?(currFile)

      @fullFileCount += 1
      fh = fileOpen(currFile)

      xmlFileNode = xmlNode.add_element("file", "name" => x, "fqname" => currFile)
      statHash = {}
      statHash.merge!(getFileStats(fh))
      statHash.merge!(calculate_digest(fh))
      xmlFileNode.add_attributes(statHash)

      ext = File.extname(currFile).downcase
      if @opts.winVerList.include?(ext)
        # A file with a listed extension is not necessarily a valid PE image;
        # any parse failure just means there is no header to report.
        pe_hdr = begin
          PEheader.new(fh)
        rescue StandardError
          nil
        end
        process_pe_header(pe_hdr, xmlFileNode) unless pe_hdr.nil?
      end

      getFileContents(fh, xmlFileNode) if @opts.contents == true
    rescue Errno::EACCES, RuntimeError, SystemCallError
      # Best effort: skip files that cannot be read.
    ensure
      begin
        # Handles that cannot report their state are closed unconditionally.
        fh.close if fh && !(fh.respond_to?(:closed?) && fh.closed?)
      rescue Errno::EACCES, RuntimeError, SystemCallError
        # Same best-effort policy applies to a failing close.
      end
    end
  end
  # NOTE(review): this interpolates the whole document for every entry, even
  # when debug output is off -- consider trimming the message.
  $log.debug "processFile: finished @xml is #{@xml}"
end

#read_fs(path, xmlNode) ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/metadata/util/md5deep.rb', line 89

# Scans one directory level into xmlNode.
#
# The directory is listed twice: the first pass hands every entry to
# #processFile, the second hands every entry to #processDir (which recurses).
# Entries come from @fs when one is set, otherwise from Dir. Afterwards the
# rolled-up sums of the child elements are stored on xmlNode.
def read_fs(path, xmlNode)
  @drive_letter = "" if @drive_letter.nil?

  [:processFile, :processDir].each do |handler|
    visit = proc { |entry| send(handler, path, entry, xmlNode) }
    if @fs
      @fs.dirForeach(path, &visit)
    else
      Dir.foreach(path, &visit)
    end
  end

  # Add up all the sums for all sub-elements
  xmlNode.add_attributes(calculate_sums(xmlNode))
end

#scan(path, rootID = "/") ⇒ Object



45
46
47
48
49
50
51
# File 'lib/metadata/util/md5deep.rb', line 45

# Recursively scans the tree below path and returns the XML document.
#
# path   - directory to scan; expanded to an absolute path first
# rootID - name given to the top-level "dir" element. A leading drive prefix
#          ("X:" followed by more text) is dropped from it.
def scan(path, rootID = "/")
  absolute_path = File.expand_path(path)
  has_drive_prefix = rootID.length > 2 && rootID[1..1] == ":"
  root_name = has_drive_prefix ? rootID[2..-1] : rootID

  top_node = @xml.root.add_element("dir", "name" => root_name)
  read_fs(absolute_path, top_node)
  @xml
end

#scan_glob(filename) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/metadata/util/md5deep.rb', line 58

# Scans a single path or a glob pattern and returns the XML document.
#
# filename - a file path, directory path or glob pattern; backslashes are
#            treated as path separators. The caller's string is left
#            untouched, so frozen strings are accepted.
#
# The directory part is recorded as "base_path" on the XML root and a leading
# drive prefix such as "C:" is remembered in @drive_letter. An existing
# directory is recorded as a single entry, an existing file is processed
# directly, and anything else is expanded as a glob pattern. Existence is
# checked through @fs when one is set, otherwise on the local filesystem.
def scan_glob(filename)
  # Work on a normalized copy rather than rewriting the argument in place.
  filename = filename.tr("\\", "/")
  startDir = File.dirname(filename)
  @xml.root.add_attribute("base_path", startDir)
  path_prefix = startDir[0, 2]
  @drive_letter = path_prefix.match?(/^\w\:/) ? path_prefix : ""

  # First check if we are passed a fully qualified file name
  exists = @fs ? @fs.fileExists?(filename) : File.exist?(filename)
  if exists
    base_file = File.basename(filename)
    if isDir?(filename)
      process_dir_as_file(startDir, base_file, @xml.root)
    else
      processFile(startDir, base_file, @xml.root)
    end
  else
    # If the file is not found then process the data as a glob pattern.
    process_each_glob_file(filename)
  end
  @xml
end

#to_xml ⇒ Object



267
268
269
# File 'lib/metadata/util/md5deep.rb', line 267

# Returns @xml, the document created with XmlHash.createDoc(:filesystem) in
# #initialize and filled in by the scan methods. The same object is returned
# on every call; no copy is made.
def to_xml
  @xml
end