Class: MD5deep

Inherits:
Object
  • Object
show all
Defined in:
lib/metadata/util/md5deep.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(fs = nil, options = {}) ⇒ MD5deep

Returns a new instance of MD5deep.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/metadata/util/md5deep.rb', line 13

# Builds a scanner that records file metadata and digests into an XML document.
#
# @param fs [MiqFS, nil] filesystem object to scan through. Anything that is
#   not a MiqFS is discarded and the local File/Dir APIs are used instead.
# @param options [Hash] string-keyed overrides for the defaults below
#   ('versioninfo', 'imports', 'contents', 'exclude', 'digest', 'winVerList').
#   The hash and its values are never modified.
def initialize(fs = nil, options = {})
  @fullFileCount = 0
  @fullDirCount = 0
  @drive_letter = nil

  # Create XML document
  @xml = XmlHash.createDoc(:filesystem)
  @fs = fs.kind_of?(MiqFS) ? fs : nil

  # Read optional parameters if they exist in the options hash
  @opts = {'versioninfo' => true, 'imports' => true, 'contents' => false,
    'exclude' => ["pagefile.sys", "hiberfil.sys", ".", ".."],
    'digest' => ["md5"], "winVerList" => %w(.exe .dll .ocx .scr)
  }.merge(options)

  # Make sure md5 is part of our digest array. Hash#merge is shallow, so the
  # array may still be the caller's own object: work on a copy instead of
  # pushing into it. Array() also tolerates a nil or single-string value.
  digests = Array(@opts['digest']).dup
  digests.push("md5") unless digests.include?("md5")
  @opts['digest'] = digests

  # Convert hash to an OpenStruct for cleaner referencing
  @opts = OpenStruct.new(@opts)

  # Conditionally load digest libraries as needed.
  @opts.digest.each do |h|
    begin
      require 'digest/' + h.downcase
    rescue LoadError
      # This load error is not a concern since the standard digests are already included
      # in ruby by default, and the non-standard ones will be loaded by their name above.
      # $log.debug "Unable to load module for [#{h}]"
    end
  end
end

Instance Attribute Details

#fullDirCount ⇒ Object (readonly)

Returns the value of attribute fullDirCount.



10
11
12
# File 'lib/metadata/util/md5deep.rb', line 10

# Reader for @fullDirCount, the running directory counter.
# The constructor sets it to 0; processDir adds one for every directory it
# descends into and also one for each "." / ".." entry it is handed.
def fullDirCount
  @fullDirCount
end

#fullFileCount ⇒ Object (readonly)

Returns the value of attribute fullFileCount.



10
11
12
# File 'lib/metadata/util/md5deep.rb', line 10

# Reader for @fullFileCount, the running file counter.
# The constructor sets it to 0; processFile adds one for each entry that is
# not excluded, does not start with "$" and is not a directory, just before
# the file is opened.
def fullFileCount
  @fullFileCount
end

#imports ⇒ Object

Returns the value of attribute imports.



11
12
13
# File 'lib/metadata/util/md5deep.rb', line 11

# Reader for @imports.
# NOTE(review): none of the methods shown here assign @imports (the 'imports'
# option is kept in @opts instead), so this presumably returns nil unless a
# writer sets it elsewhere -- confirm.
def imports
  @imports
end

#versioninfo ⇒ Object

Returns the value of attribute versioninfo.



11
12
13
# File 'lib/metadata/util/md5deep.rb', line 11

# Reader for @versioninfo.
# NOTE(review): none of the methods shown here assign @versioninfo (the
# 'versioninfo' option is kept in @opts instead), so this presumably returns
# nil unless a writer sets it elsewhere -- confirm.
def versioninfo
  @versioninfo
end

Class Method Details

.scan_glob(fs, filename, options = {}) ⇒ Object



52
53
54
55
# File 'lib/metadata/util/md5deep.rb', line 52

# Convenience entry point: builds a throw-away MD5deep for +fs+ and +options+
# and returns the result of its instance-level scan_glob for +filename+.
def self.scan_glob(fs, filename, options = {})
  MD5deep.new(fs, options).scan_glob(filename)
end

Instance Method Details

#calculate_digest(fileName) ⇒ Object



223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/metadata/util/md5deep.rb', line 223

# Computes every configured digest over the full contents of an open handle.
#
# @param fileName [#seek, #read] despite the name, an already opened file
#   handle; it is rewound to the start before reading.
# @return [Hash{String => String}] digest name => string form of the digest.
#   Empty when no digests are configured. If reading fails, the error is
#   logged and the digests of whatever was read so far are returned.
def calculate_digest(fileName)
  # Nothing requested: return an empty result instead of falling through to
  # an unassigned (nil) hash.
  return {} if @opts.digest.empty?

  # Create hash for requested digests
  digest = create_digest_hash

  begin
    fileName.seek(0, IO::SEEK_SET)
    # Feed the file to every digest in ~10 MB chunks; read returns nil at EOF.
    while (buf = fileName.read(10_240_000))
      digest.each_value { |d| d << buf }
    end
  rescue => err
    $log.error "Error #{err} reading file to calculate digest"
    $log.debug err.backtrace.join("\n")
  end

  digest.transform_values(&:to_s)
end

#calculate_sums(xmlNode) ⇒ Object



204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/metadata/util/md5deep.rb', line 204

# Rolls the attributes of a node's direct children up into one summary hash.
#
# For every configured digest, the children's attribute strings of that name
# are fed (in child order) into a fresh digest; "size" attributes are summed
# as integers. Children lacking an attribute are skipped for that key.
#
# @param xmlNode [#each_element] parent node whose children are summarised
# @return [Hash] digest name => digest string, plus "size" => Integer
def calculate_sums(xmlNode)
  totals = create_digest_hash
  totals['size'] = 0

  xmlNode.each_element do |child|
    totals.each_key do |name|
      value = child.attributes[name]
      next unless value

      if name == "size"
        totals[name] += value.to_i
      else
        totals[name] << value
      end
    end
  end

  # Replace the digest objects with their string form; size stays numeric.
  totals.each_key do |name|
    totals[name] = totals[name].to_s unless name == 'size'
  end
  totals
end

#create_digest_hash ⇒ Object



248
249
250
251
252
253
254
255
256
257
258
# File 'lib/metadata/util/md5deep.rb', line 248

# Builds a fresh digest object for every configured digest name.
#
# @return [Hash{String => Object}] lower-cased digest name => new instance of
#   the Digest constant with the upper-cased name. Names that cannot be
#   resolved are left out.
def create_digest_hash
  @opts.digest.each_with_object({}) do |h, dHash|
    begin
      dHash[h.downcase] = Digest.const_get(h.upcase).new
    rescue NameError, LoadError
      # If we are unable to load a digest, skip it. Digest resolves unknown
      # constants by requiring "digest/<name>", so a missing library shows up
      # as LoadError rather than NameError; both mean "not available".
    end
  end
end

#fileOpen(currFile) ⇒ Object



182
183
184
185
186
187
188
# File 'lib/metadata/util/md5deep.rb', line 182

# Opens +currFile+ through @fs when one is set, otherwise with File.open.
# The caller owns the returned handle and must close it.
def fileOpen(currFile)
  return File.open(currFile) unless @fs

  @fs.fileOpen(currFile)
end

#get_dir_stats(dir) ⇒ Object



196
197
198
199
200
201
202
# File 'lib/metadata/util/md5deep.rb', line 196

# Collects size and timestamps for +dir+.
#
# @param dir [String] path to inspect, via @fs when set, else the local disk
# @return [Hash{String => Object}] "size" as reported by the filesystem, and
#   "atime", "ctime", "mtime" as UTC ISO-8601 strings
def get_dir_stats(dir)
  if @fs
    size  = @fs.fileSize(dir)
    atime = @fs.fileAtime(dir)
    ctime = @fs.fileCtime(dir)
    mtime = @fs.fileMtime(dir)
  else
    # One stat call gives a single consistent snapshot instead of stat-ing the
    # same path once per field.
    info  = File.stat(dir)
    size  = info.size
    atime = info.atime
    ctime = info.ctime
    mtime = info.mtime
  end

  {
    "size"  => size,
    "atime" => atime.getutc.iso8601,
    "ctime" => ctime.getutc.iso8601,
    "mtime" => mtime.getutc.iso8601
  }
end

#getFileContents(fh, xml_node) ⇒ Object



260
261
262
263
264
# File 'lib/metadata/util/md5deep.rb', line 260

# Stores the start of a file's contents under +xml_node+.
#
# Rewinds +fh+, reads at most 1,024,000 bytes (an empty string if the handle
# is already at EOF) and puts the MIQEncode-encoded result into a new
# "contents" child element flagged as compressed and encoded.
def getFileContents(fh, xml_node)
  fh.seek(0, IO::SEEK_SET)
  data = fh.read(1024000)
  data = "" if data.nil? # read will return nil when at EOF.
  contents_node = xml_node.add_element("contents", "compressed" => "true", "encoded" => "true")
  contents_node.text = MIQEncode.encode(data)
end

#getFileStats(fh) ⇒ Object



190
191
192
193
194
# File 'lib/metadata/util/md5deep.rb', line 190

# Collects size and timestamps from an open handle.
#
# @param fh [File, #size] a plain File is swapped for its File::Stat; any
#   other object must itself answer size, atime, ctime and mtime.
# @return [Hash{String => Object}] "size" plus "atime", "ctime", "mtime" as
#   UTC ISO-8601 strings
def getFileStats(fh)
  # Only an exact File instance goes through stat; other handle types are
  # queried directly.
  info = fh.class == File ? fh.stat : fh
  as_utc = ->(time) { time.getutc.iso8601 }

  {
    "size"  => info.size,
    "atime" => as_utc.call(info.atime),
    "ctime" => as_utc.call(info.ctime),
    "mtime" => as_utc.call(info.mtime)
  }
end

#isDir?(currFile) ⇒ Boolean

Returns:

  • (Boolean)


174
175
176
177
178
179
180
# File 'lib/metadata/util/md5deep.rb', line 174

# Tells whether +currFile+ is a directory, asking @fs when one is set and
# the local filesystem otherwise.
def isDir?(currFile)
  @fs ? @fs.fileDirectory?(currFile) : File.directory?(currFile)
end

#process_dir_as_file(path, x, xml_node) ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
# File 'lib/metadata/util/md5deep.rb', line 121

# Records a directory as a "file" element (no recursion, no digests).
#
# Does nothing for "." and "..", or when path/x is not a directory.
# Otherwise adds a "file" child to +xml_node+ carrying the name, the full
# path and the values returned by get_dir_stats.
def process_dir_as_file(path, x, xml_node)
  return if x == "." || x == ".."

  curr_dir = File.join(path, x)
  return unless isDir?(curr_dir)

  dir_node = xml_node.add_element("file", "name" => x, "fqname" => curr_dir)
  dir_node.add_attributes(get_dir_stats(curr_dir))
end

#process_each_glob_file(file_name) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/metadata/util/md5deep.rb', line 75

# Expands +file_name+ as a glob through FindClassMethods.glob and runs
# processFile on every match, attaching results to the document root.
# Any StandardError raised along the way is logged and swallowed.
def process_each_glob_file(file_name)
  begin
    FindClassMethods.glob(file_name, @fs) do |match|
      # Prepending @drive_letter to the file is a work-around for issues
      # when scanning Win VMs from Linux where the path returned from glob
      # does not include the drive letter.
      full_path = File.join(@drive_letter, match)
      dir_part, base_part = File.split(full_path)
      processFile(dir_part, base_part, @xml.root)
    end
  rescue => err
    $log.error "process_each_glob_file: Exception #{err} rescued"
    $log.debug err.backtrace.join("\n")
  end
end

#process_pe_header(pe_hdr, xml_file_node) ⇒ Object



166
167
168
169
170
171
172
# File 'lib/metadata/util/md5deep.rb', line 166

# Copies PE header details onto a file's XML node.
#
# Adds a "versioninfo" element when that option is on and the header has
# version info, and a "libraries" element holding the import list when the
# imports option is on and the header has imports. A TypeError raised while
# doing so is logged and the PE information is skipped.
def process_pe_header(pe_hdr, xml_file_node)
  begin
    if @opts.versioninfo && pe_hdr.versioninfo.present?
      xml_file_node.add_element("versioninfo", pe_hdr.versioninfo)
    end
    if @opts.imports && pe_hdr.imports.present?
      xml_file_node.add_element("libraries", "imports" => pe_hdr.getImportList)
    end
  rescue TypeError => err
    $log.info "process_pe_header: TypeError handling PEheader; skipping PEheader info"
    $log.debug err.backtrace.join("\n")
  end
end

#processDir(path, x, xmlNode) ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/metadata/util/md5deep.rb', line 102

# Records one directory entry and recurses into it.
#
# For a real directory (checked via isDir?, so @fs is honoured when set) a
# "dir" child is added to +xmlNode+ with its name, full path and timestamps,
# and read_fs is run on it. Entries that are not directories are ignored.
#
# @param path [String] directory currently being listed
# @param x [String] entry name inside +path+
# @param xmlNode [#add_element] node that receives the "dir" child
def processDir(path, x, xmlNode)
  if x != "." && x != ".."
    currFile = File.join(path, x)

    begin
      # Ask the same filesystem the listing came from; probing the host with
      # File.* is wrong when entries come from @fs.
      if isDir?(currFile)
        @fullDirCount += 1
        # $log.debug "DIR : #{currFile}"
        xmlSubNode = xmlNode.add_element("dir", "name" => x, "fqname" => currFile)
        # Only timestamps are stored here; "size" is filled in by the rollup
        # that read_fs performs on the sub-node.
        timestamps = get_dir_stats(currFile).reject { |key, _value| key == "size" }
        xmlSubNode.add_attributes(timestamps)
        read_fs(currFile, xmlSubNode)
      end
    rescue Errno::EACCES, RuntimeError
      # Best effort: unreadable directories are skipped.
    end
  else
    # NOTE(review): "." and ".." are counted as directories as well, so each
    # listed directory adds two to the total -- confirm this is intended.
    @fullDirCount += 1
  end
end

#processFile(path, x, xmlNode) ⇒ Object



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/metadata/util/md5deep.rb', line 133

# Records one file entry under +xmlNode+.
#
# Entries in the exclude list, entries starting with "$" and directories are
# skipped. For everything else a "file" element is added with the file's
# stats and digests; PE header details are added for extensions listed in
# winVerList, and the file contents when the 'contents' option is exactly
# true. Access and system-call errors are swallowed so one unreadable file
# does not abort the scan.
#
# @param path [String] directory currently being listed
# @param x [String] entry name inside +path+
# @param xmlNode [#add_element] node that receives the "file" child
def processFile(path, x, xmlNode)
  unless @opts.exclude.include?(x) || x.start_with?("$")
    currFile = File.join(path, x)
    fh = nil

    begin
      return if isDir?(currFile)

      @fullFileCount += 1
      fh = fileOpen(currFile)

      xmlFileNode = xmlNode.add_element("file", "name" => x, "fqname" => currFile)
      statHash = getFileStats(fh).merge(calculate_digest(fh))
      xmlFileNode.add_attributes(statHash)

      ext = File.extname(currFile).downcase
      if @opts.winVerList.include?(ext)
        # A file with a PE-looking extension may not be a valid PE image;
        # treat any parse failure as "no header".
        pe_hdr = begin
          PEheader.new(fh)
        rescue StandardError
          nil
        end
        process_pe_header(pe_hdr, xmlFileNode) unless pe_hdr.nil?
      end

      getFileContents(fh, xmlFileNode) if @opts.contents == true
    rescue Errno::EACCES, RuntimeError, SystemCallError
      # Best effort: the file is skipped; whatever was recorded so far stays.
    ensure
      # Release the handle on every exit path and for every handle type, not
      # just plain File objects, so handles opened through @fs do not leak.
      begin
        fh.close if fh && !(fh.respond_to?(:closed?) && fh.closed?)
      rescue Errno::EACCES, RuntimeError, SystemCallError
        # Closing is best effort too.
      end
    end
  end
  # NOTE(review): this interpolates the whole document for every entry, even
  # when debug logging is off -- consider trimming the message.
  $log.debug "processFile: finished @xml is #{@xml}"
end

#read_fs(path, xmlNode) ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/metadata/util/md5deep.rb', line 88

# Scans one directory level: first every entry is offered to processFile,
# then every entry is offered to processDir (which recurses), and finally
# the rolled-up digests and size of the children are written onto +xmlNode+.
# Entries are listed via @fs when set, else via Dir.
def read_fs(path, xmlNode)
  @drive_letter = "" if @drive_letter.nil?

  # Lists +path+ with whichever filesystem is in use, yielding entry names.
  each_entry = lambda do |&visitor|
    if @fs
      @fs.dirForeach(path, &visitor)
    else
      Dir.foreach(path, &visitor)
    end
  end

  each_entry.call { |entry| processFile(path, entry, xmlNode) }
  each_entry.call { |entry| processDir(path, entry, xmlNode) }

  # Add up all the sums for all sub-elements
  xmlNode.add_attributes(calculate_sums(xmlNode))
end

#scan(path, rootID = "/") ⇒ Object



44
45
46
47
48
49
50
# File 'lib/metadata/util/md5deep.rb', line 44

# Scans the tree below +path+ and returns the XML document.
#
# @param path [String] directory to scan; expanded to an absolute path
# @param rootID [String] name given to the top-level "dir" element. A leading
#   drive prefix such as "C:" is dropped when something follows it.
# @return [Object] the XML document (@xml)
def scan(path, rootID = "/")
  absolute_path = File.expand_path(path)
  has_drive_prefix = rootID.length > 2 && rootID[1, 1] == ":"
  root_name = has_drive_prefix ? rootID[2..-1] : rootID

  top_node = @xml.root.add_element("dir", "name" => root_name)
  read_fs(absolute_path, top_node)
  @xml
end

#scan_glob(filename) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/metadata/util/md5deep.rb', line 57

# Scans a single path or a glob pattern and returns the XML document.
#
# Backslashes are treated as path separators. The directory part is stored
# as the root's "base_path" attribute and a leading "X:" drive prefix is
# remembered for process_each_glob_file. An existing path is recorded
# directly (directories via process_dir_as_file, files via processFile);
# anything else is expanded as a glob.
#
# @param filename [String] path or glob pattern; not modified
# @return [Object] the XML document (@xml)
def scan_glob(filename)
  # Normalise on a copy: the caller's string may be frozen or reused.
  filename = filename.tr("\\", "/")
  startDir = File.dirname(filename)
  @xml.root.add_attribute("base_path", startDir)
  path_prefix = startDir[0, 2]
  @drive_letter = path_prefix.match?(/^\w\:/) ? path_prefix : ""

  # First check if we are passed a fully qualifed file name. Without an @fs
  # fall back to the local filesystem, as the other helpers do.
  exists = @fs ? @fs.fileExists?(filename) : File.exist?(filename)
  if exists
    base_file = File.basename(filename)
    if isDir?(filename)
      process_dir_as_file(startDir, base_file, @xml.root)
    else
      processFile(startDir, base_file, @xml.root)
    end
  else
    # If the file is not found then process the data as a glob pattern.
    process_each_glob_file(filename)
  end
  @xml
end

#to_xml ⇒ Object



266
267
268
# File 'lib/metadata/util/md5deep.rb', line 266

# Returns @xml, the document created in the constructor and filled in by
# scan / scan_glob. The same object is returned each time, not a copy.
def to_xml
  @xml
end