Class: FileMonitoring::DirStat

Inherits:
Object
  • Object
show all
Defined in:
lib/file_monitoring/monitor_path.rb

Overview

This class holds the current state of a directory and provides methods to track changes to it.

Constant Summary collapse

@@log =
nil

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ DirStat

Initializes new directory monitoring object

Arguments:

  • path - Dir location



146
147
148
149
150
151
152
153
154
155
156
# File 'lib/file_monitoring/monitor_path.rb', line 146

# Builds an empty monitoring node for one directory.
#
# Arguments:
#   path - Dir location
def initialize(path)
  @path = path

  # Child entries known to this node, keyed by their path. Both start empty.
  @dirs = {}
  @files = {}

  # Hash: ["path" -> true|false]; paths already reported as non UTF-8.
  @non_utf8_paths = {}

  # Tells whether path EXISTS in the file system.
  #   While true, the entry is kept during the removed_unmarked_paths phase.
  @marked = false
end

Instance Attribute Details

#markedObject

Returns the value of attribute marked.



132
133
134
# File 'lib/file_monitoring/monitor_path.rb', line 132

# Reader for the marked flag.
#
# The flag starts out false, is set to true for children seen during a
# monitor pass and is reset by removed_unmarked_paths.
def marked
  @marked
end

#pathObject

Returns the value of attribute path.



132
133
134
# File 'lib/file_monitoring/monitor_path.rb', line 132

# Reader for the directory location this node was created with.
def path
  @path
end

Class Method Details

.set_log(log) ⇒ Object



136
137
138
# File 'lib/file_monitoring/monitor_path.rb', line 136

# Installs the logger stored in the @@log class variable.
#
# Arguments:
#   log - logger object, or nil to switch this logging off
#         (the methods that read @@log skip logging when it is nil/false)
def self.set_log(log)
  @@log = log
end

Instance Method Details

#has_dir?(path) ⇒ Boolean

Checks that there is a sub-folder with a given path.

Returns:

  • (Boolean)


213
214
215
# File 'lib/file_monitoring/monitor_path.rb', line 213

# Tells whether a sub-folder is registered under exactly the given path.
#
# Arguments:
#   path - key to look up among the known sub-directories
#
# Returns true or false.
def has_dir?(path)
  @dirs.key?(path)
end

#has_file?(path) ⇒ Boolean

Checks that there is a file with a given path.

Returns:

  • (Boolean)


218
219
220
# File 'lib/file_monitoring/monitor_path.rb', line 218

# Tells whether a file is registered under exactly the given path.
#
# Arguments:
#   path - key to look up among the known files
#
# Returns true or false.
def has_file?(path)
  @files.key?(path)
end

#indexObject



419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
# File 'lib/file_monitoring/monitor_path.rb', line 419

# Calls index on every file known to this directory, then on every
# sub-directory (which recurses down the tree).
#
# A garbage collection is forced after the files of this directory only when
# the global $indexed_file_count changed while they were processed.
#
# The hashes are walked with each_value rather than an external enumerator:
# an exception raised by a child then propagates without leaving the hash in
# its "being iterated" state, and errors are no longer silently turned into
# the end of the loop.
#
# Returns nil.
def index
  indexed_before = $indexed_file_count  # to check if files were actually indexed
  @files.each_value { |file_stat| file_stat.index }  # file index
  GC.start if indexed_before != $indexed_file_count  # GC only if files were indexed

  @dirs.each_value { |dir_stat| dir_stat.index }  # dir recursive call
  nil
end

#load_instance(sub_paths, sub_paths_index, size, modification_time) ⇒ Object

Adds an instance while initializing the tree using content data from a file. Parameters:

sub_paths - Array of sub paths of the instance which is added to tree
            Example:
              instance path = /dir1/dir2/file_name
                Sub path 1: /dir1
                Sub path 2: /dir1/dir2
                Sub path 3: /dir1/dir2/file_name
            Sub paths create DirStat objects, except the last sub path, which creates a FileStat.
sub_paths_index - the index indicates the next sub path to insert to the tree
                  the index will be raised at each recursive call down the tree
size - the instance size to insert to the tree
modification_time - the instance modification_time to insert to the tree


171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/file_monitoring/monitor_path.rb', line 171

# Inserts one instance into the tree while it is being built from stored
# content data, descending one sub path per recursive call.
#
# Arguments:
#   sub_paths         - Array of cumulative sub paths of the instance, e.g.
#                       ["/dir1", "/dir1/dir2", "/dir1/dir2/file_name"]
#   sub_paths_index   - position in sub_paths handled by this call
#   size              - size recorded for the instance
#   modification_time - modification time recorded for the instance
#
# Returns what add_file returns when the last sub path is reached, otherwise
# the result of the recursive call on the child directory.
def load_instance(sub_paths, sub_paths_index, size, modification_time)
  # Make sure both child maps exist. This indicates that the current DirStat is not new.
  @dirs ||= {}
  @files ||= {}

  last_index = sub_paths.size - 1
  if last_index == sub_paths_index
    # Leaf: the index points at the last entry, so this sub path is the file itself.
    leaf = FileStat.new(sub_paths[sub_paths_index], FileStatEnum::STABLE, size, modification_time, true)
    return add_file(leaf)
  end

  # Intermediate sub path: reuse the child directory when it is already in
  # the tree, otherwise create and register it.
  child_dir = @dirs[sub_paths[sub_paths_index]]
  unless child_dir
    child_dir = DirStat.new(sub_paths[sub_paths_index])
    add_dir(child_dir)
  end

  # Continue down the tree with the next sub path index.
  child_dir.load_instance(sub_paths, sub_paths_index + 1, size, modification_time)
end

#monitor(file_attr_to_checksum = nil) ⇒ Object

Recursively reads files and dirs from the file system (using glob). Handles new files/dirs. Changes state for existing files/dirs. Indexes stable files. Removal of non-existing files/dirs is handled in method: removed_unmarked_paths.



287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
# File 'lib/file_monitoring/monitor_path.rb', line 287

# Recursively scans this directory on the file system (using glob) and brings
# the in-memory tree up to date: new files/dirs are added, existing ones are
# marked and have their state advanced. Entries that no longer exist are NOT
# removed here; that is done by removed_unmarked_paths.
#
# Skipped entries: symlinks, names that are not valid UTF-8 (remembered in
# @non_utf8_paths so the warning is logged once) and entries that disappear
# between the glob and the lstat call. Every entry that is not a regular file
# is handled as a directory.
#
# Arguments:
#   file_attr_to_checksum - only consulted when Params['manual_file_changes']
#       is set. Looked up with [file basename, size, mtime as integer]; the
#       values must respond to unique, checksum and index_time. May be nil,
#       in which case every new file is treated as "not found" and skipped.
#
# Returns nil (the result of GC.start).
def monitor(file_attr_to_checksum = nil)

  # Algorithm:
  # assume that current dir is present
  # ls (glob) the dir path for child dirs and files
  # if child file is not already present, add it as new, mark it and handle its state
  # if file already present, mark it and handle its state.
  # if child dir is not already present, add it as new, mark it and propagates
  #    the recursive call
  # if child dir already present, mark it and handle its state
  # marked files will not be remove in next remove phase

  # ls (glob) the dir path for child dirs and files
  Dir.glob(@path + "/*").each do |globed_path|
    # if symlink - skip
    next if File.symlink?(globed_path)

    # UTF-8 - keep only files with valid UTF-8 names
    next if @non_utf8_paths[globed_path]
    check_utf_8_encoding_file = globed_path.clone
    unless check_utf_8_encoding_file.force_encoding("UTF-8").valid_encoding?
      Log.warning("Non UTF-8 file name '#{check_utf_8_encoding_file}', skipping.")
      @non_utf8_paths[globed_path]=true
      check_utf_8_encoding_file=nil
      next
    end

    # Get File \ Dir status
    globed_path_stat = File.lstat(globed_path) rescue next  # File or dir removed from OS file system
    if globed_path_stat.file?
      # ----------------------------- FILE -----------------------
      child_stat = @files[globed_path]
      if child_stat
        # -------------- EXISTS in Tree
        unless Params['manual_file_changes']
          # --------- NON MANUAL MODE
          child_stat.marked = true
          if child_stat.changed?(globed_path_stat)
            # ---------- STATUS CHANGED
            # Update changed status
            child_stat.state = FileStatEnum::CHANGED
            child_stat.cycles = 0
            child_stat.size = globed_path_stat.size
            child_stat.modification_time = globed_path_stat.mtime.to_i
            if @@log
              @@log.info("CHANGED file: " + globed_path)
              @@log.outputters[0].flush if Params['log_flush_each_message']
            end
            # remove file with changed checksum. File will be added once indexed
            $local_content_data_lock.synchronize{
              $local_content_data.remove_instance(Params['local_server_name'], globed_path)
            }
          else  # case child_stat did not change
            # ---------- SAME STATUS
            # File status is the same
            if child_stat.state != FileStatEnum::STABLE
              child_stat.state = FileStatEnum::UNCHANGED
              child_stat.cycles += 1
              # Enough unchanged cycles in a row promote the file to STABLE.
              if child_stat.cycles >= ::FileMonitoring.stable_state
                child_stat.state = FileStatEnum::STABLE
                if @@log
                  @@log.info("STABLE file: " + globed_path)
                  @@log.outputters[0].flush if Params['log_flush_each_message']
                end
              else
                if @@log
                  @@log.info("UNCHANGED file: " + globed_path)
                  @@log.outputters[0].flush if Params['log_flush_each_message']
                end
              end
            end
          end
        else  # case Params['manual_file_changes']
          # --------- MANUAL MODE
          child_stat.marked = true
        end
      else
        # ---------------------------- NEW FILE ----------
        unless Params['manual_file_changes']
          child_stat = FileStat.new(globed_path,
                                    FileStatEnum::NEW,
                                    globed_path_stat.size,
                                    globed_path_stat.mtime.to_i)
          if @@log
            @@log.info("NEW file: " + globed_path)
            @@log.outputters[0].flush if Params['log_flush_each_message']
          end
          child_stat.marked = true
          add_file(child_stat)
        else  # case Params['manual_file_changes']
          # --------------------- MANUAL MODE
          # check if file name and attributes exist in global file attr map
          file_attr_key = [File.basename(globed_path), globed_path_stat.size, globed_path_stat.mtime.to_i]
          # The map is optional (defaults to nil); without it nothing can be found.
          file_ident_info = file_attr_to_checksum && file_attr_to_checksum[file_attr_key]
          # If not found (real new file) or found but not unique then file needs indexing. skip in manual mode.
          next unless file_ident_info && file_ident_info.unique
          Log.debug1("update content data with file:%s  checksum:%s  index_time:%s",
                     File.basename(globed_path), file_ident_info.checksum, file_ident_info.index_time.to_s)
          # update content data (no need to update Dir tree)
          $local_content_data_lock.synchronize{
            $local_content_data.add_instance(file_ident_info.checksum,
                                             globed_path_stat.size,
                                             Params['local_server_name'],
                                             globed_path,
                                             globed_path_stat.mtime.to_i,
                                             file_ident_info.index_time)
          }
        end
      end
    else
      # ------------------------------ DIR -----------------------
      child_stat = @dirs[globed_path]
      unless child_stat
        # ----------- ADD NEW DIR
        child_stat = DirStat.new(globed_path)
        add_dir(child_stat)
        if @@log
          @@log.info("NEW dir: " + globed_path)
          @@log.outputters[0].flush if Params['log_flush_each_message']
        end
      end
      child_stat.marked = true
      # recursive call for dirs
      child_stat.monitor(file_attr_to_checksum)
    end
  end
  GC.start
end

#removed_unmarked_pathsObject

Recursively removes non-existing files and dirs from the tree.



237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/file_monitoring/monitor_path.rb', line 237

# Recursively drops every child that was not marked by the preceding monitor
# pass, and clears the mark on the children that stay.
#
# For an unmarked directory: logs it (when a logger is set), removes it from
# $local_content_data under $local_content_data_lock and calls rm_dir on it.
# For an unmarked file: logs it, removes its instance from
# $local_content_data and deletes it from @files.
#
# The hashes are walked with each_value rather than an external enumerator,
# so an exception raised part-way propagates without leaving the hash in its
# "being iterated" state. Entries are only removed during the walk, never
# added.
#
# Returns nil.
def removed_unmarked_paths
  # remove dirs
  @dirs.each_value do |dir_stat|
    if dir_stat.marked
      dir_stat.marked = false  # unset flag for next monitoring\index\remove phase
      # recursive call
      dir_stat.removed_unmarked_paths
    else
      # directory is not marked. Remove it, since it does not exist.
      if @@log
        @@log.info("NON_EXISTING dir: " + dir_stat.path)
        @@log.outputters[0].flush if Params['log_flush_each_message']
      end
      # remove the directory from the content data
      $local_content_data_lock.synchronize{
        $local_content_data.remove_directory(dir_stat.path, Params['local_server_name'])
      }
      rm_dir(dir_stat)
    end
  end

  # remove files
  @files.each_value do |file_stat|
    if file_stat.marked
      file_stat.marked = false  # unset flag for next monitoring\index\remove phase
    else
      # file is not marked, meaning it no longer exists. Remove.
      if @@log
        @@log.info("NON_EXISTING file: " + file_stat.path)
        @@log.outputters[0].flush if Params['log_flush_each_message']
      end
      # remove the file instance from the content data
      $local_content_data_lock.synchronize{
        $local_content_data.remove_instance(Params['local_server_name'], file_stat.path)
      }
      # remove from tree
      @files.delete(file_stat.path)
    end
  end
  nil
end

#to_s(indent = 0) ⇒ Object

Returns a string which contains the path of this directory as well as its structure.



223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/file_monitoring/monitor_path.rb', line 223

# Renders this directory and everything below it, one entry per line.
#
# The first line is this directory's path, preceded by +indent+ spaces.
# Each known file and then each known sub-directory follows on its own line,
# rendered through its own to_s with the indent increased by two.
#
# The first line is built here instead of through a bare `super`: that would
# forward +indent+ to Object#to_s, which accepts no arguments, and raise
# ArgumentError.
#
# Arguments:
#   indent - number of leading spaces for this directory's line (default 0)
#
# Returns the resulting String.
def to_s(indent = 0)
  indent_increment = 2
  child_indent = indent + indent_increment
  res = (" " * indent) + @path.to_s
  @files.each_value do |file|
    res << "\n" << file.to_s(child_indent)
  end if @files
  @dirs.each_value do |dir|
    res << "\n" << dir.to_s(child_indent)
  end if @dirs
  res
end