Class: FileMonitoring::DirStat

Inherits:
Object
  • Object
show all
Defined in:
lib/file_monitoring/monitor_path.rb

Overview

This class holds the current state of a directory and the methods used to track changes to it.

Constant Summary collapse

@@log =
nil

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ DirStat

Initializes new directory monitoring object

Arguments:

  • path - Dir location



153
154
155
156
157
158
159
160
161
162
163
# File 'lib/file_monitoring/monitor_path.rb', line 153

# Creates an empty monitoring node for a directory.
#
# Arguments:
# * <tt>path</tt> - Dir location
def initialize(path)
  # Existence flag, cleared at construction time. While it is true the
  # entry is kept by the removed_unmarked_paths phase.
  @marked = false
  @path = path
  # Child containers start out empty.
  @dirs, @files = {}, {}
  @symlinks = {}        # Hash: symlink path -> target path
  @non_utf8_paths = {}  # Hash: path -> true once flagged as non UTF-8
end

Instance Attribute Details

#markedObject

Returns the value of attribute marked.



132
133
134
# File 'lib/file_monitoring/monitor_path.rb', line 132

# Reader for the @marked flag (set to false by the constructor and used by
# the monitor / removed_unmarked_paths phases to tell found entries apart).
def marked
  @marked
end

#pathObject

Returns the value of attribute path.



132
133
134
# File 'lib/file_monitoring/monitor_path.rb', line 132

# Reader for @path, the directory location this object was constructed with.
def path
  @path
end

Class Method Details

.set_log(log) ⇒ Object



136
137
138
# File 'lib/file_monitoring/monitor_path.rb', line 136

# Installs the logger kept in the @@log class variable.
#
# Arguments:
# * <tt>log</tt> - logger object; write_to_log sends messages to it via
#   +info+ and does nothing while it is nil.
def self.set_log(log)
  @@log = log
end

Instance Method Details



414
415
416
417
418
419
420
421
422
423
# File 'lib/file_monitoring/monitor_path.rb', line 414

# Registers a symlink seen during the current scan.
#
# The link is written into +found_symlinks+ (link path -> target path) and
# added to the content data, overriding any earlier entry. Symlinks that no
# longer exist are removed from the content data later on.
#
# Arguments:
# * <tt>globed_path</tt> - path of the symlink
# * <tt>found_symlinks</tt> - Hash collecting the symlinks found in this scan
def add_found_symlinks(globed_path, found_symlinks)
  link_target = File.readlink(globed_path)
  found_symlinks.store(globed_path, link_target)
  # Content data is shared, so it is only touched under its lock.
  $local_content_data_lock.synchronize do
    $local_content_data.add_symlink(Params['local_server_name'], globed_path, link_target)
  end
end

#handle_dir(globed_path, file_attr_to_checksum) ⇒ Object



400
401
402
403
404
405
406
407
408
409
410
411
412
# File 'lib/file_monitoring/monitor_path.rb', line 400

# Handles a directory entry found while scanning this directory.
#
# A sub directory that is not in the tree yet is created, added and logged.
# In both cases the sub directory is marked as found and then monitored
# recursively.
#
# Arguments:
# * <tt>globed_path</tt> - path of the sub directory
# * <tt>file_attr_to_checksum</tt> - passed through to the recursive monitor call
def handle_dir(globed_path, file_attr_to_checksum)
  sub_dir = @dirs[globed_path] || begin
    # First time this directory is seen: add it to the tree.
    created = DirStat.new(globed_path)
    add_dir(created)
    write_to_log("NEW dir: " + globed_path)
    created
  end
  sub_dir.marked = true
  # Descend into the sub directory.
  sub_dir.monitor(file_attr_to_checksum)
end

#handle_existing_file(child_stat, globed_path, globed_path_stat) ⇒ Object



336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
# File 'lib/file_monitoring/monitor_path.rb', line 336

# Updates the state of a file that is already part of the tree.
#
# * If the file changed: state becomes CHANGED, the cycle counter is reset,
#   size / modification time are refreshed and the instance is removed from
#   the content data (it is added again once indexed).
# * If the file did not change and is not STABLE yet: the cycle counter is
#   incremented and the state becomes STABLE once the counter reaches
#   FileMonitoring.stable_state, UNCHANGED otherwise.
# * A STABLE file that did not change is left untouched.
#
# Arguments:
# * <tt>child_stat</tt> - tree entry of the file
# * <tt>globed_path</tt> - path of the file
# * <tt>globed_path_stat</tt> - current file system status of the file
def handle_existing_file(child_stat, globed_path, globed_path_stat)
  unless child_stat.changed?(globed_path_stat)
    # ---------- SAME STATUS
    return if child_stat.state == FileStatEnum::STABLE

    child_stat.cycles += 1
    settled = child_stat.cycles >= ::FileMonitoring.stable_state
    child_stat.state = settled ? FileStatEnum::STABLE : FileStatEnum::UNCHANGED
    return write_to_log((settled ? "STABLE file: " : "UNCHANGED file: ") + globed_path)
  end

  # ---------- STATUS CHANGED
  child_stat.state = FileStatEnum::CHANGED
  child_stat.cycles = 0
  child_stat.size = globed_path_stat.size
  child_stat.modification_time = globed_path_stat.mtime.to_i
  write_to_log("CHANGED file: " + globed_path)
  # The stored checksum is stale now; drop the instance until it is re-indexed.
  $local_content_data_lock.synchronize do
    $local_content_data.remove_instance(Params['local_server_name'], globed_path)
  end
end

#handle_moved_file(globed_path, globed_path_stat, file_attr_to_checksum) ⇒ Object

This method handles the case where the ‘manual_file_changes’ param is set, meaning that some files were moved/copied and there is no need to reindex them. In that case, “new files” are looked up among the old files to obtain their checksum (skipping the index phase). The lookup is done via the specially prepared file_attr_to_checksum map.



369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
# File 'lib/file_monitoring/monitor_path.rb', line 369

# Manual mode handling of a file that is not in the tree.
#
# Looks the file up in +file_attr_to_checksum+ by
# [base name, size, modification time]. When a unique match exists its
# checksum and index time are reused to add an instance to the content data,
# so the file is not indexed. Otherwise nothing is done.
#
# Arguments:
# * <tt>globed_path</tt> - path of the file
# * <tt>globed_path_stat</tt> - current file system status of the file
# * <tt>file_attr_to_checksum</tt> - map from file attributes to identification info
def handle_moved_file(globed_path, globed_path_stat, file_attr_to_checksum)
  # --------------------- MANUAL MODE
  base_name = File.basename(globed_path)
  byte_size = globed_path_stat.size
  mtime_secs = globed_path_stat.mtime.to_i

  known = file_attr_to_checksum[[base_name, byte_size, mtime_secs]]
  # Not found (real new file) or not unique: the file needs indexing, skip it here.
  return unless known && known.unique

  Log.debug1("update content data with file:%s  checksum:%s  index_time:%s",
             base_name, known.checksum, known.index_time.to_s)
  # Only the content data is updated; the Dir tree is left as is.
  $local_content_data_lock.synchronize do
    $local_content_data.add_instance(known.checksum, byte_size, Params['local_server_name'],
                                     globed_path, mtime_secs, known.index_time)
  end
end

#handle_new_file(child_stat, globed_path, globed_path_stat) ⇒ Object



390
391
392
393
394
395
396
397
398
# File 'lib/file_monitoring/monitor_path.rb', line 390

# Adds a file that is not in the tree yet.
#
# A FileStat in state NEW is built from the file's size and modification
# time, the addition is logged, and the entry is marked as found and added
# to this directory.
#
# Arguments:
# * <tt>child_stat</tt> - ignored; the value passed in is never read
# * <tt>globed_path</tt> - path of the file
# * <tt>globed_path_stat</tt> - current file system status of the file
def handle_new_file(child_stat, globed_path, globed_path_stat)
  fresh_entry = FileStat.new(globed_path, FileStatEnum::NEW,
                             globed_path_stat.size, globed_path_stat.mtime.to_i)
  add_file(
    fresh_entry.tap { |entry|
      write_to_log("NEW file: " + globed_path)
      entry.marked = true
    }
  )
end

#has_dir?(path) ⇒ Boolean

Checks that there is a sub-folder with a given path.

Returns:

  • (Boolean)


254
255
256
# File 'lib/file_monitoring/monitor_path.rb', line 254

# Tells whether this directory holds a sub-folder entry keyed by +path+.
#
# Returns true or false.
def has_dir?(path)
  @dirs.key?(path)
end

#has_file?(path) ⇒ Boolean

Checks that there is a file with a given path.

Returns:

  • (Boolean)


259
260
261
# File 'lib/file_monitoring/monitor_path.rb', line 259

# Tells whether this directory holds a file entry keyed by +path+.
#
# Returns true or false.
def has_file?(path)
  @files.key?(path)
end

#indexObject



502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
# File 'lib/file_monitoring/monitor_path.rb', line 502

# Indexes every file entry of this directory, then recurses into every
# sub directory entry.
#
# A GC run is triggered after the files only when $indexed_file_count moved,
# i.e. when files were actually indexed.
#
# Returns nil.
def index
  indexed_before = $indexed_file_count  # snapshot to detect real indexing work
  @files.each_value { |file_stat| file_stat.index }
  GC.start unless indexed_before == $indexed_file_count

  # dir recursive call
  @dirs.each_value { |dir_stat| dir_stat.index }
  nil
end

#is_globed_path_valid(globed_path) ⇒ Object

Checks that the globed path is valid.



320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
# File 'lib/file_monitoring/monitor_path.rb', line 320

# Checks that the globed path is valid, i.e. that its name is valid UTF-8.
#
# Paths found to be non UTF-8 are remembered in @non_utf8_paths so that
# the warning is logged only once per path; such paths stay invalid on
# every later call.
#
# Arguments:
# * <tt>globed_path</tt> - path to check; it is not modified
#
# Returns true when the path is valid UTF-8, false otherwise.
def is_globed_path_valid(globed_path)
  # Already flagged as non UTF-8: still invalid, just skip the logging.
  return false if @non_utf8_paths[globed_path]

  # force_encoding mutates its receiver, so work on a copy. dup (unlike
  # clone) also yields an unfrozen copy when the argument is frozen.
  utf8_candidate = globed_path.dup.force_encoding("UTF-8")
  return true if utf8_candidate.valid_encoding?

  Log.warning("Non UTF-8 file name '#{utf8_candidate}', skipping.")
  @non_utf8_paths[globed_path] = true
  false
end

#load_instance(sub_paths, sub_paths_index, size, modification_time) ⇒ Object

Adds an instance while initializing the tree from content data loaded from a file. Parameters:

sub_paths - Array of sub paths of the instance which is added to tree
            Example:
              instance path = /dir1/dir2/file_name
                Sub path 1: /dir1
                Sub path 2: /dir1/dir2
                Sub path 3: /dir1/dir2/file_name
            sub paths would create DirStat objs or FileStat(FileStat create using last sub path).
sub_paths_index - the index indicates the next sub path to insert to the tree
                  the index will be raised at each recursive call down the tree
size - the instance size to insert to the tree
modification_time - the instance modification_time to insert to the tree


178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/file_monitoring/monitor_path.rb', line 178

# Adds one instance to the tree while it is initialized from content data.
#
# Walks +sub_paths+ one level per recursive call: every sub path except the
# last one is a directory (created when missing), the last one is the file,
# which is added as a STABLE FileStat.
#
# Arguments:
# * <tt>sub_paths</tt> - Array of sub paths of the instance, shortest first
# * <tt>sub_paths_index</tt> - index of the sub path handled by this call
# * <tt>size</tt> - the instance size
# * <tt>modification_time</tt> - the instance modification time
def load_instance(sub_paths, sub_paths_index, size, modification_time)
  # Make sure both containers exist.
  @dirs ||= {}
  @files ||= {}

  current_path = sub_paths[sub_paths_index]
  if sub_paths_index == sub_paths.size - 1
    # Leaf case: the last sub path is the file itself.
    return add_file(FileStat.new(current_path, FileStatEnum::STABLE, size, modification_time, true))
  end

  # Directory level: reuse the existing entry or create it.
  child_dir = @dirs[current_path]
  unless child_dir
    child_dir = DirStat.new(current_path)
    add_dir(child_dir)
  end
  # Continue one level down with the next sub path.
  child_dir.load_instance(sub_paths, sub_paths_index + 1, size, modification_time)
end

add symlink while initializing tree using content data from file.

Assumption is that Tree already built

Parameters:

sub_paths - Array of sub paths of the symlink which is added to tree
            Example:
              instance path = /dir1/dir2/file_name
                Sub path 1: /dir1
                Sub path 2: /dir1/dir2
                Sub path 3: /dir1/dir2/file_name
            sub paths would create DirStat objs or FileStat(FileStat create using last sub path).
sub_paths_index - the index indicates the next sub path to insert to the tree
                  the index will be raised at each recursive call down the tree
symlink_path - symlink file path
symlink_target - the target path pointed by the symlink


213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'lib/file_monitoring/monitor_path.rb', line 213

# Adds one symlink to the tree while it is initialized from content data.
#
# Walks +sub_paths+ one level per recursive call: every sub path except the
# last one is a directory (created when missing); at the last level the
# symlink is recorded in that directory's symlink map.
#
# Arguments:
# * <tt>sub_paths</tt> - Array of sub paths of the symlink, shortest first
# * <tt>sub_paths_index</tt> - index of the sub path handled by this call
# * <tt>symlink_path</tt> - symlink file path
# * <tt>symlink_target</tt> - the target path pointed to by the symlink
def load_symlink(sub_paths, sub_paths_index, symlink_path, symlink_target)
  # Make sure the containers exist.
  @dirs ||= {}
  @files ||= {}
  @symlinks ||= {}

  if sub_paths_index == sub_paths.size - 1
    # Leaf case: index points to the last entry. Add the symlink.
    @symlinks[symlink_path] = symlink_target
  else
    # Directory level: reuse the existing entry or create it.
    sub_path = sub_paths[sub_paths_index]
    dir_stat = @dirs[sub_path]
    unless dir_stat
      dir_stat = DirStat.new(sub_path)
      add_dir(dir_stat)
    end
    # Recurse with load_symlink (not load_instance): the trailing arguments
    # are the symlink path and target, not a file size and modification time.
    dir_stat.load_symlink(sub_paths, sub_paths_index + 1, symlink_path, symlink_target)
  end
end

#monitor(file_attr_to_checksum = nil) ⇒ Object

Recursively, read files and dirs lists from file system (using Glob)

  • Adds new files/dirs.

  • Changes state for existing files/dirs

  • Index stable files

  • Removal of non-existing files/dirs is handled in method: removed_unmarked_paths

  • Handles special case for param ‘manual_file_changes’ where files are moved and there is no need to index them



447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
# File 'lib/file_monitoring/monitor_path.rb', line 447

# Scans this directory (one glob level) and updates the tree accordingly.
#
# Marking algorithm, per globed child:
# * invalid paths (see is_globed_path_valid) are skipped
# * symlinks are collected and added to the content data
# * a known file is marked; its state is handled unless in manual mode
# * an unknown file is added as new, or, in manual mode
#   (Params['manual_file_changes']), only looked up as a moved/copied file
# * any other entry is handled as a directory, which recurses
# Marked entries survive the following remove phase. At the end, symlinks
# that were not found again are removed and a GC run is triggered.
#
# Arguments:
# * <tt>file_attr_to_checksum</tt> - lookup map used in manual mode and
#   passed down to sub directories (default nil)
def monitor(file_attr_to_checksum = nil)
  seen_symlinks = {}  # symlinks found under this dir during this scan

  # ls (glob) the dir path for child dirs and files
  Dir.glob(@path + "/*").each do |entry|
    next unless is_globed_path_valid(entry)

    if File.symlink?(entry)
      add_found_symlinks(entry, seen_symlinks)
      next
    end

    entry_stat = begin
      File.lstat(entry)
    rescue StandardError
      next  # file or dir removed from the file system in the meantime
    end

    unless entry_stat.file?
      # ------------------------------ DIR -----------------------
      handle_dir(entry, file_attr_to_checksum)
      next
    end

    # ----------------------------- FILE -----------------------
    known_file = @files[entry]
    if known_file
      # Mark that the file exists (it will not be deleted by the remove phase).
      known_file.marked = true
      # In manual mode existing files are left as they are.
      handle_existing_file(known_file, entry, entry_stat) unless Params['manual_file_changes']
    elsif Params['manual_file_changes']
      # Only create a content data instance based on the copied/moved file.
      handle_moved_file(entry, entry_stat, file_attr_to_checksum)
    else
      # Regular case of a new file.
      handle_new_file(known_file, entry, entry_stat)
    end
  end

  remove_not_found_symlinks(seen_symlinks)

  GC.start
end


425
426
427
428
429
430
431
432
433
434
435
436
437
438
# File 'lib/file_monitoring/monitor_path.rb', line 425

# Drops symlinks that disappeared since the previous scan.
#
# Every symlink currently known to this directory that is missing from
# +found_symlinks+ is removed from the content data. Afterwards
# +found_symlinks+ becomes the directory's symlink map.
#
# Arguments:
# * <tt>found_symlinks</tt> - Hash of the symlinks found in the current scan
def remove_not_found_symlinks(found_symlinks)
  @symlinks.each_key do |known_link|
    next if found_symlinks.key?(known_link)

    # The symlink was removed from the file system: remove it from content data.
    $local_content_data_lock.synchronize do
      $local_content_data.remove_symlink(Params['local_server_name'], known_link)
    end
  end
  @symlinks = found_symlinks
end

#removed_unmarked_pathsObject

Recursively, remove non existing files and dirs in Tree



278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/file_monitoring/monitor_path.rb', line 278

# Recursively removes the entries that were not marked by the last monitor
# phase, i.e. files and dirs that no longer exist.
#
# * marked dir: the mark is cleared for the next cycle and the call recurses
# * unmarked dir: logged, removed from the content data and from the tree
# * marked file: the mark is cleared for the next cycle
# * unmarked file: logged, removed from the content data and from the tree
#
# Returns nil.
def removed_unmarked_paths
  # ---- dirs
  @dirs.each_value do |dir_stat|
    if dir_stat.marked
      dir_stat.marked = false  # unset flag for next monitoring\index\remove phase
      dir_stat.removed_unmarked_paths  # recursive call
      next
    end

    # Directory is not marked. Remove it, since it does not exist.
    write_to_log("NON_EXISTING dir: " + dir_stat.path)
    $local_content_data_lock.synchronize do
      $local_content_data.remove_directory(dir_stat.path, Params['local_server_name'])
    end
    rm_dir(dir_stat)
  end

  # ---- files
  @files.each_value do |file_stat|
    if file_stat.marked
      file_stat.marked = false  # unset flag for next monitoring\index\remove phase
      next
    end

    # File is not marked, meaning it no longer exists. Remove.
    write_to_log("NON_EXISTING file: " + file_stat.path)
    $local_content_data_lock.synchronize do
      $local_content_data.remove_instance(Params['local_server_name'], file_stat.path)
    end
    # remove from tree
    @files.delete(file_stat.path)
  end
  nil
end

#to_s(indent = 0) ⇒ Object

Returns a string which contains the path and state of this directory as well as its structure.



264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/file_monitoring/monitor_path.rb', line 264

# Builds a multi-line description of this directory: the inherited to_s
# text first, then one line per file entry followed by one line per sub
# directory entry, each rendered by the child's own to_s with the indent
# increased by 2.
#
# Arguments:
# * <tt>indent</tt> - indentation of this level (default 0)
def to_s(indent = 0)
  nested_indent = indent + 2
  # Files come before dirs; a missing container contributes nothing.
  children = []
  children.concat(@files.values) if @files
  children.concat(@dirs.values) if @dirs
  children.inject(super()) { |text, child| text + "\n" + child.to_s(nested_indent) }
end

#write_to_log(msg) ⇒ Object



140
141
142
143
144
145
# File 'lib/file_monitoring/monitor_path.rb', line 140

# Sends +msg+ to the logger stored in @@log at info level.
# Nothing happens while no logger is set. The first outputter of the logger
# is flushed after the message when Params['log_flush_each_message'] is set.
#
# Arguments:
# * <tt>msg</tt> - message to log
def write_to_log(msg)
  return unless @@log

  @@log.info(msg)
  @@log.outputters[0].flush if Params['log_flush_each_message']
end