Class: FileIndexing::IndexAgent
- Inherits:
-
Object
- Object
- FileIndexing::IndexAgent
- Defined in:
- lib/file_indexing/index_agent.rb
Constant Summary collapse
- LOCALTZ =
Why are those lines needed?
Time.now.zone
Instance Attribute Summary collapse
-
#failed_files ⇒ Object
readonly
Returns the value of attribute failed_files.
-
#indexed_content ⇒ Object
readonly
Returns the value of attribute indexed_content.
Class Method Summary collapse
- .create_shallow_instance(filename) ⇒ Object
-
.get_checksum(filename) ⇒ Object
Calculate file checksum (SHA1).
- .get_content_checksum(content) ⇒ Object
-
.get_correct_mtime(file) ⇒ Object
TODO(kolman): Replace this with File.lstat(file).mtime when new version of Ruby comes out.
- .global_path(filename) ⇒ Object
Instance Method Summary collapse
-
#collect(pattern) ⇒ Object
get all files satisfying the pattern.
-
#index(patterns, otherDB = nil) ⇒ Object
Indexes the device according to the given patterns and stores the result. Does not automatically add otherDB to the stored result. TODO: device support.
-
#initialize ⇒ IndexAgent
constructor
A new instance of IndexAgent.
Constructor Details
#initialize ⇒ IndexAgent
Returns a new instance of IndexAgent.
23 24 25 26 |
# File 'lib/file_indexing/index_agent.rb', line 23
# Constructor: starts with a fresh content container and an empty set of
# failed files.
def initialize
  @failed_files = Set.new
  @indexed_content = ContentData::ContentData.new
end
Instance Attribute Details
#failed_files ⇒ Object (readonly)
Returns the value of attribute failed_files.
17 18 19 |
# File 'lib/file_indexing/index_agent.rb', line 17
# Read-only accessor.
#
# @return [Object] the current value of @failed_files
def failed_files
  @failed_files
end
#indexed_content ⇒ Object (readonly)
Returns the value of attribute indexed_content.
17 18 19 |
# File 'lib/file_indexing/index_agent.rb', line 17
# Read-only accessor.
#
# @return [Object] the current value of @indexed_content
def indexed_content
  @indexed_content
end
Class Method Details
.create_shallow_instance(filename) ⇒ Object
153 154 155 156 157 158 159 160 161 |
# File 'lib/file_indexing/index_agent.rb', line 153
# Describes a file without computing its checksum.
#
# @param filename [String] path of the file to describe
# @return [Array, nil] +[size, "hostname,device,absolute_path", mtime_as_integer]+,
#   or +nil+ when +filename+ does not exist
def self.create_shallow_instance(filename)
  # File.exists? was removed in Ruby 3.2; File.exist? is available everywhere.
  return nil unless File.exist?(filename)

  file_stats = File.lstat(filename)
  file_mtime = get_correct_mtime(filename)
  # return instance shallow representation (no server)
  location = format('%s,%s,%s', `hostname`.strip, file_stats.dev.to_s, File.expand_path(filename))
  [file_stats.size, location, file_mtime.to_i]
end
.get_checksum(filename) ⇒ Object
Calculate file checksum (SHA1)
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/file_indexing/index_agent.rb', line 29
# Calculate file checksum (SHA1).
#
# @param filename [String] path of the file to hash
# @return [String, false] lowercase hexadecimal SHA1 digest of the file
#   contents, or +false+ when Errno::EACCES or Errno::ETXTBSY is raised
def self.get_checksum(filename)
  digest = Digest::SHA1.new
  # Read in 64 KiB chunks so the whole file is never held in memory at once.
  File.open(filename, 'rb') do |f|
    while (buffer = f.read(65_536))
      digest << buffer
    end
  end
  checksum = digest.hexdigest.downcase
  Log.debug1("#{filename} sha1 #{checksum}")
  checksum
rescue Errno::EACCES, Errno::ETXTBSY => e
  Log.warning(e.message)
  false
end
.get_content_checksum(content) ⇒ Object
45 46 47 48 49 50 |
# File 'lib/file_indexing/index_agent.rb', line 45
# Calculate the SHA1 checksum of a string held in memory.
#
# @param content [String] data to hash
# @return [String] lowercase hexadecimal SHA1 digest of +content+
def self.get_content_checksum(content)
  Digest::SHA1.hexdigest(content).downcase
end
.get_correct_mtime(file) ⇒ Object
TODO(kolman): Replace this with File.lstat(file).mtime when new version of Ruby comes out. bugs.ruby-lang.org/issues/6385
60 61 62 63 64 65 66 67 |
# File 'lib/file_indexing/index_agent.rb', line 60
# Reads a file's modification time through an open file handle.
#
# @param file [String] path of the file to inspect
# @return [Time, Integer] the modification time, or +0+ when opening the file
#   raises Errno::EACCES (a warning is logged in that case)
def self.get_correct_mtime(file)
  File.open(file, 'r', &:mtime)
rescue Errno::EACCES => e
  Log.warning("Could not open file #{file} to get mtime. #{e}")
  0
end
.global_path(filename) ⇒ Object
163 164 165 166 167 |
# File 'lib/file_indexing/index_agent.rb', line 163
# Builds the "hostname,device,filename" identifier of a file.
#
# @param filename [String] path of an existing file; used verbatim in the result
# @return [String] hostname, device number and +filename+ joined by commas
def self.global_path(filename)
  server_name = `hostname`.strip
  device = File.lstat(filename).dev.to_s
  format('%s,%s,%s', server_name, device, filename)
end
Instance Method Details
#collect(pattern) ⇒ Object
get all files satisfying the pattern
54 55 56 |
# File 'lib/file_indexing/index_agent.rb', line 54
# Get all files satisfying the pattern.
#
# @param pattern [#to_s] glob expression; converted with +to_s+ before use
# @return [Array<String>] the paths returned by Dir.glob for that expression
def collect(pattern)
  glob_expression = pattern.to_s
  Dir.glob(glob_expression)
end
#index(patterns, otherDB = nil) ⇒ Object
Indexes the device according to the given patterns and stores the result. Does not automatically add otherDB to the stored result. TODO: device support.
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# File 'lib/file_indexing/index_agent.rb', line 73
# Indexes the files selected by +patterns+ and stores the result in
# @indexed_content. otherDB is consulted only to reuse checksums of files
# already indexed on this server; it is not added to the stored result
# automatically.
# TODO device support
#
# Aborts the process when @indexed_content is not empty.
#
# @param patterns [#positive_patterns, #negative_patterns] glob patterns of
#   files to include and to exclude
# @param otherDB [#each_instance, nil] optional earlier index used as a
#   checksum cache
# @return [Array<String>] the absolute paths that were examined
def index(patterns, otherDB = nil)
  abort "#{self.class}: DB not empty. Current implementation permits only one running of index" \
      unless @indexed_content.empty?

  local_server_name = `hostname`.strip

  # If there is a given DB, keep the instances that were already indexed on
  # this server.
  otherDB_updated = ContentData::ContentData.new
  unless otherDB.nil?
    otherDB.each_instance do |checksum, size, _content_mod_time, instance_mod_time, server, path|
      next unless server == local_server_name

      otherDB_updated.add_instance(checksum, size, server, path, instance_mod_time)
    end
  end

  # add files found by positive patterns (duplicates removed, order kept)
  files = patterns.positive_patterns.flat_map { |pattern| collect(pattern) }.uniq
  Log.debug1 "Files: #{files}."

  # expand to absolute paths
  files.map! { |f| File.expand_path(f) }

  # remove files found by negative patterns
  patterns.negative_patterns.each do |pattern|
    collect(pattern).each { |f| files.delete(File.expand_path(f)) }
  end

  # create and add contents and instances
  files.each do |file|
    file_stats = File.lstat(file)
    # index only files
    next if file_stats.directory?

    file_mtime = IndexAgent.get_correct_mtime(file)

    # Reuse the checksum from the given DB when the file is unchanged, which
    # saves the checksum calculation.
    # NOTE(review): assumes instance_exists is a side-effect-free lookup, so
    # it is evaluated once per file instead of once per cached instance.
    file_match = false
    if otherDB_updated.instance_exists(file, local_server_name)
      otherDB_updated.each_instance do |checksum, size, _content_mod_time, instance_mod_time, server, path|
        # Only the cached entry for this very path may vouch for the file;
        # comparing against other entries could reuse a foreign checksum.
        next unless path == file

        if size == file_stats.size && instance_mod_time == file_mtime.to_i
          @indexed_content.add_instance(checksum, size, server, file, instance_mod_time)
          file_match = true
          break
        else
          Log.warning("File (#{file}) size or modification file is different. size=#{size} actual size=#{file_stats.size}" + \
                      " instance_mod_time=#{Time.at(instance_mod_time)} actual=#{file_mtime}")
        end
      end
    end
    next if file_match

    # calculate a checksum; get_checksum returns false on failure
    checksum = self.class.get_checksum(file)
    unless checksum
      Log.warning("Cheksum failure: " + file)
      @failed_files.add(file)
      next
    end

    @indexed_content.add_instance(checksum, file_stats.size, local_server_name,
                                  File.expand_path(file), file_mtime.to_i)
  end
end