Class: FileIndexing::IndexAgent

Inherits:
Object
  • Object
show all
Defined in:
lib/file_indexing/index_agent.rb

Constant Summary collapse

LOCALTZ =

Why are those lines needed?

Time.now.zone

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ IndexAgent

Returns a new instance of IndexAgent.



23
24
25
26
# File 'lib/file_indexing/index_agent.rb', line 23

# Build an agent that has indexed nothing yet: an empty set of failed files
# and an empty ContentData::ContentData to receive the indexed instances.
def initialize
  @failed_files = Set.new
  @indexed_content = ContentData::ContentData.new
end

Instance Attribute Details

#failed_files ⇒ Object (readonly)

Returns the value of attribute failed_files.



17
18
19
# File 'lib/file_indexing/index_agent.rb', line 17

# Read accessor for @failed_files: the Set created empty in #initialize, to
# which #index adds the path of every file whose checksum calculation failed.
def failed_files
  @failed_files
end

#indexed_content ⇒ Object (readonly)

Returns the value of attribute indexed_content.



17
18
19
# File 'lib/file_indexing/index_agent.rb', line 17

# Read accessor for @indexed_content: the ContentData::ContentData object
# created empty in #initialize and filled by #index through add_instance.
def indexed_content
  @indexed_content
end

Class Method Details

.create_shallow_instance(filename) ⇒ Object



153
154
155
156
157
158
159
160
161
# File 'lib/file_indexing/index_agent.rb', line 153

# Build a shallow (no server object) representation of a file instance.
#
# filename - path of the file to describe.
#
# Returns nil when the path does not exist, otherwise a three element Array:
#   [size in bytes, "<hostname>,<device>,<absolute path>", mtime as Integer]
def self.create_shallow_instance(filename)
  # File.exists? was deprecated and then removed in Ruby 3.2; File.exist? is
  # the supported spelling and behaves the same.
  return nil unless File.exist?(filename)

  file_stats = File.lstat(filename)
  file_mtime = get_correct_mtime(filename)
  server_name = `hostname`.strip
  global_name = "#{server_name},#{file_stats.dev},#{File.expand_path(filename)}"
  [file_stats.size, global_name, file_mtime.to_i]
end

.get_checksum(filename) ⇒ Object

Calculate file checksum (SHA1)



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/file_indexing/index_agent.rb', line 29

# Compute the SHA1 checksum of a file, reading it in 64 KiB chunks.
#
# filename - path of the file to hash.
#
# Returns the lowercase hex digest String, or false (after logging a
# warning) when the file raises Errno::EACCES or Errno::ETXTBSY.
def self.get_checksum(filename)
  sha1 = Digest::SHA1.new
  File.open(filename, 'rb') do |io|
    loop do
      chunk = io.read(65536)
      # read(length) returns nil once the end of the file is reached
      break if chunk.nil?
      sha1.update(chunk)
    end
  end
  hex = sha1.hexdigest.downcase
  Log.debug1("#{filename} sha1 #{hex}")
  hex
rescue Errno::EACCES, Errno::ETXTBSY => error
  Log.warning("#{error.message}")
  false
end

.get_content_checksum(content) ⇒ Object



45
46
47
48
49
50
# File 'lib/file_indexing/index_agent.rb', line 45

# Calculate the SHA1 checksum of an in-memory string.
#
# content - String whose bytes are hashed.
#
# Returns the hex digest String. Digest's hexdigest is already lowercase, so
# no extra downcase is needed.
def self.get_content_checksum(content)
  Digest::SHA1.hexdigest(content)
end

.get_correct_mtime(file) ⇒ Object

TODO(kolman): Replace this with File.lstat(file).mtime when new version of Ruby comes out. bugs.ruby-lang.org/issues/6385



60
61
62
63
64
65
66
67
# File 'lib/file_indexing/index_agent.rb', line 60

# Read the modification time of a file through an open file handle.
#
# TODO(kolman): Replace this with File.lstat(file).mtime when new version of
# Ruby comes out. bugs.ruby-lang.org/issues/6385
#
# file - path of the file to inspect.
#
# Returns the Time given by File#mtime, or the Integer 0 (after logging a
# warning) when opening the file raises Errno::EACCES. Other errors raised by
# File.open are not rescued here.
def self.get_correct_mtime(file)
  File.open(file, 'r') { |f| f.mtime }
rescue Errno::EACCES => e
  Log.warning("Could not open file #{file} to get mtime. #{e}")
  0
end

.global_path(filename) ⇒ Object



163
164
165
166
167
# File 'lib/file_indexing/index_agent.rb', line 163

# Build the global name of a file: "<hostname>,<device>,<filename>".
#
# filename - path of an existing file; it is used as given (not expanded).
#
# Returns the comma separated String. File.lstat raises if the path cannot
# be stat'ed.
def self.global_path(filename)
  server_name = `hostname`.strip
  device = File.lstat(filename).dev
  "#{server_name},#{device},#{filename}"
end

Instance Method Details

#collect(pattern) ⇒ Object

get all files satisfying the pattern



54
55
56
# File 'lib/file_indexing/index_agent.rb', line 54

# List every filesystem entry matching the glob +pattern+.
#
# pattern - glob expression; it is converted with to_s before matching.
#
# Returns an Array of matching path Strings (empty when nothing matches).
def collect(pattern)
  Dir[pattern.to_s]
end

#index(patterns, otherDB = nil) ⇒ Object

Indexes the device according to the given patterns and stores the result. Does not automatically add otherDB to the stored result. TODO: device support.



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/file_indexing/index_agent.rb', line 73

# Index the files selected by +patterns+ and store them in @indexed_content.
#
# patterns - object answering positive_patterns and negative_patterns, each a
#            list of glob patterns handed to #collect. Files matching a
#            negative pattern are excluded.
# otherDB  - optional, previously built DB (anything answering each_instance
#            the way ContentData::ContentData is used below). A file whose
#            path, size and modification time match an instance recorded for
#            this server reuses that instance's checksum instead of being
#            hashed again. otherDB itself is not merged into the result.
#
# Files whose checksum cannot be calculated are added to @failed_files.
# Aborts the process when @indexed_content is not empty: only one run of
# index is supported per agent.
#
# TODO device support
def index(patterns, otherDB = nil)
  abort "#{self.class}: DB not empty. Current implementation permits only one running of index" \
        unless @indexed_content.empty?
  local_server_name = `hostname`.strip

  # If there is a given DB, keep only the instances that were already
  # indexed on this server.
  otherDB_updated = ContentData::ContentData.new
  unless otherDB.nil?
    otherDB.each_instance do |checksum, size, _content_mod_time, instance_mod_time, server, path|
      next unless server == local_server_name
      otherDB_updated.add_instance(checksum, size, server, path, instance_mod_time)
    end
  end

  # Add files found by positive patterns.
  files = patterns.positive_patterns.flat_map { |pattern| collect(pattern) }.uniq

  Log.debug1 "Files: #{files}."

  # Expand to absolute paths. Two different spellings can expand to the same
  # path, so duplicates are dropped again after the expansion.
  files = files.map { |f| File.expand_path(f) }.uniq

  # Remove files found by negative patterns.
  forbid_files = patterns.negative_patterns.flat_map { |pattern| collect(pattern) }
  files -= forbid_files.map { |f| File.expand_path(f) }

  # Create and add contents and instances.
  files.each do |file|
    file_stats = File.lstat(file)

    # Index only files. Checked before reading the mtime so that directories
    # are never opened.
    next if file_stats.directory?

    file_mtime = IndexAgent.get_correct_mtime(file)

    # Add files present in the given DB to the DB and remove these files
    # from further processing (saves the checksum calculation).
    file_match = false
    if otherDB_updated.instance_exists(file, local_server_name)
      otherDB_updated.each_instance do |checksum, size, _content_mod_time, instance_mod_time, server, path|
        # Only the instance recorded for this very path may donate its
        # checksum. Without this check any instance with the same size and
        # mtime would match. If the stored path is spelled differently the
        # file is simply hashed again below.
        next unless path == file

        if size == file_stats.size && instance_mod_time == file_mtime.to_i
          @indexed_content.add_instance(checksum, size, server, file, instance_mod_time)
          file_match = true
          break
        else
          Log.warning("File (#{file}) size or modification file is different. size=#{size}  actual size=#{file_stats.size}" + \
                      "   instance_mod_time=#{Time.at(instance_mod_time)}  actual=#{file_mtime}")
        end
      end
    end
    next if file_match

    # Calculate a checksum; get_checksum returns false on failure.
    unless (checksum = self.class.get_checksum(file))
      Log.warning("Cheksum failure: " + file)
      @failed_files.add(file)
      next
    end

    # file is already an absolute path at this point.
    @indexed_content.add_instance(checksum, file_stats.size, local_server_name,
                                  file, file_mtime.to_i)
  end
end