Class: FileUtils::FileUtils

Inherits:
Object
  • Object
show all
Defined in:
lib/file_utils/file_utils.rb

Overview

Params.string ‘config_file’, ‘path’ ,‘configuration file path’

Class Method Summary collapse

Class Method Details

Creates a directory structure under dest (used as the root dir) that mirrors the structure of ref_cd, and creates symlinks in it to the base_cd files. Parameters: ref_cd [ContentData]

base_cd [ContentData]
dest [String]

Output: a ContentData object consisting of the contents/instances from ref_cd that have no target in base_cd

Raises:

  • (NotImplementedError)


195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/file_utils/file_utils.rb', line 195

# Creates, under +dest+, a directory tree that follows the instance paths of
# +ref_cd+ and places a symlink at <tt>dest + path</tt> for every instance whose
# checksum is reported as present by +base_cd+.
#
# NOTE(review): each symlink targets the instance's own +path+ as yielded by
# +ref_cd+ - confirm that this is the intended link target.
#
# @param ref_cd [ContentData] reference content data; iterated with +each_instance+
# @param base_cd [ContentData] base content data; queried with +content_exists+
# @param dest [String] root directory of the created tree; a single trailing
#   "/" or "\\" is ignored. The string passed in is not modified.
# @return [ContentData] the instances of +ref_cd+ whose checksum is not in +base_cd+
# @raise [NotImplementedError] on Windows platforms
def self.mksymlink(ref_cd, base_cd, dest)
  # Symlinks are not implemented on Windows. The Windows platform names are
  # matched explicitly: a bare /win/ also matches "darwin" and rejected macOS.
  raise NotImplementedError if RUBY_PLATFORM =~ /mswin|msys|mingw|cygwin|bccwin|wince/

  not_found_cd = ContentData::ContentData.new
  warnings = ""
  # Drop one trailing separator without mutating the caller's string.
  root = dest.end_with?("/", "\\") ? dest.chop : dest

  ref_cd.each_instance { |checksum, size, _content_mod_time, inst_mod_time, server, path|
    if base_cd.content_exists(checksum)
      symlink_path = root + path
      # mkdir_p does nothing for directories that already exist, so no
      # separate existence check is needed.
      ::FileUtils.mkdir_p(File.dirname(symlink_path))
      File.symlink(path, symlink_path)
    else
      # add instance to not_found cd
      not_found_cd.add_instance(checksum, size, server, path, inst_mod_time)
      warnings << "Warning: base content does not contains:'%s'\n" % checksum
    end
  }
  # Only log when at least one instance was missing from base_cd.
  Log.warning(warnings) unless warnings.empty?
  not_found_cd
end

.runObject



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/file_utils/file_utils.rb', line 35

# Runs the utility selected by Params['command'].
#
# Commands handled:
# * 'mksymlink'  - needs 'ref_cd', 'base_cd' and 'dest'; returns the value of
#   mksymlink, or nil when symlinks are not implemented on this machine.
# * 'merge', 'intersect', 'minus' - delegated to content_data_command.
# * 'unify_time' - needs 'cd'; returns the value of unify_time.
# * 'indexer'    - needs 'patterns' (colon-separated); 'exist_cd' is optional.
# * 'crawler'    - needs 'conf_file'; 'cd_out' gets a timestamped default and
#   'cd_in', when given, must name an existing file.
# * 'generate_files' - runs a FileGenerator.
#
# A missing or invalid parameter is reported through Log.error and the method
# returns nil. Any other command does nothing.
#
# NOTE(review): the "Error loading content data" checks call +nil?+ on an
# object that was just created with +new+, so they look unreachable unless the
# content data class overrides +nil?+ - confirm how from_file reports failure.
#
# @return [Object, nil] depends on the command, see above
def self.run
  if Params['command'] == 'mksymlink'
    if Params['ref_cd'].nil?
      Log.error("--ref_cd is not set")
      return
    end
    # ContentData::ContentData is the class used by the rest of this file.
    ref_cd = ContentData::ContentData.new
    ref_cd.from_file(Params['ref_cd'])
    if ref_cd.nil?
      Log.error("Error loading content data ref_cd=%s" % Params['ref_cd'])
      return
    end

    if Params['base_cd'].nil?
      Log.error("--base_cd is not set")
      return
    end
    base_cd = ContentData::ContentData.new
    base_cd.from_file(Params['base_cd'])
    if base_cd.nil?
      Log.error("Error loading content data base_cd=%s" % Params['base_cd'])
      return
    end

    if Params['dest'].nil?
      Log.error("--dest is not set")
      return
    end

    begin
      FileUtils.mksymlink(ref_cd, base_cd, Params['dest'])
    rescue NotImplementedError
      Log.error("symlinks are unimplemented on this machine")
      nil
    end
  elsif (Params['command'] == "merge" ||
         Params['command'] == "intersect" ||
         Params['command'] == "minus")
    content_data_command
  elsif Params['command'] == 'unify_time'
    if Params['cd'].nil?
      Log.error("--cd is not set")
      return
    end
    cd = ContentData::ContentData.new
    cd.from_file(Params['cd'])
    if cd.nil?
      Log.error("Error loading content data cd=%s" % Params['cd'])
      return
    end
    unify_time(cd)
    # indexer
  elsif Params['command'] == 'indexer'
    if Params['patterns'].nil?
      Log.error("--patterns is not set")
      return
    end

    patterns = FileIndexing::IndexerPatterns.new
    Params['patterns'].split(':').each { |pattern|
      Log.debug1 "Pattern: #{pattern}"
      patterns.add_pattern File.expand_path(pattern)
    }

    unless patterns.size > 0
      Log.error("Error loading patterns=%s (empty file)" % Params['patterns'])
      return
    end

    exist_cd = nil
    unless Params['exist_cd'].nil?
      exist_cd = ContentData::ContentData.new
      exist_cd.from_file(Params['exist_cd'])
      if exist_cd.nil?
        Log.error("Error loading content data exist_cd=%s" % Params['exist_cd'])
        return
      end
    end
    indexer = FileIndexing::IndexAgent.new
    indexer.index(patterns, exist_cd)
    Log.debug1 indexer.indexed_content.to_s
    # crawler
  elsif Params['command'] == 'crawler'
    if Params['conf_file'].nil?
      Log.error("--conf_file is not set")
      return
    end
    unless File.exist?(Params['conf_file'])
      Log.error("config file doesn't exist conf_file=%s" % Params['conf_file'])
      return
    end

    if Params['cd_out'].nil?
      time = Time.now.utc
      # NOTE(review): the '/' characters in this format make the default
      # output name a nested path - confirm that this is intended.
      Params['cd_out'] = "crawler.out.#{time.strftime('%Y/%m/%d_%H-%M-%S')}"
    end
    # The input data file is optional, but when given it has to exist.
    unless Params['cd_in'].nil?
      unless File.exist?(Params['cd_in'])
        Log.error("input data file doesn't exist cd_in=%s" % Params['cd_in'])
        return
      end
    end

    conf = Configuration.new(Params['conf_file'])
    # One crawler thread per configured server; wait for all of them before
    # joining the per-server results.
    threads = conf.server_conf_vec.map do |server|
      Thread.new { Crawler.new(server, Params['cd_out'], Params['cd_in']) }
    end

    threads.each { |thread| thread.join }
    join_servers_results(conf.server_conf_vec, Params['cd_out'])
  elsif Params['command'] == 'generate_files'
    fg = FileGenerator::FileGenerator.new
    fg.run
  end
end

.unify_time(content_data) ⇒ Object

then the assumption is that this file wasn't indexed and it will not be treated

(i.e., we do nothing with it)


164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/file_utils/file_utils.rb', line 164

# Unifies the instance modification times held in +content_data+ and then
# updates the modification time of each file on disk that still carries its
# pre-unification instance time.
#
# A file's mtime is rewritten only when all of the following hold:
# * its current mtime (in seconds) equals the instance time recorded before
#   unification,
# * its current size equals the recorded size,
# * its current mtime differs from the unified instance time.
#
# @param content_data [ContentData] content data to unify; its +unify_time+
#   method is called on it
# @return [ContentData] the same +content_data+ object that was passed in
def self.unify_time(content_data)
  # Copy taken before unification so the previous instance times can be looked up.
  snapshot = ContentData::ContentData.new(content_data)
  content_data.unify_time
  content_data.each_instance do |checksum, size, _content_mod_time, unified_inst_mod_time, server, path|
    previous_mtime = snapshot.get_instance_mod_time(checksum, [server, path])
    disk_mtime, disk_size = File.open(path) { |file| [file.mtime, file.size] }
    disk_seconds = disk_mtime.to_i
    Log.debug1 "file:#{path} file_mtime:#{disk_mtime}."
    Log.debug1 "update mtime:#{unified_inst_mod_time}"
    Log.debug1 "original instance mtime:#{previous_mtime}."
    Log.debug1 "unify instance mtime:#{unified_inst_mod_time}."
    Log.debug1 "Comparison: Real file = unified? #{disk_seconds == unified_inst_mod_time}"

    # Skip files that no longer match the recorded instance, or that already
    # carry the unified time.
    matches_record = disk_seconds == previous_mtime && disk_size == size
    next unless matches_record && disk_seconds != unified_inst_mod_time

    Log.debug1("Comparison results: File actual time is same as instance time before unification. Need to modify file time")
    # Keep the access time, replace only the modification time.
    File.utime(File.atime(path), unified_inst_mod_time, path)
    refreshed_mtime = File.open(path) { |file| file.mtime }
    Log.debug1 "new file mtime:#{refreshed_mtime}."
    Log.debug1 "new file mtime in seconds:#{refreshed_mtime.to_i}."
  end
  content_data
end