Class: ContentData::ContentData

Inherits:
Object
  • Object
show all
Defined in:
lib/content_data/content_data.rb

Overview

Unfortunately this class is used as a mutable object for now, so callers need to be careful. TODO(kolman): Make this class immutable, but add an indexing structure to it. TODO(kolman): Add a wrapper to the class to enable dynamic content data (with easy-access indexes).

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(copy = nil) ⇒ ContentData

Returns a new instance of ContentData.

Parameters:

  • copy (ContentData, nil) (defaults to: nil) — content data to copy; when nil, an empty ContentData is created


125
126
127
128
129
130
131
132
133
134
# File 'lib/content_data/content_data.rb', line 125

# Creates an empty ContentData, or a shallow copy of +copy+ when one is given.
#
# @param copy [ContentData, nil] object to copy; it must respond to #contents
#   and #instances. When nil, both tables start out empty.
def initialize(copy = nil)
  # @contents:  checksum => Content object
  # @instances: instance global path => ContentInstance object
  # When copying, only the two hashes are cloned; the stored values are shared
  # with +copy+.
  @contents = copy.nil? ? {} : copy.contents.clone
  @instances = copy.nil? ? {} : copy.instances.clone
end

Instance Attribute Details

#contentsObject (readonly)

Returns the value of attribute contents.



122
123
124
# File 'lib/content_data/content_data.rb', line 122

# Reader for the contents table.
#
# @return [Hash] the hash stored in @contents (the live object, not a copy)
def contents
  return @contents
end

#instancesObject (readonly)

Returns the value of attribute instances.



122
123
124
# File 'lib/content_data/content_data.rb', line 122

# Reader for the instances table.
#
# @return [Hash] the hash stored in @instances (the live object, not a copy)
def instances
  return @instances
end

Class Method Details

.format_time(time) ⇒ Object



265
266
267
268
269
# File 'lib/content_data/content_data.rb', line 265

# Serializes a Time as a decimal string of whole seconds since the epoch.
#
# @param time [Time] the time to format
# @return [String, nil] the formatted value, or nil when +time+ is not
#   exactly an instance of Time
def self.format_time(time)
  time.instance_of?(Time) ? time.to_i.to_s : nil
end

.intersect(a, b) ⇒ Object

returns the common content in both a and b



341
342
343
344
# File 'lib/content_data/content_data.rb', line 341

# Returns the content common to both +a+ and +b+.
#
# Computed as "b minus (b minus a)" using two ContentData.remove calls, so the
# returned entries are taken from +b+.
#
# @param a [ContentData]
# @param b [ContentData]
# @return [Object] whatever the second ContentData.remove call returns
def self.intersect(a, b)
  only_in_b = ContentData.remove(a, b)
  ContentData.remove(only_in_b, b)
end

.merge(a, b) ⇒ Object

merges content data a and content data b to a new content data and returns it.



272
273
274
275
276
277
278
279
280
281
282
283
284
# File 'lib/content_data/content_data.rb', line 272

# Merges content data +a+ and content data +b+ into a new ContentData.
#
# When one argument is nil the other argument itself is returned (not a copy).
#
# @param a [ContentData, nil]
# @param b [ContentData, nil]
# @return [ContentData, nil] the merged data; nil when a non-nil argument is
#   not exactly a ContentData instance
def self.merge(a, b)
  return b if a.nil?
  return a if b.nil?
  return nil unless a.instance_of?(ContentData) && b.instance_of?(ContentData)

  merged = ContentData.new
  [a, b].each { |source| merged.merge(source) }
  merged
end

.parse_time(time_str) ⇒ Object



259
260
261
262
263
# File 'lib/content_data/content_data.rb', line 259

# Parses a string of decimal seconds since the epoch into a Time.
#
# @param time_str [String] decimal integer; surrounding whitespace (such as a
#   trailing newline) is accepted
# @return [Time, nil] the parsed time, or nil when +time_str+ is not exactly
#   a String instance
# @raise [ArgumentError] when the string is not a decimal integer
def self.parse_time(time_str)
  return nil unless time_str.instance_of?(String)

  # Integer() is used instead of String#to_i because it rejects non-numeric
  # input. Base 10 is forced: without it Integer() honours radix prefixes, so
  # "010" would parse as octal 8 and "0x10" as 16.
  Time.at(Integer(time_str, 10))
end

.remove(a, b) ⇒ Object

Removes content data a from content data b and returns the result as a new content data.



287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
# File 'lib/content_data/content_data.rb', line 287

# Removes content data +a+ from content data +b+ and returns the result as a
# new ContentData; neither argument is modified.
#
# A content of +b+ is kept when its checksum does not exist in +a+; an
# instance of +b+ is kept under the same condition on the instance's checksum.
#
# @param a [ContentData] data to subtract
# @param b [ContentData] data to subtract from
# @return [ContentData, nil] nil when either argument is not exactly a
#   ContentData instance
def self.remove(a, b)
  return nil unless a.instance_of?(ContentData) && b.instance_of?(ContentData)

  difference = ContentData.new

  b.contents.each_value do |content|
    next if a.content_exists(content.checksum)
    difference.add_content(content)
  end

  b.instances.each_value do |instance|
    next if a.content_exists(instance.checksum)
    difference.add_instance(instance)
  end

  difference
end

.remove_directory(cd, global_dir_path) ⇒ Object



324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# File 'lib/content_data/content_data.rb', line 324

# Builds a new ContentData holding only the instances of +cd+ whose global
# path has no match for +global_dir_path+, together with their contents.
#
# Matching uses String#scan, so a match anywhere in the global path (not only
# at its beginning) excludes the instance.
#
# @param cd [ContentData] source data; not modified
# @param global_dir_path [String] path to filter out
# @return [ContentData, nil] nil when +cd+ is not exactly a ContentData instance
def self.remove_directory(cd, global_dir_path)
  return nil unless cd.instance_of?(ContentData)

  kept = ContentData.new
  cd.instances.each_value do |instance|
    Log.debug3("global path to check: #{global_dir_path}")
    Log.debug3("instance global path: #{instance.global_path}")
    next unless instance.global_path.scan(global_dir_path).empty?

    Log.debug3("Adding instance.")
    # The content is added first so that the instance has something to refer to.
    kept.add_content(cd.contents[instance.checksum])
    kept.add_instance(instance)
  end
  kept
end

.remove_instances(a, b) ⇒ Object



310
311
312
313
314
315
316
317
318
319
320
321
322
# File 'lib/content_data/content_data.rb', line 310

# Builds a new ContentData from the instances of +b+ whose global path is not
# a key of +a+'s instances, together with the contents they refer to.
#
# @param a [ContentData] data whose instance paths are excluded
# @param b [ContentData] data to filter; not modified
# @return [ContentData, nil] nil when either argument is not exactly a
#   ContentData instance
def self.remove_instances(a, b)
  return nil unless a.instance_of?(ContentData) && b.instance_of?(ContentData)

  remaining = ContentData.new
  b.instances.each_value do |instance|
    next if a.instances.key?(instance.global_path)

    remaining.add_content(b.contents[instance.checksum])
    remaining.add_instance(instance)
  end
  remaining
end

.unify_time(db) ⇒ Object

Unifies the time of all entries that share the same content to the minimal time found among them.



347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
# File 'lib/content_data/content_data.rb', line 347

# Returns a new ContentData in which every entry sharing a checksum carries the
# same time: the minimum over the content's first_appearance_time and the
# modification_time of all its instances. +db+ itself is not modified.
#
# Entries that already carry the minimal time are reused as they are; the
# others are rebuilt through Content.new / ContentInstance.new.
#
# @param db [ContentData] source data
# @return [ContentData] the time-unified data
def self.unify_time(db)
  unified = ContentData.new
  earliest = {}             # checksum => minimal time seen for that checksum
  instances_by_checksum = {} # checksum => instances, in iteration order

  # Records +time+ when it is the first, or a strictly smaller, time for +checksum+.
  note_time = lambda do |checksum, time|
    if !earliest.has_key?(checksum) || (earliest[checksum] <=> time) > 0
      earliest[checksum] = time
    end
  end

  # Pass 1: group instances by checksum and collect their times.
  db.instances.each_value do |instance|
    checksum = instance.checksum
    time = instance.modification_time
    (instances_by_checksum[checksum] ||= []) << instance
    note_time.call(checksum, time)
  end

  # Pass 2: fold in the times recorded on the contents themselves.
  db.contents.each do |checksum, content|
    note_time.call(checksum, content.first_appearance_time)
  end

  # Pass 3: emit contents, rebuilding those whose time differs from the minimum.
  db.contents.each do |checksum, content|
    time = earliest[checksum]
    unchanged = (content.first_appearance_time <=> time) == 0
    unified.add_content(unchanged ? content : Content.new(checksum, content.size, time))
  end

  # Pass 4: emit instances; any instance not at the minimum has a later time.
  instances_by_checksum.each do |checksum, group|
    time = earliest[checksum]
    group.each do |instance|
      if (instance.modification_time <=> time) == 0
        unified.add_instance(instance)
      else
        unified.add_instance(ContentInstance.new(instance.checksum, instance.size,
                                                 instance.server_name, instance.device,
                                                 instance.full_path, time))
      end
    end
  end

  unified
end

Instance Method Details

#==(other) ⇒ Object



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/content_data/content_data.rb', line 177

# Compares two content data objects entry by entry.
#
# Both objects are equal when their contents and instances tables have the
# same sizes and every key maps to equal values on both sides.
#
# @param other [Object] object to compare with
# @return [Boolean] true when equal; false otherwise, including when +other+
#   is nil or does not expose #contents and #instances
def ==(other)
  return false if other.nil?
  # Anything without the two tables cannot be equal; answer false rather than raise.
  return false unless other.respond_to?(:contents) && other.respond_to?(:instances)
  return false unless @contents.size == other.contents.size
  return false unless @instances.size == other.instances.size

  @contents.each do |checksum, content|
    other_content = other.contents[checksum]
    if content != other_content
      # The times are logged only when both sides have the entry: with equal
      # sizes but different checksums +other_content+ is nil, and calling
      # first_appearance_time on it would raise instead of returning false.
      unless content.nil? || other_content.nil?
        Log.info content.first_appearance_time.to_i
        Log.info other_content.first_appearance_time.to_i
      end
      return false
    end
  end

  @instances.each do |path, instance|
    return false if instance != other.instances[path]
  end

  true
end

#add_content(content) ⇒ Object



136
137
138
# File 'lib/content_data/content_data.rb', line 136

# Registers +content+ under its checksum, replacing any entry already stored
# for that checksum.
#
# @param content [Content] object responding to #checksum
# @return [Content] the stored +content+
def add_content(content)
  @contents.store(content.checksum, content)
end

#add_instance(instance) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/content_data/content_data.rb', line 140

# Registers +instance+ under its global path, replacing any instance already
# stored for that path.
#
# The instance is rejected (with warnings logged) when no content with its
# checksum has been added, or when that content's size differs from the
# instance's size.
#
# @param instance [ContentInstance]
# @return [ContentInstance, false] the stored instance, or false when rejected
def add_instance(instance)
  unless @contents.key?(instance.checksum)
    Log.warning format("Adding instance while it's" +
                           " checksum %s does not exists.\n", instance.checksum)
    Log.warning format("%s\n", instance.to_s)
    return false
  end

  if @contents[instance.checksum].size != instance.size
    Log.warning 'File size different from content size while same checksum'
    Log.warning instance.to_s
    return false
  end

  # Overrides a previously stored instance with the same global path.
  @instances[instance.global_path] = instance
end

#content_exists(checksum) ⇒ Object

TODO: rename this method so that it ends with ‘?’, because it returns a boolean.



163
164
165
# File 'lib/content_data/content_data.rb', line 163

# Tells whether a content with the given checksum has been added.
#
# @param checksum [Object] key to look up in the contents table
# @return [Boolean]
def content_exists(checksum)
  return @contents.key?(checksum)
end

#empty?Boolean

Returns:

  • (Boolean)


158
159
160
# File 'lib/content_data/content_data.rb', line 158

# Tells whether no content has been added. Only the contents table is
# consulted; the instances table is not checked.
#
# @return [Boolean]
def empty?
  return @contents.empty?
end

#from_file(filename) ⇒ Object

TODO: validation that the file indeed contains ContentData is missing.



217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# File 'lib/content_data/content_data.rb', line 217

# Reads contents and instances from +filename+ and adds them to this object.
#
# Layout expected by the parser:
#   <number of contents>
#   one line per content:   checksum,size,time
#   <number of instances>
#   one line per instance:  six comma-separated fields, passed in order to
#                           ContentInstance.new; the fifth field may itself
#                           contain commas
# Times are parsed with ContentData.parse_time.
#
# TODO validation that the file indeed contains ContentData is missing.
#
# @param filename [String] path of the file to read
# @return [Integer] the instance count read from the file (result of Integer#times)
# @raise [RuntimeError] when the file has fewer instance lines than announced;
#   the file is first copied to "<filename>.bad"
def from_file(filename)
  lines = IO.readlines(filename)
  i = 0
  number_of_contents = lines[i].to_i
  i += 1
  number_of_contents.times do
    parameters = lines[i].split(",")
    add_content(Content.new(parameters[0],
                            parameters[1].to_i,
                            ContentData.parse_time(parameters[2])))
    i += 1
  end

  number_of_instances = lines[i].to_i
  i += 1
  number_of_instances.times do
    if lines[i].nil?
      # Truncated file: keep a copy for inspection, log what was read, and fail
      # with a clear message instead of a NoMethodError on nil.
      Log.info "lines[i] if nil !!!, Backing filename: #{filename} to #{filename}.bad"
      FileUtils.cp(filename, "#{filename}.bad")
      Log.info lines.join
      raise "Content data file #{filename} is truncated: " \
            "#{number_of_instances} instances announced, line #{i + 1} is missing"
    end
    parameters = lines[i].split(',')
    # A file name containing commas is split into too many fields: fold every
    # field between the fourth and the last one back into the path field.
    if parameters.size > 6
      parameters[4..-2] = parameters[4..-2].join(',')
    end

    add_instance(ContentInstance.new(parameters[0],
                                     parameters[1].to_i,
                                     parameters[2],
                                     parameters[3],
                                     parameters[4],
                                     ContentData.parse_time(parameters[5])))
    i += 1
  end
end

#get_query(variable, params) ⇒ Object

TODO: simplify conditions. This method is experimental and shouldn't be used. nil is used to define +/- infinity for the to/from method arguments. The from/to values are exclusive in the condition's calculations. Take care with the ‘==’ operation, which is used for object comparison; if needed, the user should define their own ‘==’ implementation.



511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
# File 'lib/content_data/content_data.rb', line 511

# EXPERIMENTAL - disabled: always raises before doing any work.
#
# The code after the guard is unreachable and documents the intended
# behaviour: select the instances whose instance variable +variable+ matches
# (or, when 'is_inside' is false, does not match) the given criteria, and
# return them with their contents in a new ContentData.
#
# @param variable [String] name of a ContentInstance instance variable, without "@"
# @param params [Hash] string-keyed options:
#   'exact'     - Array of accepted values
#   'from'/'to' - exclusive numeric bounds; nil means unbounded
#   'is_inside' - when true return matching instances, otherwise the rest
# @return [ContentData] never returned while the guard is in place
# @raise [RuntimeError] always
def get_query(variable, params)
  raise RuntimeError, 'This method is experimental and shouldn\'t be used'

  # ---- unreachable while the guard above is in place ----
  exact = params['exact'].nil? ? [] : params['exact']
  from = params['from']
  to = params['to']
  is_inside = params['is_inside']

  # NOTE(review): ContentInstance.new is called without arguments here, while
  # other code in this file passes six - confirm this works before enabling.
  unless ContentInstance.new.instance_variable_defined?("@#{variable}")
    raise ArgumentError, "#{variable} isn't a ContentInstance variable"
  end

  # exact is never nil at this point (it defaults to an empty array).
  if exact.empty? && from.nil? && to.nil?
    raise ArgumentError, 'At least one of the argiments {exact, from, to} must be defined'
  end

  if !(from.nil? || to.nil?) && !from.kind_of?(to.class)
    raise ArgumentError, 'to and from arguments should be comparable one with another'
  end

  # FIXME add support for from/to for Strings
  if (!from.nil? && !from.kind_of?(Numeric)) || (!to.nil? && !to.kind_of?(Numeric))
    raise ArgumentError, 'from and to options supported only for numeric values'
  end

  if !exact.empty? && (!from.nil? || !to.nil?)
    raise ArgumentError, 'exact and from/to options are mutually exclusive'
  end

  result_index = ContentData.new
  instances.each_value do |instance|
    var_value = instance.instance_variable_get("@#{variable}")

    # exact and from/to are mutually exclusive, so the range test applies only
    # when no exact values were given; otherwise the unbounded range would
    # match every instance.
    is_match =
      if exact.empty?
        (from.nil? || var_value > from) && (to.nil? || var_value < to)
      else
        exact.include?(var_value)
      end

    if (is_match && is_inside) || (!is_match && !is_inside)
      checksum = instance.checksum
      result_index.add_content(contents[checksum]) unless result_index.content_exists(checksum)
      result_index.add_instance instance
    end
  end
  result_index
end

#merge(content_data) ⇒ Object

TODO(kolman): The semantics of this merge are those of merge! (it modifies the receiver); change this throughout the file.



168
169
170
171
172
173
174
175
# File 'lib/content_data/content_data.rb', line 168

# Adds every content and then every instance of +content_data+ to this object.
# The receiver is modified in place.
#
# @param content_data [ContentData] source of the entries; not modified
# @return [Array] the array of instances that was iterated
def merge(content_data)
  incoming_contents = content_data.contents.values
  incoming_contents.each { |content| add_content(content) }

  incoming_instances = content_data.instances.values
  incoming_instances.each { |instance| add_instance(instance) }
end

#to_file(filename) ⇒ Object



210
211
212
213
214
# File 'lib/content_data/content_data.rb', line 210

# Writes the serialized form of this object (#to_s) to +filename+, creating
# the parent directory when it does not exist. An existing file is overwritten.
#
# @param filename [String] path of the file to write
# @return [Integer] number of bytes written
def to_file(filename)
  # Serialize before opening the file: opening with 'w' truncates it at once,
  # so a failure inside to_s would otherwise destroy the previous content.
  serialized = to_s
  content_data_dir = File.dirname(filename)
  FileUtils.makedirs(content_data_dir) unless File.directory?(content_data_dir)
  File.open(filename, 'w') { |f| f.write(serialized) }
end

#to_sObject



197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/content_data/content_data.rb', line 197

# Serializes this object as text: the number of contents, one line per
# content, the number of instances, one line per instance. Every line,
# including the last, ends with a newline.
#
# @return [String]
def to_s
  out = ""
  [@contents, @instances].each do |table|
    out << "#{table.length}\n"
    table.each_value { |entry| out << "#{entry}\n" }
  end
  out
end

#validate(params = nil) ⇒ Boolean

Validates the index against the file system, checking that all instances hold correct data regarding the files that they represent.

There are two levels of validation, controlled by instance_check_level system parameter:

  • shallow - quick, tests instance for file existence and attributes.

  • deep - can take more time, in addition to shallow recalculates hash sum.

Parameters:

  • params (Hash) (defaults to: nil)

    hash of parameters of validation, can be used to return additional data.

    Supported key/value combinations:

    • key is :failed value is ContentData used to return failed instances

Returns:

  • (Boolean)

    true when index is correct, false otherwise



417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
# File 'lib/content_data/content_data.rb', line 417

# Checks every instance with check_instance and reports whether all passed.
#
# @param params [Hash, nil] optional parameters. When params[:failed] is set,
#   it receives each failing instance (add_instance) preceded by that
#   instance's content (add_content), when the content is known.
# @return [Boolean] true when every instance passed, false otherwise
def validate(params = nil)
  is_valid = true

  instances.each_value do |instance|
    next if check_instance instance

    is_valid = false
    next if params.nil? || params.empty?

    content = contents[instance.checksum]
    failed = params[:failed]
    next if failed.nil?

    # The content goes first so that the instance has something to refer to.
    failed.add_content content unless content.nil?
    failed.add_instance instance
  end

  is_valid
end