Module: LogWeaver::PrefixGenerator

Defined in:
lib/log_weaver/prefix_generator.rb

Instance Method Summary collapse

Instance Method Details

#get_file_prefixes(file_paths, min_length = 4) ⇒ Object

Given an array of file paths, generates a file name prefix for each path according to the following rules:

  1. prefixes have to differ

  2. prefixes have to be at least as long as min_length, unless file name is shorter

  3. if file names match, and are shorter than min_length, grab whole directories from directory path until they don’t match

Results are returned as a hash keyed on the passed-in file paths.

Raises:

  • (ArgumentError)


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/log_weaver/prefix_generator.rb', line 13

# Generates a short, unique, human-readable prefix for each of the given file paths.
#
# Rules:
#   1. prefixes have to differ
#   2. prefixes have to be at least as long as min_length, unless the file name is shorter
#   3. if the file-name-based prefixes still collide, whole parent directories are
#      prepended (nearest parent first) until all prefixes differ
#
# Every returned prefix is terminated with ": " and right-padded with spaces so that
# all returned strings have the same length.
#
# @param file_paths [Array<String>] paths to generate prefixes for
# @param min_length [Integer] minimum number of file name characters in a prefix
# @return [Hash{String => String}] padded prefixes keyed on the passed-in paths;
#   empty when file_paths is empty
# @raise [ArgumentError] if two of the paths expand to the same absolute path
def get_file_prefixes(file_paths, min_length = 4)
  return {} if file_paths.empty?

  base_names = {}
  parent_dirs = {}
  expanded_paths = file_paths.map do |fp|
    expanded = File.expand_path(fp)
    base_names[fp] = File.basename(fp)
    dirs = expanded.split('/')
    dirs.pop # drop the file name, keeping only the directory components
    parent_dirs[fp] = dirs
    expanded
  end

  unless expanded_paths.uniq.length == expanded_paths.length
    raise ArgumentError, "File list is not unique."
  end

  # every prefix starts out as the part shared by all base names
  common_prefix = get_longest_common_prefix(base_names.values)
  prefixes = {}
  file_paths.each { |fp| prefixes[fp] = common_prefix.dup }
  all_unique = lambda { prefixes.values.uniq.length == prefixes.length }

  # append one more base name character to every prefix per round until the prefixes
  # are at least min_length long and all different (or the base names are used up)
  longest_base_name = base_names.values.map(&:length).max
  (common_prefix.length...longest_base_name).each do |i|
    # indexing past the end of a shorter base name yields nil, which to_s turns into ""
    file_paths.each { |fp| prefixes[fp] << base_names[fp][i].to_s }
    break if i + 1 >= min_length && all_unique.call
  end

  # still colliding: prepend parent directories, walking each path from its own tail
  # so that paths of different depths each contribute their nearest parent first
  deepest = parent_dirs.values.map(&:length).max
  1.upto(deepest) do |depth|
    break if all_unique.call
    file_paths.each do |fp|
      dir = parent_dirs[fp][-depth]
      next if dir.nil? # this path has no parents left to contribute
      prefixes[fp].insert(0, dir + "/")
    end
  end

  # terminate each prefix and pad to a common width
  width = prefixes.values.map(&:length).max
  prefixes.each_value do |prefix|
    # compute the padding before appending, since << mutates the prefix in place
    padding = " " * (width - prefix.length)
    prefix << ": " << padding
  end

  prefixes
end

#get_longest_common_prefix(words) ⇒ Object



86
87
88
89
90
91
92
93
94
95
# File 'lib/log_weaver/prefix_generator.rb', line 86

# Returns the longest string that every entry of +words+ starts with.
#
# The comparison is literal: characters that are special in regular expressions
# (".", "(", "+", ...) are treated like any other character.
#
# @param words [Array<String, nil>] the strings to compare; not modified
# @return [String, nil] the shared prefix ("" when there is none), or nil when
#   +words+ is empty or contains nil
def get_longest_common_prefix(words)
  return nil if words.empty? || words.include?(nil)

  first, *rest = words
  # work on a copy so the caller's string is not shortened
  prefix = first.dup
  # trim one character at a time until every remaining word starts with the candidate
  prefix.chop! until prefix.empty? || rest.all? { |w| w.start_with?(prefix) }
  prefix
end