Module: Dry

Extended by:
Dry
Included in:
Dry
Defined in:
lib/dry-file.rb,
lib/dry/file.rb,
lib/dry/file/version.rb

Defined Under Namespace

Modules: File

Instance Method Summary collapse

Instance Method Details

#diff_size(line_1, line_2) ⇒ Object

Returns the number of differing characters between two lines.



42
43
44
# File 'lib/dry-file.rb', line 42

# Counts the character positions at which two lines differ.
#
# The comparison is positional and symmetric: positions that exist in only
# one of the lines (because one line is longer than the other) each count as
# a difference, regardless of which argument is the longer one.
#
# @param line_1 [String] first line
# @param line_2 [String] second line
# @return [Integer] number of positions whose characters differ
def diff_size(line_1, line_2)
  chars_1 = line_1.chars
  chars_2 = line_2.chars
  longest = [chars_1.size, chars_2.size].max

  # Indexing past the end of the shorter array yields nil, which never equals
  # a character, so every surplus position is counted as a difference.
  (0...longest).count { |i| chars_1[i] != chars_2[i] }
end

#run(max_chars, file, *options) ⇒ Object

Usage

dry 40 test/data/sqli.txt

Params

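max_chars: threshold; a line is kept only when it differs from the previously kept line by more than this many characters (in length or in content). file: path of the input file. options: pass ‘-i’ to replace the file in place; otherwise the result is written to dry-<file name> in the same directory as the input.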



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/dry-file.rb', line 15

# Removes near-duplicate consecutive lines from +file+.
#
# A line is kept only when it differs from the previously kept line by more
# than +max_chars+ characters, either in length or according to +diff_size+.
# The filtered content is first written to a temporary file and then moved
# next to the input file.
#
# @param max_chars [#to_i] threshold above which a line counts as different
# @param file [String] path of the input file
# @param options [Array<String>] pass '-i' to overwrite +file+ in place;
#   otherwise the output is written to "dry-<basename>" in the same directory
# @return [Object] whatever FileUtils.mv returns
def run(max_chars, file, *options)
  max_chars = max_chars.to_i
  base_name = File.basename(file)
  file_out = options.include?('-i') ? base_name : "dry-#{base_name}"
  previous_line = ''

  temp_file = Tempfile.new(base_name)
  begin
    # File.foreach closes the input handle once iteration ends, even when the
    # block raises.
    File.foreach(file) do |l|
      # We compare the line size as well as the line similarity. We could have
      # different threshold values for each of these comparisons (e.g. use
      # +max_chars+ for the size comparison and +max_diff_chars+ for the
      # similarity comparison), but using +max_chars+ for both comparisons
      # is also OK; the results seem pretty good.
      next unless (previous_line.size - l.size).abs > max_chars ||
                  diff_size(previous_line, l) > max_chars

      temp_file.write l
      previous_line = l
    end

    # Flush and close before moving so the destination holds the full content.
    temp_file.close
    FileUtils.mv(temp_file.path, File.join(File.dirname(file), file_out))
  ensure
    # Closes and unlinks the temp file; after a successful move there is
    # nothing left at the temp path, so this only matters on failure.
    temp_file.close!
  end
end