Class: FastHtmlDiff::DiffBuilder

Inherits:
Object
  • Object
show all
Defined in:
lib/fast_html_diff.rb

Instance Method Summary collapse

Constructor Details

#initialize(html_str_a, html_str_b, config = {}) ⇒ DiffBuilder

Returns a new instance of DiffBuilder.



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/fast_html_diff.rb', line 6

# Prepares a diff of two HTML documents: parses both inputs, optionally
# simplifies them, indexes each one and finally runs the word diff.
#
# The caller's config is merged over default_config. The tokenizer regexp is
# derived from the merged :ignore_punctuation setting whenever the caller did
# not pass :tokenizer_regexp explicitly (the check is made on the caller's
# hash, not on the merged one).
#
# @param html_str_a HTML source of the first document (indexed as :a)
# @param html_str_b HTML source of the second document (indexed as :b)
# @param config [Hash] overrides for default_config; keys read here are
#   :tokenizer_regexp, :ignore_punctuation and :simplify_html
def initialize(html_str_a,html_str_b,config={})
  @config = default_config.merge(config)
  if config[:tokenizer_regexp].nil?
    # split on runs of non-alphanumerics when punctuation is ignored,
    # otherwise on runs of whitespace
    @config[:tokenizer_regexp] = if @config[:ignore_punctuation]
      %r{([^A-Za-z0-9]+)}
    else
      %r{(\s+)}
    end
  end

  # empty containers for the diff state
  @word_list = {}
  @split_nodes = {}
  @insertion_nodes = {}
  @insertions = []
  @deletions = []
  @matches = []

  # parse both inputs, then optionally simplify each parsed document
  @a = Nokogiri::HTML(html_str_a)
  @b = Nokogiri::HTML(html_str_b)
  [@a, @b].each { |doc| simplify_html(doc) } if @config[:simplify_html]

  # index each document under its own key
  index_document(@a, :a)
  index_document(@b, :b)

  # find the insertions and deletions
  diff_words
end

Instance Method Details

#build ⇒ Object

Builds the output HTML: document A is updated with tags for the insertions and deletions, then serialized.



39
40
41
42
43
# File 'lib/fast_html_diff.rb', line 39

# Produces the output HTML for the diff.
#
# Runs update_dom first and then serializes document A, so the order of the
# two statements matters.
#
# NOTE(review): every call runs update_dom again — confirm whether calling
# build more than once on the same instance is safe.
#
# @return the result of @a.to_html
def build
  # update doc_a with tags for the insertions and deletions
  update_dom
  @a.to_html
end

#statistics ⇒ Object

Returns statistics (segment, word and character counts) for insertions, deletions and matches.



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/fast_html_diff.rb', line 46

# Summarizes the diff as segment, word and character counts.
#
# For each of insertions, deletions and matches the result holds:
#   :segments - number of recorded ranges
#   :words    - sum of the inclusive index range sizes (end - start + 1)
#   :chars    - sum of (end_pos of the last word - start_pos of the first
#               word), so anything lying between those two positions is
#               counted as well
#
# Insertions are measured against the :b word list using :b_start/:b_end;
# deletions and matches against the :a word list using :a_start/:a_end.
#
# @return [Hash] { insertions: {...}, deletions: {...}, matches: {...} }
def statistics
  # label, recorded ranges, word-list side, range start key, range end key
  sources = [
    [:insertions, @insertions, :b, :b_start, :b_end],
    [:deletions,  @deletions,  :a, :a_start, :a_end],
    [:matches,    @matches,    :a, :a_start, :a_end]
  ]

  sources.each_with_object({}) do |(label, ranges, side, first_key, last_key), summary|
    tally = { segments: 0, words: 0, chars: 0 }
    ranges.each do |range|
      first_index = range[first_key]
      last_index = range[last_key]
      tally[:segments] += 1
      tally[:words] += last_index - first_index + 1
      tally[:chars] += @word_list[side][last_index][:end_pos] - @word_list[side][first_index][:start_pos]
    end
    summary[label] = tally
  end
end