Class: HTMLDiff::DiffBuilder
- Inherits:
-
Object
- Object
- HTMLDiff::DiffBuilder
- Defined in:
- lib/htmldiff.rb
Constant Summary collapse
- VALID_METHODS =
[:replace, :insert, :delete, :equal]
Instance Method Summary collapse
- #add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new) ⇒ Object
- #add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new) ⇒ Object
- #add_special_attribute(opening_tag, type) ⇒ Object
- #build ⇒ Object
- #closing_tag?(item) ⇒ Boolean
- #convert_html_to_list_of_words(x, use_brackets = false) ⇒ Object
- #delete(operation, tagclass = 'diffdel') ⇒ Object
- #end_of_tag?(char) ⇒ Boolean
- #equal(operation) ⇒ Object
- #explode(sequence) ⇒ Object
- #extract_consecutive_words(words, &condition) ⇒ Object
- #find_match(start_in_old, end_in_old, start_in_new, end_in_new) ⇒ Object
- #index_new_words ⇒ Object
-
#initialize(old_version, new_version, dual = false) ⇒ DiffBuilder
constructor
A new instance of DiffBuilder.
- #insert(operation, tagclass = 'diffins') ⇒ Object
-
#insert_tag(tagname, cssclass, words, content) ⇒ Object
This method encloses words within a specified tag (ins or del) and appends the result to the given content array, with a twist: if the words contain tags, it actually creates multiple ins or del elements, so that none of them wraps a tag.
- #matching_blocks ⇒ Object
- #opening_tag?(item) ⇒ Boolean
- #operations ⇒ Object
- #perform_operation(operation) ⇒ Object
- #recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks) ⇒ Object
- #replace(operation) ⇒ Object
- #split_inputs_to_words ⇒ Object
- #start_of_tag?(char) ⇒ Boolean
- #tag?(item) ⇒ Boolean
- #whitespace?(char) ⇒ Boolean
- #wrap_text(text, tagname, cssclass) ⇒ Object
Constructor Details
#initialize(old_version, new_version, dual = false) ⇒ DiffBuilder
Returns a new instance of DiffBuilder.
18 19 20 21 22 23 24 25 26 27 |
# File 'lib/htmldiff.rb', line 18
# Stores the two versions to compare and prepares the output buffer(s).
#
# @param old_version the original input (split into words later by #build)
# @param new_version the revised input
# @param dual [Boolean] when truthy, output is collected into two separate
#   buffers (@old_content and @new_content); otherwise into a single @content
def initialize(old_version, new_version, dual = false)
  @old_version = old_version
  @new_version = new_version
  @dual = dual

  if @dual
    @new_content = []
    @old_content = []
  else
    @content = []
  end
end
Instance Method Details
#add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new) ⇒ Object
150 151 152 153 154 155 156 157 158 159 |
# File 'lib/htmldiff.rb', line 150
# Extends a match to the left for as long as the preceding words in
# @old_words and @new_words are equal and the lower bounds are not crossed.
#
# @param match_in_old [Integer] start of the match in @old_words
# @param match_in_new [Integer] start of the match in @new_words
# @param match_size [Integer] current length of the match
# @param start_in_old [Integer] lowest index the match may reach in @old_words
# @param start_in_new [Integer] lowest index the match may reach in @new_words
# @return [Array(Integer, Integer, Integer)] the adjusted
#   [match_in_old, match_in_new, match_size]
def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new)
  while match_in_old > start_in_old &&
        match_in_new > start_in_new &&
        @old_words[match_in_old - 1] == @new_words[match_in_new - 1]
    match_in_old -= 1
    match_in_new -= 1
    match_size += 1
  end

  [match_in_old, match_in_new, match_size]
end
#add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new) ⇒ Object
161 162 163 164 165 166 167 168 |
# File 'lib/htmldiff.rb', line 161
# Extends a match to the right for as long as the words following it in
# @old_words and @new_words are equal and the upper bounds are not crossed.
#
# @param match_in_old [Integer] start of the match in @old_words
# @param match_in_new [Integer] start of the match in @new_words
# @param match_size [Integer] current length of the match
# @param end_in_old [Integer] exclusive upper bound in @old_words
# @param end_in_new [Integer] exclusive upper bound in @new_words
# @return [Array(Integer, Integer, Integer)] the unchanged start positions
#   and the adjusted size: [match_in_old, match_in_new, match_size]
def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new)
  while match_in_old + match_size < end_in_old &&
        match_in_new + match_size < end_in_new &&
        @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size]
    match_size += 1
  end

  [match_in_old, match_in_new, match_size]
end
#add_special_attribute(opening_tag, type) ⇒ Object
227 228 229 |
# File 'lib/htmldiff.rb', line 227
# Returns a copy of the tag with a difftype attribute inserted before the
# first '>' character.
#
# @param opening_tag [String] tag text such as '<p>'
# @param type [#to_s] value of the difftype attribute (interpolated, so
#   non-String values such as symbols are accepted as well)
# @return [String] e.g. '<p difftype="diffins">'
def add_special_attribute(opening_tag, type)
  opening_tag.sub('>', %( difftype="#{type}">))
end
#build ⇒ Object
29 30 31 32 33 34 |
# File 'lib/htmldiff.rb', line 29
# Runs the diff: splits both inputs into words, indexes the new words,
# then performs every operation returned by #operations in order.
#
# @return [String, Array(String, String)] the joined @content, or in dual
#   mode the pair [joined @old_content, joined @new_content]
def build
  split_inputs_to_words
  index_new_words
  operations.each { |op| perform_operation(op) }

  @dual ? [@old_content.join, @new_content.join] : @content.join
end
#closing_tag?(item) ⇒ Boolean
204 205 206 |
# File 'lib/htmldiff.rb', line 204
# Tells whether the item is a closing tag ('</...>'), optionally surrounded
# by whitespace.
#
# @param item [String] a word
# @return [Integer, nil] the match position (truthy) when the item is a
#   closing tag, nil otherwise
def closing_tag?(item)
  item =~ %r!^\s*</[^>]+>\s*$!
end
#convert_html_to_list_of_words(x, use_brackets = false) ⇒ Object
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 |
# File 'lib/htmldiff.rb', line 283
# Splits the input into a list of "words": whole tags, runs of whitespace,
# and runs of any other characters, in their original order.
#
# The scanner has three modes (:char, :whitespace, :tag). A '<' always
# starts a tag and the next '>' ends it; after a tag the scanner returns to
# :char mode.
#
# @param x [String, Array<String>] the input; passed through #explode, so a
#   String is split into characters and anything else is iterated as given
# @param use_brackets [Boolean] when truthy, tags are emitted delimited by
#   '[' and ']' instead of '<' and '>'
# @return [Array<String>] the words
# @raise [RuntimeError] if the scanner reaches an unknown mode
def convert_html_to_list_of_words(x, use_brackets = false)
  tag_open = use_brackets ? '[' : '<'
  tag_close = use_brackets ? ']' : '>'

  mode = :char
  current_word = ''.dup
  words = []

  explode(x).each do |char|
    case mode
    when :tag
      if end_of_tag?(char)
        current_word << tag_close
        words << current_word
        current_word = ''.dup
        # The terminating char is always '>', so the next mode is :char.
        mode = :char
      else
        current_word << char
      end
    when :char
      if start_of_tag?(char)
        words << current_word unless current_word.empty?
        current_word = tag_open.dup
        mode = :tag
      elsif whitespace?(char)
        words << current_word unless current_word.empty?
        # dup so that later appends never modify the caller's input strings
        current_word = char.dup
        mode = :whitespace
      else
        current_word << char
      end
    when :whitespace
      if start_of_tag?(char)
        words << current_word unless current_word.empty?
        current_word = tag_open.dup
        mode = :tag
      elsif whitespace?(char)
        current_word << char
      else
        words << current_word unless current_word.empty?
        current_word = char.dup
        mode = :char
      end
    else
      raise "Unknown mode #{mode.inspect}"
    end
  end

  words << current_word unless current_word.empty?
  words
end
#delete(operation, tagclass = 'diffdel') ⇒ Object
186 187 188 |
# File 'lib/htmldiff.rb', line 186
# Emits the old words covered by the operation as deleted text (a 'del'
# tag), into @old_content in dual mode and into @content otherwise.
#
# @param operation an object responding to start_in_old and end_in_old
# @param tagclass [String] CSS class passed on to #insert_tag
def delete(operation, tagclass = 'diffdel')
  removed_words = @old_words[operation.start_in_old...operation.end_in_old]
  insert_tag('del', tagclass, removed_words, @dual ? @old_content : @content)
end
#end_of_tag?(char) ⇒ Boolean
271 272 273 |
# File 'lib/htmldiff.rb', line 271
# Tells whether the character terminates a tag.
#
# @param char [String] a single character
# @return [Boolean] true when char is '>'
def end_of_tag?(char)
  char == '>'
end
#equal(operation) ⇒ Object
190 191 192 193 194 195 196 197 198 |
# File 'lib/htmldiff.rb', line 190
# Copies the words of an unchanged run to the output without adding tags.
# In dual mode the old words go to @old_content and the new words to
# @new_content; otherwise the new words go to @content.
#
# @param operation an object responding to start_in_old, end_in_old,
#   start_in_new and end_in_new
def equal(operation)
  new_run = @new_words[operation.start_in_new...operation.end_in_new]

  if @dual
    # concat appends in place rather than re-copying the whole buffer
    @old_content.concat(@old_words[operation.start_in_old...operation.end_in_old])
    @new_content.concat(new_run)
  else
    @content.concat(new_run)
  end
end
#explode(sequence) ⇒ Object
267 268 269 |
# File 'lib/htmldiff.rb', line 267
# Turns a String into an array of its characters; any other object is
# returned unchanged.
#
# @param sequence [String, Object]
# @return [Array<String>, Object]
def explode(sequence)
  sequence.is_a?(String) ? sequence.chars : sequence
end
#extract_consecutive_words(words, &condition) ⇒ Object
212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
# File 'lib/htmldiff.rb', line 212
# Removes from the front of +words+ the leading run of words for which the
# block is truthy, and returns that run. +words+ is modified in place.
#
# @param words [Array<String>] word list; the extracted prefix is removed
# @yieldparam word [String]
# @yieldreturn [Boolean] whether the word belongs to the run
# @return [Array<String>] the extracted words (possibly empty)
def extract_consecutive_words(words, &condition)
  run_length = words.index { |word| !condition.call(word) } || words.length
  words.slice!(0...run_length)
end
#find_match(start_in_old, end_in_old, start_in_new, end_in_new) ⇒ Object
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/htmldiff.rb', line 114
# Finds the longest run of consecutive equal words between
# @old_words[start_in_old...end_in_old] and
# @new_words[start_in_new...end_in_new], using @word_indices to look up the
# positions of each old word in the new words.
#
# @param start_in_old [Integer] inclusive lower bound in @old_words
# @param end_in_old [Integer] exclusive upper bound in @old_words
# @param start_in_new [Integer] inclusive lower bound in @new_words
# @param end_in_new [Integer] exclusive upper bound in @new_words
# @return [Match, nil] the best match, or nil when no word matches
def find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  best_match_in_old = start_in_old
  best_match_in_new = start_in_new
  best_match_size = 0

  # match_length_at[j]: length of the run of equal words that ends at the
  # previous old index and at new index j (0 when there is none).
  match_length_at = Hash.new(0)

  start_in_old.upto(end_in_old - 1) do |index_in_old|
    new_match_length_at = Hash.new(0)

    @word_indices[@old_words[index_in_old]].each do |index_in_new|
      next if index_in_new < start_in_new
      break if index_in_new >= end_in_new

      new_match_length = match_length_at[index_in_new - 1] + 1
      new_match_length_at[index_in_new] = new_match_length

      if new_match_length > best_match_size
        best_match_in_old = index_in_old - new_match_length + 1
        best_match_in_new = index_in_new - new_match_length + 1
        best_match_size = new_match_length
      end
    end

    match_length_at = new_match_length_at
  end

  # Extending the match with add_matching_words_left/right is disabled:
  # best_match_in_old, best_match_in_new, best_match_size = add_matching_words_left(
  #   best_match_in_old, best_match_in_new, best_match_size, start_in_old, start_in_new)
  # best_match_in_old, best_match_in_new, match_size = add_matching_words_right(
  #   best_match_in_old, best_match_in_new, best_match_size, end_in_old, end_in_new)

  return nil if best_match_size.zero?

  Match.new(best_match_in_old, best_match_in_new, best_match_size)
end
#index_new_words ⇒ Object
41 42 43 44 |
# File 'lib/htmldiff.rb', line 41
# Builds @word_indices: a hash mapping each word of @new_words to the list
# of positions (ascending) at which it occurs. Looking up a word that is
# not present yields, and stores, an empty list.
def index_new_words
  @word_indices = Hash.new { |h, word| h[word] = [] }
  @new_words.each_with_index { |word, i| @word_indices[word] << i }
end
#insert(operation, tagclass = 'diffins') ⇒ Object
182 183 184 |
# File 'lib/htmldiff.rb', line 182
# Emits the new words covered by the operation as inserted text (an 'ins'
# tag), into @new_content in dual mode and into @content otherwise.
#
# @param operation an object responding to start_in_new and end_in_new
# @param tagclass [String] CSS class passed on to #insert_tag
def insert(operation, tagclass = 'diffins')
  added_words = @new_words[operation.start_in_new...operation.end_in_new]
  insert_tag('ins', tagclass, added_words, @dual ? @new_content : @content)
end
#insert_tag(tagname, cssclass, words, content) ⇒ Object
This method encloses words within a specified tag (ins or del) and appends the result to the given content array, with a twist: if the words contain tags, it actually creates multiple ins or del elements, so that none of them wraps a tag. This handles cases like old: ‘<p>a</p>’ new: ‘<p>ab</p><p>c</p>’ diff result: ‘<p>a<ins>b</ins></p><p><ins>c</ins></p>’. This still doesn’t guarantee valid HTML (hint: think about diffing a text containing ins or del tags), but it handles more cases correctly than the earlier version.
P.S.: Spare a thought for people who write HTML browsers. They live in this … every day.
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 |
# File 'lib/htmldiff.rb', line 242
# Appends +words+ to +content+, wrapping every run of non-tag words in
# <tagname class="cssclass">...</tagname>. Tags found between the runs are
# emitted outside the wrapper: opening tags receive a difftype attribute
# (see #add_special_attribute), closing tags are copied unchanged.
#
# +words+ is consumed (emptied) in the process.
#
# @param tagname [String] wrapper element name, e.g. 'ins' or 'del'
# @param cssclass [String] class of the wrapper and value of difftype
# @param words [Array<String>] the words to emit
# @param content [Array<String>] output buffer that is appended to
def insert_tag(tagname, cssclass, words, content)
  loop do
    break if words.empty?

    non_tags = extract_consecutive_words(words) { |word| !tag?(word) }
    content << wrap_text(non_tags.join, tagname, cssclass) unless non_tags.empty?

    break if words.empty?

    loop do
      opening = opening_tag?(words.first)
      closing = closing_tag?(words.first)
      break unless opening || closing

      # opening_tag? also matches closing tags, so closing has to be
      # excluded before an attribute is added.
      if opening && !closing
        content << add_special_attribute(words.shift, cssclass)
      else
        content << words.shift
      end
    end
  end
end
#matching_blocks ⇒ Object
93 94 95 96 97 |
# File 'lib/htmldiff.rb', line 93
# Collects the matching blocks between all of @old_words and all of
# @new_words by delegating to #recursively_find_matching_blocks.
#
# @return [Array] the blocks, in the order they were appended
def matching_blocks
  blocks = []
  recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, blocks)
  blocks
end
#opening_tag?(item) ⇒ Boolean
200 201 202 |
# File 'lib/htmldiff.rb', line 200
# Tells whether the item looks like a tag ('<...>'), optionally surrounded
# by whitespace.
#
# NOTE: the pattern does not exclude a leading '/', so closing tags such as
# '</p>' match as well; callers that need to tell the two apart must also
# consult #closing_tag?.
#
# @param item [String] a word
# @return [Integer, nil] the match position (truthy) on a match, nil
#   otherwise
def opening_tag?(item)
  item =~ %r!^\s*<[^>]+>\s*$!
end
#operations ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/htmldiff.rb', line 46
# Translates the matching blocks into an ordered list of operations
# (:replace, :insert, :delete, :equal) that together cover both word lists.
#
# For every match, the gap between the current positions and the start of
# the match becomes a :replace (gap on both sides), :insert (gap in the new
# words only) or :delete (gap in the old words only); the match itself
# becomes an :equal operation.
#
# @return [Array<Operation>]
def operations
  position_in_old = position_in_new = 0
  ops = []

  matches = matching_blocks
  # An empty match at the very end forces the loop below to handle the
  # unmatched tails of both word lists.
  matches << Match.new(@old_words.length, @new_words.length, 0)

  matches.each do |match|
    at_match_in_old = (position_in_old == match.start_in_old)
    at_match_in_new = (position_in_new == match.start_in_new)

    action_upto_match_positions =
      case [at_match_in_old, at_match_in_new]
      when [false, false]
        :replace
      when [true, false]
        :insert
      when [false, true]
        :delete
      else
        # No gap on either side, e.g. the first words are the same in both
        # versions.
        :none
      end

    if action_upto_match_positions != :none
      ops << Operation.new(action_upto_match_positions,
                           position_in_old, match.start_in_old,
                           position_in_new, match.start_in_new)
    end

    if match.size != 0
      ops << Operation.new(:equal,
                           match.start_in_old, match.end_in_old,
                           match.start_in_new, match.end_in_new)
    end

    position_in_old = match.end_in_old
    position_in_new = match.end_in_new
  end

  ops
end
#perform_operation(operation) ⇒ Object
172 173 174 175 |
# File 'lib/htmldiff.rb', line 172
# Remembers the operation in @operation and dispatches it to the method
# named by operation.action, passing the operation as the only argument.
#
# NOTE(review): the action is dispatched via send without being checked
# here; presumably it is one of VALID_METHODS (:replace, :insert, :delete,
# :equal) - confirm where Operation is built.
#
# @param operation an object responding to action
# @return whatever the dispatched method returns
def perform_operation(operation)
  @operation = operation
  send(operation.action, operation)
end
#recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks) ⇒ Object
99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/htmldiff.rb', line 99
# Finds the best match within the given ranges (via #find_match), then
# recurses into the unmatched ranges to the left and to the right of it.
# Matches are appended to +matching_blocks+ in left-to-right order.
#
# @param start_in_old [Integer] inclusive lower bound in the old words
# @param end_in_old [Integer] exclusive upper bound in the old words
# @param start_in_new [Integer] inclusive lower bound in the new words
# @param end_in_new [Integer] exclusive upper bound in the new words
# @param matching_blocks [Array] accumulator that matches are appended to
def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks)
  match = find_match(start_in_old, end_in_old, start_in_new, end_in_new)
  return unless match

  # Only recurse to the left when both sides have unmatched words there.
  if start_in_old < match.start_in_old && start_in_new < match.start_in_new
    recursively_find_matching_blocks(
      start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks
    )
  end

  matching_blocks << match

  # Likewise for the words to the right of the match.
  if match.end_in_old < end_in_old && match.end_in_new < end_in_new
    recursively_find_matching_blocks(
      match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks
    )
  end
end
#replace(operation) ⇒ Object
177 178 179 180 |
# File 'lib/htmldiff.rb', line 177
# Handles a replacement as a deletion followed by an insertion, both
# emitted with the 'diffmod' class.
#
# @param operation the operation, passed on to #delete and #insert
def replace(operation)
  delete(operation, 'diffmod')
  insert(operation, 'diffmod')
end
#split_inputs_to_words ⇒ Object
36 37 38 39 |
# File 'lib/htmldiff.rb', line 36
# Splits both input versions into word lists, stored in @old_words and
# @new_words.
def split_inputs_to_words
  @old_words = convert_html_to_list_of_words(explode(@old_version))
  @new_words = convert_html_to_list_of_words(explode(@new_version))
end
#start_of_tag?(char) ⇒ Boolean
275 276 277 |
# File 'lib/htmldiff.rb', line 275
# Tells whether the character starts a tag.
#
# @param char [String] a single character
# @return [Boolean] true when char is '<'
def start_of_tag?(char)
  char == '<'
end
#tag?(item) ⇒ Boolean
208 209 210 |
# File 'lib/htmldiff.rb', line 208
# Tells whether the item is a tag, i.e. matches #opening_tag? or
# #closing_tag?.
#
# @param item [String] a word
# @return a truthy value when the item is a tag, a falsy one otherwise
def tag?(item)
  opening_tag?(item) || closing_tag?(item)
end
#whitespace?(char) ⇒ Boolean
279 280 281 |
# File 'lib/htmldiff.rb', line 279
# Tells whether the string contains a whitespace character.
#
# @param char [String] normally a single character
# @return [Integer, nil] the position of the first whitespace character
#   (truthy), or nil when there is none
def whitespace?(char)
  char =~ /\s/
end
#wrap_text(text, tagname, cssclass) ⇒ Object
263 264 265 |
# File 'lib/htmldiff.rb', line 263
# Wraps the text in an element with the given name and CSS class.
#
# @param text [String] inner content, inserted as is (not escaped)
# @param tagname [String] element name, e.g. 'ins'
# @param cssclass [String] value of the class attribute
# @return [String] e.g. '<ins class="diffins">text</ins>'
def wrap_text(text, tagname, cssclass)
  %(<#{tagname} class="#{cssclass}">#{text}</#{tagname}>)
end