Class: TextAlignment::LCSMin
- Inherits:
-
Object
- Object
- TextAlignment::LCSMin
- Defined in:
- lib/text_alignment/lcs_min.rb
Overview
It finds minimal lcs and sdiff of the given strings, str1 and str2. It relies on the diff-lcs gem for the computation of lcs table.
Constant Summary collapse
- PLACEHOLDER_CHAR =
'_'
Instance Attribute Summary collapse
-
#lcs ⇒ Object
readonly
Returns the value of attribute lcs.
-
#m1_final ⇒ Object
readonly
Returns the value of attribute m1_final.
-
#m1_initial ⇒ Object
readonly
Returns the value of attribute m1_initial.
-
#m2_final ⇒ Object
readonly
Returns the value of attribute m2_final.
-
#m2_initial ⇒ Object
readonly
Returns the value of attribute m2_initial.
-
#sdiff ⇒ Object
readonly
Returns the value of attribute sdiff.
Instance Method Summary collapse
- #_find_min_range(m1_initial, m1_final, m2_initial, m2_final, clcs = 0) ⇒ Object
-
#initialize(str1, str2) ⇒ LCSMin
constructor
A new instance of LCSMin.
- #num_big_gaps(sdiff, initial, last) ⇒ Object
Constructor Details
#initialize(str1, str2) ⇒ LCSMin
Returns a new instance of LCSMin.
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/text_alignment/lcs_min.rb', line 18 def initialize (str1, str2) raise ArgumentError, "nil string" if str1.nil? || str2.nil? raise ArgumentError, "empty string" if str1.empty? || str2.empty? # str1 is copied as it is. # str2 is copied with w/s characters replaced with the placeholder characters, # to avoid overfitting to w/s characters during LCS computation. @str1 = str1 @str2 = str2.gsub(/\s/, PLACEHOLDER_CHAR) # find the corresponding minimal range of the two strings r = _find_min_range(0, @str1.length - 1, 0, @str2.length - 1) if r.nil? @sdiff = nil @lcs = 0 return end @m1_initial, @m1_final, @m2_initial, @m2_final = r[:m1_initial], r[:m1_final], r[:m2_initial], r[:m2_final] if @m1_initial.nil? @sdiff = nil @lcs = 0 else # compute sdiff and lcs # here the original str2 is used with all the w/s characters preserved. @sdiff = Diff::LCS.sdiff(@str1[@m1_initial..@m1_final], str2[@m2_initial..@m2_final]) @lcs = @sdiff.count{|d| d.action == '='} # adjust the position values of sdiff @sdiff.each do |h| h.old_position += @m1_initial unless h.old_position.nil? h.new_position += @m2_initial unless h.new_position.nil? end (0 ... @m2_initial).reverse_each{|i| @sdiff.unshift(Diff::LCS::ContextChange.new('+', nil, nil, i, @str2[i]))} (0 ... @m1_initial).reverse_each{|i| @sdiff.unshift(Diff::LCS::ContextChange.new('-', i, @str1[i], nil, nil))} (@m1_final + 1 ... @str1.length).each{|i| @sdiff.push(Diff::LCS::ContextChange.new('-', i, @str1[i], nil, nil))} (@m2_final + 1 ... @str2.length).each{|i| @sdiff.push(Diff::LCS::ContextChange.new('+', nil, nil, i, @str2[i]))} end end |
Instance Attribute Details
#lcs ⇒ Object (readonly)
Returns the value of attribute lcs.
14 15 16 |
# File 'lib/text_alignment/lcs_min.rb', line 14 def lcs @lcs end |
#m1_final ⇒ Object (readonly)
Returns the value of attribute m1_final.
14 15 16 |
# File 'lib/text_alignment/lcs_min.rb', line 14 def m1_final @m1_final end |
#m1_initial ⇒ Object (readonly)
Returns the value of attribute m1_initial.
14 15 16 |
# File 'lib/text_alignment/lcs_min.rb', line 14 def m1_initial @m1_initial end |
#m2_final ⇒ Object (readonly)
Returns the value of attribute m2_final.
14 15 16 |
# File 'lib/text_alignment/lcs_min.rb', line 14 def m2_final @m2_final end |
#m2_initial ⇒ Object (readonly)
Returns the value of attribute m2_initial.
14 15 16 |
# File 'lib/text_alignment/lcs_min.rb', line 14 def m2_initial @m2_initial end |
#sdiff ⇒ Object (readonly)
Returns the value of attribute sdiff.
14 15 16 |
# File 'lib/text_alignment/lcs_min.rb', line 14 def sdiff @sdiff end |
Instance Method Details
#_find_min_range(m1_initial, m1_final, m2_initial, m2_final, clcs = 0) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/text_alignment/lcs_min.rb', line 60 def _find_min_range (m1_initial, m1_final, m2_initial, m2_final, clcs = 0) return nil if (m1_final - m1_initial < 0) || (m2_final - m2_initial < 0) sdiff = Diff::LCS.sdiff(@str1[m1_initial..m1_final], @str2[m2_initial..m2_final]) lcs = sdiff.count{|d| d.action == '='} return nil if lcs == 0 return nil if lcs < clcs match_last = sdiff.rindex{|d| d.action == '='} m1_final = sdiff[match_last].old_position + m1_initial m2_final = sdiff[match_last].new_position + m2_initial match_first = sdiff.index{|d| d.action == '='} m1_initial = sdiff[match_first].old_position + m1_initial m2_initial = sdiff[match_first].new_position + m2_initial # attempt for shorter match if ((m1_final - m1_initial) > (m2_final - m2_initial)) r = _find_min_range(m1_initial + 1, m1_final, m2_initial, m2_final, lcs) return r unless r.nil? r = _find_min_range(m1_initial, m1_final - 1, m2_initial, m2_final, lcs) return r unless r.nil? else r = _find_min_range(m1_initial, m1_final, m2_initial + 1, m2_final, lcs) return r unless r.nil? r = _find_min_range(m1_initial, m1_final, m2_initial, m2_final - 1, lcs) return r unless r.nil? end return { m1_initial: m1_initial, m1_final: m1_final, m2_initial: m2_initial, m2_final: m2_final } end |
#num_big_gaps(sdiff, initial, last) ⇒ Object
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# File 'lib/text_alignment/lcs_min.rb', line 97 def num_big_gaps (sdiff, initial, last) raise ArgumentError, "nil sdiff" if sdiff.nil? raise ArgumentError, "invalid indice: #{initial}, #{last}" unless last >= initial state1 = :initial state2 = :initial gaps1 = [] gaps2 = [] (initial .. last).each do |i| case sdiff[i].action when '=' state1 = :continue state2 = :continue when '!' gaps1 << 1 state1 = :break if state2 == :break gaps2[-1] += 1 else gaps2 << 1 end state2 = :continue when '+' if state1 == :break gaps1[-1] += 1 else gaps1 << 1 end state1 = :break when '-' if state2 == :break gaps2[-1] += 1 else gaps2 << 1 end state2 = :break end end num_big_gaps1 = gaps1.select{|g| g > MAX_LEN_BIG_GAP}.length num_big_gaps2 = gaps2.select{|g| g > MAX_LEN_BIG_GAP}.length num_big_gaps1 + num_big_gaps2 end |