Class: TextAlignment::LCSMin

Inherits:
Object
  • Object
show all
Defined in:
lib/text_alignment/lcs_min.rb

Overview

It finds minimal lcs and sdiff of the given strings, str1 and str2. It relies on the diff-lcs gem for the computation of lcs table.

Constant Summary collapse

PLACEHOLDER_CHAR =
'_'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str1, str2) ⇒ LCSMin

Returns a new instance of LCSMin.

Raises:

  • (ArgumentError)


18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/text_alignment/lcs_min.rb', line 18

def initialize (str1, str2)
	raise ArgumentError, "nil string" if str1.nil? || str2.nil?
	raise ArgumentError, "empty string" if str1.empty? || str2.empty?

	# str1 is copied as it is.
	# str2 is copied with w/s characters replaced with the placeholder characters,
	# to avoid overfitting to w/s characters during LCS computation.
	@str1 = str1
	@str2 = str2.gsub(/\s/, PLACEHOLDER_CHAR)

	# find the corresponding minimal range of the two strings
	r = _find_min_range(0, @str1.length - 1, 0, @str2.length - 1)
	if r.nil?
		@sdiff = nil
		@lcs = 0
		return
	end

	@m1_initial, @m1_final, @m2_initial, @m2_final = r[:m1_initial], r[:m1_final], r[:m2_initial], r[:m2_final]

	if @m1_initial.nil?
		@sdiff = nil
		@lcs = 0
	else
		# compute sdiff and lcs
		# here the original str2 is used with all the w/s characters preserved.
		@sdiff = Diff::LCS.sdiff(@str1[@m1_initial..@m1_final], str2[@m2_initial..@m2_final])
		@lcs = @sdiff.count{|d| d.action == '='}

		# adjust the position values of sdiff
		@sdiff.each do |h|
			h.old_position += @m1_initial unless h.old_position.nil?
			h.new_position += @m2_initial unless h.new_position.nil?
		end

		(0 ... @m2_initial).reverse_each{|i| @sdiff.unshift(Diff::LCS::ContextChange.new('+', nil, nil, i, @str2[i]))}
		(0 ... @m1_initial).reverse_each{|i| @sdiff.unshift(Diff::LCS::ContextChange.new('-', i, @str1[i], nil, nil))}
		(@m1_final + 1 ... @str1.length).each{|i| @sdiff.push(Diff::LCS::ContextChange.new('-', i, @str1[i], nil, nil))}
		(@m2_final + 1 ... @str2.length).each{|i| @sdiff.push(Diff::LCS::ContextChange.new('+', nil, nil, i, @str2[i]))}
	end
end

Instance Attribute Details

#lcsObject (readonly)

Returns the value of attribute lcs.



14
15
16
# File 'lib/text_alignment/lcs_min.rb', line 14

def lcs
  @lcs
end

#m1_finalObject (readonly)

Returns the value of attribute m1_final.



14
15
16
# File 'lib/text_alignment/lcs_min.rb', line 14

def m1_final
  @m1_final
end

#m1_initialObject (readonly)

Returns the value of attribute m1_initial.



14
15
16
# File 'lib/text_alignment/lcs_min.rb', line 14

def m1_initial
  @m1_initial
end

#m2_finalObject (readonly)

Returns the value of attribute m2_final.



14
15
16
# File 'lib/text_alignment/lcs_min.rb', line 14

def m2_final
  @m2_final
end

#m2_initialObject (readonly)

Returns the value of attribute m2_initial.



14
15
16
# File 'lib/text_alignment/lcs_min.rb', line 14

def m2_initial
  @m2_initial
end

#sdiffObject (readonly)

Returns the value of attribute sdiff.



14
15
16
# File 'lib/text_alignment/lcs_min.rb', line 14

def sdiff
  @sdiff
end

Instance Method Details

#_find_min_range(m1_initial, m1_final, m2_initial, m2_final, clcs = 0) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/text_alignment/lcs_min.rb', line 60

def _find_min_range (m1_initial, m1_final, m2_initial, m2_final, clcs = 0)
	return nil if (m1_final - m1_initial < 0) || (m2_final - m2_initial < 0)
	sdiff = Diff::LCS.sdiff(@str1[m1_initial..m1_final], @str2[m2_initial..m2_final])
	lcs = sdiff.count{|d| d.action == '='}

	return nil if lcs == 0
	return nil if lcs < clcs

	match_last  = sdiff.rindex{|d| d.action == '='}
	m1_final    = sdiff[match_last].old_position + m1_initial
	m2_final    = sdiff[match_last].new_position + m2_initial

	match_first = sdiff.index{|d| d.action == '='}
	m1_initial  = sdiff[match_first].old_position + m1_initial
	m2_initial  = sdiff[match_first].new_position + m2_initial

	# attempt for shorter match
	if ((m1_final - m1_initial) > (m2_final - m2_initial))
		r = _find_min_range(m1_initial + 1, m1_final, m2_initial, m2_final, lcs)
		return r unless r.nil?
		r = _find_min_range(m1_initial, m1_final - 1, m2_initial, m2_final, lcs)
		return r unless r.nil?
	else
		r = _find_min_range(m1_initial, m1_final, m2_initial + 1, m2_final, lcs)
		return r unless r.nil?
		r = _find_min_range(m1_initial, m1_final, m2_initial, m2_final - 1, lcs)
		return r unless r.nil?
	end

	return {
		m1_initial: m1_initial,
		m1_final: m1_final,
		m2_initial: m2_initial,
		m2_final: m2_final
	}
end

#num_big_gaps(sdiff, initial, last) ⇒ Object

Raises:

  • (ArgumentError)


97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/text_alignment/lcs_min.rb', line 97

def num_big_gaps (sdiff, initial, last)
	raise ArgumentError, "nil sdiff" if sdiff.nil?
	raise ArgumentError, "invalid indice: #{initial}, #{last}" unless last >= initial

	state1 = :initial
	state2 = :initial
	gaps1 = []
	gaps2 = []

	(initial .. last).each do |i|
		case sdiff[i].action
		when '='
			state1 = :continue
			state2 = :continue
		when '!'
			gaps1 << 1
			state1 = :break

			if state2 == :break
				gaps2[-1] += 1
			else
				gaps2 << 1
			end
			state2 = :continue
		when '+'
			if state1 == :break
				gaps1[-1] += 1
			else
				gaps1 << 1
			end
			state1 = :break
		when '-'
			if state2 == :break
				gaps2[-1] += 1
			else
				gaps2 << 1
			end
			state2 = :break
		end
	end

	num_big_gaps1 = gaps1.select{|g| g > MAX_LEN_BIG_GAP}.length
	num_big_gaps2 = gaps2.select{|g| g > MAX_LEN_BIG_GAP}.length
	num_big_gaps1 + num_big_gaps2
end