Method: TextAlignment._find_divisions_old

Defined in:
lib/text_alignment/find_divisions.rb

._find_divisions_old(source, targets) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/text_alignment/find_divisions.rb', line 124

# Finds the first target whose text aligns with +source+, records where it
# sits, removes it from +targets+, and recurses on the rest of the source.
#
# For each target the shorter of (source, target text) is compared against
# the longer one. When the length difference is too large for the configured
# similarity threshold, +approximate_fit+ is asked for a window of the longer
# string to compare against; a nil window start skips that target.
#
# @param source [String] text in which the targets are to be located
# @param targets [Array<Hash>] entries read via +:text+ and +:divid+;
#   the matched entry is deleted from this array in place
# @return [Array] entries of the form <tt>[divid, [begin, end]]</tt>; when no
#   target matches, a single entry <tt>[-1, [source, remaining_divids]]</tt>
#
# NOTE(review): the recursive step calls +_find_divisions+ (defined elsewhere),
# not this method -- confirm that this is intended.
# NOTE(review): +targets+ is mutated (+delete_at+); callers may rely on it.
def _find_divisions_old(source, targets)
	mode = nil
	matched_at = nil
	comparison = nil
	offset_begin = nil

	targets.each_with_index do |target, position|
		threshold = TextAlignment::SIMILARITY_THRESHOLD
		text = target[:text]

		# The shorter string is always the one searched for inside the longer one.
		if source.size < text.size
			mode = :t_in_s
			shorter, longer = source, text
		else
			mode = :s_in_t
			shorter, longer = text, source
		end

		shorter_len = shorter.length
		longer_len = longer.length
		tolerance = shorter_len * (1 - threshold)

		# Default window is the whole longer string; narrow it only when the
		# length difference exceeds what the threshold tolerates.
		offset_begin, offset_end = 0, -1
		if (longer_len - shorter_len) > tolerance
			offset_begin, offset_end = approximate_fit(shorter, longer)
		end
		next if offset_begin.nil?

		comparison = TextAlignment::LCSComparison.new(shorter, longer[offset_begin .. offset_end])
		next unless comparison.similarity > threshold

		# Number of characters of the shorter string left outside the matched span.
		unmatched = shorter_len - (comparison.str1_match_final - comparison.str1_match_initial + 1)
		next unless unmatched < tolerance

		matched_at = position
		break
	end

	# Nothing matched: hand back the untouched source and the ids still pending.
	if matched_at.nil?
		return [[-1, [source, targets.map { |t| t[:divid] }]]]
	end

	divid = targets[matched_at][:divid]
	span = if mode == :t_in_s
		# The source was the shorter string, so the whole source is the span.
		[0, source.size]
	else # :s_in_t
		# Match positions are relative to the window; shift them back by its start.
		[comparison.str2_match_initial + offset_begin, comparison.str2_match_final + offset_begin + 1]
	end
	index = [divid, span]

	span_begin, span_end = span
	next_source = source[0 ... span_begin] + source[span_end .. -1]
	targets.delete_at(matched_at)

	return [index] if next_source.strip.empty? || targets.empty?

	more_index = _find_divisions(next_source, targets)

	# Positions found in the shortened source are shifted by the length of the
	# removed span when they lie at or after the point where it was cut out.
	gap = span_end - span_begin
	more_index.each do |entry|
		next unless entry[0] > -1
		entry[1][0] += gap if entry[1][0] >= span_begin
		entry[1][1] += gap if entry[1][1] >  span_begin
	end

	[index] + more_index
end