Class: TextAlignment::GLCSAlignment

Inherits:
Object
  • Object
show all
Defined in:
lib/text_alignment/glcs_alignment.rb

Overview

An instance of this class holds the results of the generalized LCS computation for the two strings str1 and str2. An optional dictionary is used for generalized suffix comparison.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str1, str2, mappings = []) ⇒ GLCSAlignment

Initializes the GLCS table for the two given strings, str1 and str2. When the array mappings is given, generalized suffix comparison is performed based on the mappings. An ArgumentError is raised when nil is passed as str1, str2, or the mappings (dictionary).

Raises:

  • (ArgumentError)


32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/text_alignment/glcs_alignment.rb', line 32

# Builds the generalized LCS (GLCS) alignment of two strings.
#
# @param str1 [String] the first string
# @param str2 [String] the second string
# @param mappings [Array<Array>] pairs of strings to be treated as
#   equivalent during comparison; defaults to no mappings
# @raise [ArgumentError] if str1, str2, or mappings is nil
def initialize(str1, str2, mappings = [])
	raise ArgumentError, "nil string"     if str1.nil? || str2.nil?
	raise ArgumentError, "nil dictionary" if mappings.nil?

	# Index the mappings in a hash, in both directions. The reversed pairs
	# are appended last, so on a key collision the reversed entry wins.
	@dic = (mappings + mappings.map(&:reverse)).to_h

	# Prefix dictionary over every string that appears in any mapping.
	@pdic = Dictionary.new(mappings.flatten)

	# Lengths are recorded before the end markers are appended, so they
	# describe the caller's strings, not the marked copies.
	@len1 = str1.length
	@len2 = str2.length

	# Add a final marker to the end of the strings (new String objects; the
	# caller's strings are not mutated).
	@str1 = str1 + '_'
	@str2 = str2 + '_'

	# Compute the GLCS table; the overall GLCS length is read from cell [0][0].
	@glcs = _compute_glcs_table
	@length = @glcs[0][0]

	# NOTE(review): presumably fills in the position maps and the
	# common/mapped element lists - defined outside this view, confirm.
	_trace_glcs_table
end

Instance Attribute Details

#common_elementsObject (readonly)

the elements that are common in the two strings, str1 and str2



19
20
21
# File 'lib/text_alignment/glcs_alignment.rb', line 19

# Reader for @common_elements: per the attribute documentation, the elements
# common to str1 and str2.
# NOTE(review): not assigned in the visible constructor body; presumably
# populated by _trace_glcs_table - confirm.
def common_elements
  @common_elements
end

#diff_stringsObject (readonly)

the string of non-mapped characters



25
26
27
# File 'lib/text_alignment/glcs_alignment.rb', line 25

# Reader for @diff_strings: per the attribute documentation, the strings of
# non-mapped characters.
# NOTE(review): not assigned in the visible constructor body; presumably
# populated by _trace_glcs_table - confirm.
def diff_strings
  @diff_strings
end

#lengthObject (readonly)

The length of GLCS



16
17
18
# File 'lib/text_alignment/glcs_alignment.rb', line 16

# Reader for @length, the length of the GLCS. The constructor sets it from
# cell [0][0] of the GLCS table.
def length
  @length
end

#mapped_elementsObject (readonly)

the elements that are mapped to each other in the two strings, str1 and str2



22
23
24
# File 'lib/text_alignment/glcs_alignment.rb', line 22

# Reader for @mapped_elements: per the attribute documentation, the elements
# of str1 and str2 that are mapped to each other.
# NOTE(review): not assigned in the visible constructor body; presumably
# populated by _trace_glcs_table - confirm.
def mapped_elements
  @mapped_elements
end

#position_map_beginObject (readonly)

The mapping function from str1 to str2



10
11
12
# File 'lib/text_alignment/glcs_alignment.rb', line 10

# Reader for @position_map_begin, the lookup that transform_a_span indexes
# with a span's :begin value to obtain the transformed :begin.
# NOTE(review): presumably maps str1 positions to str2 positions - confirm.
def position_map_begin
  @position_map_begin
end

#position_map_endObject (readonly)

The mapping function from str1 to str2



10
11
12
# File 'lib/text_alignment/glcs_alignment.rb', line 10

# Reader for @position_map_end, the lookup that transform_a_span indexes
# with a span's :end value to obtain the transformed :end.
# NOTE(review): presumably maps str1 positions to str2 positions - confirm.
def position_map_end
  @position_map_end
end

#similarity(cut = false) ⇒ Object (readonly)

Computes the similarity of the two strings



93
94
95
# File 'lib/text_alignment/glcs_alignment.rb', line 93

# Reader for @similarity, the similarity of the two strings.
# NOTE(review): the documentation lists this as similarity(cut = false), but
# this reader takes no arguments; a separate method definition with that
# signature may exist outside this view - confirm which one is in effect.
def similarity
  @similarity
end

#str1_match_beginObject (readonly)

The initial and final positions of the match on str1 and str2



13
14
15
# File 'lib/text_alignment/glcs_alignment.rb', line 13

# Reader for @str1_match_begin.
# NOTE(review): presumably the position in str1 where the match begins;
# not assigned in the visible constructor body - confirm.
def str1_match_begin
  @str1_match_begin
end

#str1_match_endObject (readonly)

The initial and final positions of the match on str1 and str2



13
14
15
# File 'lib/text_alignment/glcs_alignment.rb', line 13

# Reader for @str1_match_end.
# NOTE(review): presumably the position in str1 where the match ends;
# not assigned in the visible constructor body - confirm.
def str1_match_end
  @str1_match_end
end

#str2_match_beginObject (readonly)

The initial and final positions of the match on str1 and str2



13
14
15
# File 'lib/text_alignment/glcs_alignment.rb', line 13

# Reader for @str2_match_begin.
# NOTE(review): presumably the position in str2 where the match begins;
# not assigned in the visible constructor body - confirm.
def str2_match_begin
  @str2_match_begin
end

#str2_match_endObject (readonly)

The initial and final positions of the match on str1 and str2



13
14
15
# File 'lib/text_alignment/glcs_alignment.rb', line 13

# Reader for @str2_match_end.
# NOTE(review): presumably the position in str2 where the match ends;
# not assigned in the visible constructor body - confirm.
def str2_match_end
  @str2_match_end
end

Instance Method Details

#cdiffObject

Returns the character-by-character difference



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/text_alignment/glcs_alignment.rb', line 66

# Returns the character-by-character difference of the two strings.
#
# Walks both strings from the start. Wherever _prefix_eq reports a pair of
# equivalent prefixes, both are emitted (the shorter one padded with spaces);
# otherwise a single character of one string is emitted opposite a space in
# the other. Each step adds the same number of characters to both outputs.
#
# @return [Array<String>] a two-element array [cdiff1, cdiff2] holding the
#   aligned rendering of str1 and str2 respectively
def cdiff
	cdiff1 = ''.dup
	cdiff2 = ''.dup
	p1 = p2 = 0

	# Do-while semantics: the body runs once even when both strings are empty.
	loop do
		s1, s2 = _prefix_eq(@str1[p1...@len1], @str2[p2...@len2])

		if s1
			l1 = s1.length
			l2 = s2.length

			# Pad the shorter side so both outputs stay the same length.
			cdiff1 << s1 << ' ' * [l2 - l1, 0].max
			cdiff2 << s2 << ' ' * [l1 - l2, 0].max
			p1 += l1
			p2 += l2
		elsif p2 < @len2 && (p1 == @len1 || @glcs[p1][p2 + 1] > @glcs[p1 + 1][p2])
			# str1 is exhausted, or (presumably) skipping a str2 character
			# keeps the larger GLCS score: emit the str2 character alone.
			cdiff1 << ' '
			cdiff2 << @str2[p2]
			p2 += 1
		elsif p1 < @len1 && (p2 == @len2 || @glcs[p1][p2 + 1] <= @glcs[p1 + 1][p2])
			# str2 is exhausted, or ties/otherwise: emit the str1 character alone.
			cdiff1 << @str1[p1]
			cdiff2 << ' '
			p1 += 1
		end

		break if p1 == @len1 && p2 == @len2
	end

	[cdiff1, cdiff2]
end

#show_glcsObject

Prints the GLCS table



57
58
59
60
61
62
63
# File 'lib/text_alignment/glcs_alignment.rb', line 57

# Prints the GLCS table to standard output as tab-separated text.
#
# The first line is a header with the characters of @str2, offset by two
# tabs. Each following line holds the row index, the character of @str1 at
# that index (empty when there is none), and the row's cells.
def show_glcs
	header_cells = @str2.split(//).join("\t")
	puts "\t\t#{header_cells}"

	@glcs.each_with_index do |cells, idx|
		row_label = @str1[idx] || ''
		puts "#{idx}\t#{row_label}\t#{cells.join("\t")}"
	end
end

#transform_a_span(span) ⇒ Object



109
110
111
# File 'lib/text_alignment/glcs_alignment.rb', line 109

# Transforms one span through the position maps.
#
# @param span [Hash] a span with :begin and :end keys
# @return [Hash] a new hash whose :begin is @position_map_begin looked up at
#   span[:begin] and whose :end is @position_map_end looked up at span[:end]
def transform_a_span(span)
	mapped_begin = @position_map_begin[span[:begin]]
	mapped_end   = @position_map_end[span[:end]]

	{ begin: mapped_begin, end: mapped_end }
end

#transform_spans(spans) ⇒ Object



113
114
115
# File 'lib/text_alignment/glcs_alignment.rb', line 113

# Transforms every span in a collection by passing each one to
# transform_a_span.
#
# @param spans [Enumerable<Hash>] spans with :begin and :end keys
# @return [Array<Hash>] the transformed spans, in the same order
def transform_spans(spans)
	spans.map do |a_span|
		transform_a_span(a_span)
	end
end