Module: TraceVisualization::Repetitions

Included in:
Algorithm
Defined in:
lib/trace_visualization/repetitions.rb,
lib/trace_visualization/repetitions_psy.rb,
lib/trace_visualization/repetitions/filter.rb,
lib/trace_visualization/repetitions/context.rb,
lib/trace_visualization/repetitions/concatenation.rb,
lib/trace_visualization/repetitions/incrementation.rb

Defined Under Namespace

Modules: Concatenation, Filter, Incrementation Classes: Context

Class Method Summary collapse

Class Method Details

.char_counter_inc(str, pos, offset) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/trace_visualization/repetitions.rb', line 52

def self.char_counter_inc(str, pos, offset)
  counter = Array.new(256)

  pos.each do |pos|
    if pos + offset - 1 < str.length
      c = str[pos + offset - 1].ord
      counter[c] = [] if counter[c] == nil
      counter[c] << pos        
    end
  end

  counter
end

.decode_psy1_result(result, sa) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
# File 'lib/trace_visualization/repetitions_psy.rb', line 77

def self.decode_psy1_result(result, sa)
  repetitions = []

  result.each do |item|
    positions = (item[:i] .. item[:j]).collect { |idx| sa[idx] }
    repetitions << TraceVisualization::Data::Repetition.new(item[:lcp], positions.sort)
  
  end
  
  repetitions
end

.le_letter(l1, l2) ⇒ Object



73
74
75
# File 'lib/trace_visualization/repetitions_psy.rb', line 73

def self.le_letter(l1, l2)
  (l1 == TraceVisualization::TERMINATION_CHAR || l1 != l2) ? TraceVisualization::TERMINATION_CHAR : l1
end

.naive(str, l) ⇒ Object

The naive approach to finding repetitions in the string. Time complexity: О(n^2)



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/trace_visualization/repetitions.rb', line 6

def self.naive(str, l)
  result = Array.new(l + 1) { [] }
  result[0] << Array.new(str.length) {|idx| idx + 1 }

  for i in 1 .. l
    result[i - 1].each do |item|
      counter = char_counter_inc(str, item, i - 1)
      counter.each do |positions|
        result[i] << positions if positions != nil && positions.size > 1
      end
    end
  end

  result[l].sort
end

.naive_all(str, p_min) ⇒ Object

The naive approach to finding repetitions in the string. Time complexity: О(n^2)



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/trace_visualization/repetitions.rb', line 24

def self.naive_all(str, p_min)
  result = []

  result << []
  result[0] << Array.new(str.length) {|idx| idx + 1 }

  idx = 1
  while true
    result << []
  
    result[idx - 1].each do |item|
      counter = char_counter_inc(str, item, idx - 1)
      counter.each do |positions|
        if positions != nil && positions.size > 1
          result[idx] << positions
          result[idx - 1].delete_if { |item| item == positions }
        end
      end
    end
  
    break if result.last.size == 0
  
    idx += 1
  end

  result[p_min ... -1] || []
end

.psy1(data, p_min, decode_result = true) ⇒ Object

Computes all the complete nonextendible repeats using PSY1 algorithm

Parameters:

  • data (Array)

    Array of objects

  • p_min (Integer)

    The minimum number of positions in which we have repetition

  • decode_result (Boolean) (defaults to: true)

Raises:

  • (ArgumentError)


13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/trace_visualization/repetitions_psy.rb', line 13

def self.psy1(data, p_min, decode_result = true)
  raise ArgumentError, 'Data is empty' if data == nil || data.size == 0
  
  sa  = TraceVisualization::SuffixArray.effective(data)
  lcp = TraceVisualization::LongestCommonPrefix.effective(data, sa, data.size)
  bwt = TraceVisualization::BurrowsWheelerTransform.bwt(data, sa, data.size)
  
  result = psy1_original(lcp, bwt, p_min, data.length)
  result = decode_psy1_result(result, sa) if decode_result
  
  result
end

.psy1_original(_LCP, _BWT, p_min, n) ⇒ Object

PSY1 computes all the complete nonextendible repeats in ‘str’ of length p >= p_min. Complexity: Theta(n)

Article: Fast Optimal Algorithms for Computing All the Repeats is a String by Simon J. Puglisi, William F. Smyth, Munina Yusufu



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/trace_visualization/repetitions_psy.rb', line 32

def self.psy1_original(_LCP, _BWT, p_min, n)
  result = []

  lcp = -1
  lb = 0
  bwt1 = _BWT[0]
        
  _LB = []
  _LB.push(:lcp => lcp, :lb => lb, :bwt => bwt1)
        
  for j in 0 ... n
    lb = j
          
    lcp  = j + 1 < n ? _LCP[j + 1] : -1
    bwt2 = j + 1 < n ? _BWT[j + 1] : TraceVisualization::TERMINATION_CHAR
          
    bwt = le_letter(bwt1, bwt2)
    bwt1 = bwt2
          
    while _LB.last()[:lcp] > lcp
      prev = _LB.pop()
                  
      if prev[:bwt] == TraceVisualization::TERMINATION_CHAR && prev[:lcp] >= p_min
        result.push(:lcp => prev[:lcp], :i => prev[:lb], :j => j)
      end
                  
      lb = prev[:lb]
      _LB.last()[:bwt] = le_letter(prev[:bwt], _LB.last()[:bwt])
      bwt = le_letter(prev[:bwt], bwt)
    end
          
    if _LB.last()[:lcp] == lcp
      _LB.last()[:bwt] = le_letter(_LB.last()[:bwt], bwt)
    else
      _LB.push(:lcp => lcp, :lb => lb, :bwt => bwt)
    end
  end

  result    
end