Module: TraceVisualization::Repetitions

Defined in:
lib/trace_visualization/repetitions.rb,
lib/trace_visualization/repetitions_psy.rb,
lib/trace_visualization/repetitions_context.rb

Defined Under Namespace

Classes: Context

Class Method Summary collapse

Class Method Details

.char_counter_inc(str, pos, offset) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/trace_visualization/repetitions.rb', line 52

def self.char_counter_inc(str, pos, offset)
  counter = Array.new(256)

  pos.each do |pos|
    if pos + offset - 1 < str.length
      c = str[pos + offset - 1].ord
      counter[c] = [] if counter[c] == nil
      counter[c] << pos        
    end
  end

  counter
end

.decode_psy1_result(result, sa) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
# File 'lib/trace_visualization/repetitions_psy.rb', line 70

def self.decode_psy1_result(result, sa)
  repetitions = []

  result.each do |item|
    positions = (item[:i] .. item[:j]).collect { |idx| sa[idx] }
    repetitions << TraceVisualization::Data::Repetition.new(item[:lcp], positions.sort)
  
  end
  
  repetitions
end

.le_letter(l1, l2) ⇒ Object



66
67
68
# File 'lib/trace_visualization/repetitions_psy.rb', line 66

def self.le_letter(l1, l2)
  (l1 == TraceVisualization::TERMINATION_CHAR || l1 != l2) ? TraceVisualization::TERMINATION_CHAR : l1
end

.naive(str, l) ⇒ Object

The naive approach to finding repetitions in the string. Time complexity: О(n^2)



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/trace_visualization/repetitions.rb', line 6

def self.naive(str, l)
  result = Array.new(l + 1) { [] }
  result[0] << Array.new(str.length) {|idx| idx + 1 }

  for i in 1 .. l
    result[i - 1].each do |item|
      counter = char_counter_inc(str, item, i - 1)
      counter.each do |positions|
        result[i] << positions if positions != nil && positions.size > 1
      end
    end
  end

  result[l].sort
end

.naive_all(str, p_min) ⇒ Object

The naive approach to finding repetitions in the string. Time complexity: О(n^2)



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/trace_visualization/repetitions.rb', line 24

def self.naive_all(str, p_min)
  result = []

  result << []
  result[0] << Array.new(str.length) {|idx| idx + 1 }

  idx = 1
  while true
    result << []
  
    result[idx - 1].each do |item|
      counter = char_counter_inc(str, item, idx - 1)
      counter.each do |positions|
        if positions != nil && positions.size > 1
          result[idx] << positions
          result[idx - 1].delete_if { |item| item == positions }
        end
      end
    end
  
    break if result.last.size == 0
  
    idx += 1
  end

  result[p_min ... -1] || []
end

.psy1(str, p_min, decode_result = true) ⇒ Object



8
9
10
11
12
13
14
15
16
17
# File 'lib/trace_visualization/repetitions_psy.rb', line 8

def self.psy1(str, p_min, decode_result = true)
  sa  = TraceVisualization::SuffixArray.effective(str)
  lcp = TraceVisualization::LongestCommonPrefix.effective(str, sa, str.size)
  bwt = TraceVisualization::BurrowsWheelerTransform.bwt(str, sa, str.length)
  
  result = psy1_original(lcp, bwt, 3, str.length)
  result = decode_psy1_result(result, sa) if decode_result
  
  result
end

.psy1_original(_LCP, _BWT, p_min, n) ⇒ Object

PSY1 computes all the complete nonextendible repeats in ‘str’ of length p >= p_min. Complexity: Theta(n)

Article: Fast Optimal Algorithms for Computing All the Repeats is a String by Simon J. Puglisi, William F. Smyth, Munina Yusufu



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/trace_visualization/repetitions_psy.rb', line 25

def self.psy1_original(_LCP, _BWT, p_min, n)
  result = []

  lcp = -1
  lb = 0
  bwt1 = _BWT[0]
        
  _LB = []
  _LB.push(:lcp => lcp, :lb => lb, :bwt => bwt1)
        
  for j in 0 ... n
    lb = j
          
    lcp  = j + 1 < n ? _LCP[j + 1] : -1
    bwt2 = j + 1 < n ? _BWT[j + 1] : TraceVisualization::TERMINATION_CHAR
          
    bwt = le_letter(bwt1, bwt2)
    bwt1 = bwt2
          
    while _LB.last()[:lcp] > lcp
      prev = _LB.pop()
                  
      if prev[:bwt] == TraceVisualization::TERMINATION_CHAR && prev[:lcp] >= p_min
        result.push(:lcp => prev[:lcp], :i => prev[:lb], :j => j)
      end
                  
      lb = prev[:lb]
      _LB.last()[:bwt] = le_letter(prev[:bwt], _LB.last()[:bwt])
      bwt = le_letter(prev[:bwt], bwt)
    end
          
    if _LB.last()[:lcp] == lcp
      _LB.last()[:bwt] = le_letter(_LB.last()[:bwt], bwt)
    else
      _LB.push(:lcp => lcp, :lb => lb, :bwt => bwt)
    end
  end

  result    
end