Module: BoyerMoore

Defined in:
lib/boyermoore.rb

Overview

ported directly from this version wikipedia: en.wikipedia.org/w/index.php?title=Boyer%E2%80%93Moore_string_search_algorithm&diff=391986850&oldid=391398281 it’s not very rubyish but it works

Class Method Summary collapse

Class Method Details

.compute_prefix(str) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/boyermoore.rb', line 36

def self.compute_prefix(str)
  size = str.length
  k = 0
  result = [0]
  1.upto(size - 1) do |q|
    while (k > 0) && (str[k] != str[q])
      k = result[k-1]
    end
    k += 1 if(str[k] == str[q])
    result[q] = k
  end
  result
end

.needle_matches?(needle, haystack) ⇒ Boolean

Returns:

  • (Boolean)


110
111
112
113
114
115
116
# File 'lib/boyermoore.rb', line 110

def self.needle_matches?(needle, haystack)
  if needle.kind_of?(Regexp)
    needle.match(haystack) ? true : false
  else
    needle == haystack
  end
end

.prepare_badcharacter_heuristic(str) ⇒ Object



50
51
52
53
54
55
56
# File 'lib/boyermoore.rb', line 50

def self.prepare_badcharacter_heuristic(str)
  result = RichHash.new
  0.upto(str.length - 1) do |i|
    result[str[i]] = i
  end
  result
end

.prepare_goodsuffix_heuristic(normal) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/boyermoore.rb', line 58

def self.prepare_goodsuffix_heuristic(normal)
  size = normal.size
  result = []

  reversed = normal.dup.reverse
  prefix_normal = compute_prefix(normal)
  prefix_reversed = compute_prefix(reversed)

  0.upto(size) do |i|
    result[i] = size - prefix_normal[size-1]
  end

  0.upto(size-1) do |i|
    j = size - prefix_reversed[i]
    k = i - prefix_reversed[i]+1
    result[j] = k if result[j] > k
  end
  result
end

.search(haystack, needle) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/boyermoore.rb', line 78

def self.search(haystack, needle)
  needle_len = needle.size
  haystack_len = haystack.size

  return nil if haystack_len == 0
  return haystack if needle_len == 0

  badcharacter = self.prepare_badcharacter_heuristic(needle)
  goodsuffix   = self.prepare_goodsuffix_heuristic(needle)

  s = 0
  while s <= haystack_len - needle_len
    j = needle_len
    while (j > 0) && self.needle_matches?(needle[j-1], haystack[s+j-1])
      j -= 1
    end

    if(j > 0)
      k = badcharacter[haystack[s+j-1]]
      k = -1 unless k
      if (k < j) && (m = j-k-1) > goodsuffix[j]
        s += m
      else
        s += goodsuffix[j]
      end
    else
      return s
    end
  end
  return nil
end