Class: Phonetic::DoubleMetaphone

Inherits:
Algorithm show all
Defined in:
lib/phonetic/double_metaphone.rb

Overview

The Double Metaphone phonetic encoding algorithm is the second generation of the Metaphone algorithm. Its original implementation was described by Lawrence Philips in the June 2000 issue of C/C++ Users Journal.

This implementation is based on the PHP implementation by Stephen Woodbridge and contains modifications to the algorithm by Kevin Atkinson.

Examples:

Phonetic::DoubleMetaphone.encode('czerny') # => ['SRN', 'XRN']
Phonetic::DoubleMetaphone.encode('dumb')   # => ['TM', 'TM']
Phonetic::DoubleMetaphone.encode('edgar')  # => ['ATKR', 'ATKR']
# or use alias:
Phonetic::Metaphone2.encode('czerny') # => ['SRN', 'XRN']
Phonetic::Metaphone2.encode('dumb')   # => ['TM', 'TM']
Phonetic::Metaphone2.encode('edgar')  # => ['ATKR', 'ATKR']

See Also:

Class Method Summary collapse

Class Method Details

.encode(str, options = { size: 4 }) ⇒ Object



94
95
96
# File 'lib/phonetic/double_metaphone.rb', line 94

# Encode +str+ to its Double Metaphone code.
#
# This is a thin wrapper: the string and options are handed to
# +encode_word+ unchanged, and its result is returned as-is.
#
# @param str [String] the text to encode
# @param options [Hash] encoding options, forwarded to +encode_word+
# @return [Object] whatever +encode_word+ returns for +str+
def self.encode(str, options = { size: 4 })
  encode_word(str, options)
end

.encode_word(word, options = { size: 4 }) ⇒ Object

Encode word to its Double Metaphone code.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/phonetic/double_metaphone.rb', line 26

# Encode a single word to its Double Metaphone code.
#
# The word is stripped of surrounding whitespace and upcased, then scanned
# left to right. Each character is dispatched to a letter-specific helper
# whose return value is added to the scan index.
#
# @param word [String] the word to encode
# @param options [Hash] encoding options
# @option options [Integer] :size (4) maximum length of each returned code;
#   a missing or nil value falls back to 4
# @return [Array<String>] a two-element array +[primary, secondary]+, each
#   element truncated to at most +:size+ characters
def self.encode_word(word, options = { size: 4 })
  code_size = options[:size] || 4
  w = word.strip.upcase
  code = ['', '']
  # Give the accumulator an +add+ method that appends to both the primary
  # and the secondary code at once; +code+ is passed to every helper below.
  def code.add(primary, secondary)
    self[0] += primary
    self[1] += secondary
  end
  i = 0
  len = w.size
  # pad the original string so that we can index beyond the edge of the world
  w += ' ' * 5
  i += encode_start_of_word(w, code)
  # Stop at the end of the (unpadded) word, or as soon as both codes have
  # reached the requested size.
  while i < len && (code.first.size < code_size || code.last.size < code_size)
    case w[i]
    when 'A', 'E', 'I', 'O', 'U', 'Y'
      # vowels are skipped here without adding to the code
      i += 1
    when 'B'
      # "-mb", e.g. "dumb", already skipped over...
      i += gen_encode(w, i, 'P', 'P', code)
    when 'Ç', 'ç'
      code.add 'S', 'S'
      i += 1
    when 'C'
      i += encode_c(w, i, len, code)
    when 'D'
      i += encode_d(w, i, len, code)
    when 'F', 'K', 'N'
      i += gen_encode(w, i, w[i], w[i], code)
    when 'G'
      i += encode_g(w, i, len, code)
    when 'H'
      i += encode_h(w, i, len, code)
    when 'J'
      i += encode_j(w, i, len, code)
    when 'L'
      i += encode_l(w, i, len, code)
    when 'M'
      i += encode_m(w, i, len, code)
    when 'Ñ', 'ñ'
      code.add 'N', 'N'
      i += 1
    when 'P'
      i += encode_p(w, i, len, code)
    when 'Q'
      i += gen_encode(w, i, 'K', 'K', code)
    when 'R'
      i += encode_r(w, i, len, code)
    when 'S'
      i += encode_s(w, i, len, code)
    when 'T'
      i += encode_t(w, i, len, code)
    when 'V'
      i += gen_encode(w, i, 'F', 'F', code)
    when 'W'
      i += encode_w(w, i, len, code)
    when 'X'
      i += encode_x(w, i, len, code)
    when 'Z'
      i += encode_z(w, i, len, code)
    else
      # any other character (digits, punctuation, ...) is ignored
      i += 1
    end
  end
  [code.first[0, code_size], code.last[0, code_size]]
end