Module: PhonemEncoder

Included in:
String
Defined in:
lib/phonem_encoder.rb,
lib/phonem_encoder/version.rb

Constant Summary collapse

VERSION =
"0.0.2"

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.encode_string(string, length = 2) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/phonem_encoder.rb', line 5

# Computes the phonetic code of +string+.
#
# The input is lower-cased, 'ß', 'ä' and 'ü' are expanded to 'ss', 'ae' and
# 'ue', and the text is split into single characters (newlines are dropped by
# the scan). It is then rewritten in two passes: with +length+ 2, overlapping
# two-letter windows are substituted and the result is fed through this method
# again with +length+ 1, which substitutes single letters. Every pass ends by
# collapsing repeated characters with +reduce_multiples+ and discarding each
# character that is not one of +abcdlmnorsuvwxyö+.
#
# An input with fewer characters than +length+ has no window of that size; its
# characters are passed through the substitution step one by one, so that a
# one-letter string keeps its code instead of producing an empty result.
#
# NOTE(review): because the two-letter windows overlap, the second letter of a
# matched group is emitted again by the following window ("ksa" encodes to
# "xsa"). That behaviour is kept unchanged here so that codes computed for
# inputs of two or more characters stay the same.
#
# @param string [String] text to encode
# @param length [Integer] window size of the current pass, 1 or 2
# @return [String] the phonetic code, possibly empty
# @raise [RuntimeError] if +length+ is neither 1 nor 2
def encode_string(string, length = 2)
  raise 'length must be 1 or 2' unless [1, 2].include?(length)

  chars = string.downcase.gsub('ß', 'ss').gsub('ä', 'ae').gsub('ü', 'ue').scan(/./)

  windows = chars.each_cons(length).map(&:join)
  # Fewer characters than +length+: no window exists, so fall back to the
  # individual characters rather than silently encoding nothing.
  windows = chars if windows.empty?

  code = windows.map do |window|
    if length == 2
      # First pass: two-letter groups.
      case window
      when 'sc', 'sz', 'cz', 'tz', 'ts' then 'c'
      when 'ae' then 'e'
      when 'pf' then 'v'
      when 'ks' then 'x'
      when 'qu' then 'kw'
      when 'ow' then 'ö'
      when 'ei', 'ey' then 'ay'
      when 'eu' then 'oy'
      when 'ou' then 'u'
      else window
      end
    else
      # Second pass: single letters.
      case window
      when 'z', 'k', 'g', 'q' then 'c'
      when 'i', 'j' then 'y'
      when 'f', 'w' then 'v'
      when 'p' then 'b'
      when 't' then 'd'
      else window
      end
    end
  end.join

  # The two-letter pass hands its output to the single-letter pass.
  code = encode_string(code, 1) if length == 2

  reduce_multiples(code).scan(/[abcdlmnorsuvwxyö]/).join
end

.reduce_multiples(code) ⇒ Object



58
59
60
61
62
63
64
# File 'lib/phonem_encoder.rb', line 58

# Collapses runs of the same character in +code+ down to a single occurrence.
#
# Each substitution pass halves adjacent identical pairs, so the pass is
# repeated until +gsub!+ reports (by returning nil) that nothing was left to
# replace. The string is modified in place and the same object is returned.
# Newlines are never collapsed because +.+ does not match them.
#
# @param code [String] mutable string to squeeze
# @return [String] +code+ itself, after modification
def reduce_multiples(code)
  loop do
    break if code.gsub!(/(.)\1/, '\1').nil?
  end

  code
end

Instance Method Details

#phonetic_code ⇒ Object

To encode a string based on the PHONEM algorithm, call phonetic_code on any String.

"Müller".phonetic_code

This will output “mulr”.

This can be used for finding strings by their phonetic sound. It is optimized for the German language.



77
78
79
# File 'lib/phonem_encoder.rb', line 77

# Returns the phonetic code of the receiver.
#
# The receiver is converted with +to_s+ and handed to
# +PhonemEncoder.encode_string+, using that method's default +length+.
#
# @return [Object] whatever +PhonemEncoder.encode_string+ returns
def phonetic_code
  PhonemEncoder.encode_string(to_s)
end