Class: Phonetics::String

Inherits:
SimpleDelegator
  • Object
show all
Defined in:
lib/phonetics/distances.rb

Overview

This wrapper around the stdlib’s String (implemented as a SimpleDelegator rather than a String subclass) allows us to iterate over each phoneme in a string without monkeypatching String.

Usage:

Phonetics::String.new("wətɛvɝ").each_phoneme.to_a
=> ["w", "ə", "t", "ɛ", "v", "ɝ"]

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.phonemes_by_length ⇒ Object

Group all phonemes by how many characters they have. Use this to walk through a string finding phonemes (looking for longest ones first)



17
18
19
20
21
22
23
24
25
# File 'lib/phonetics/distances.rb', line 17

# Group all phonemes by how many characters they have. Use this to walk
# through a string finding phonemes (looking for the longest ones first).
#
# The result is memoized in a class-level instance variable, so the grouping
# is computed from Phonetics.phonemes only on the first call.
#
# @return [Hash{Integer => Set<String>}] phoneme sets keyed by character
#   count, with keys inserted in descending order. Buckets 4 down to 1 are
#   always present (possibly empty); longer buckets are added when a phoneme
#   needs them. Empty phonemes are ignored because they can never advance a
#   scan.
def self.phonemes_by_length
  @phonemes_by_length ||= begin
    grouped = Phonetics.phonemes.group_by(&:length)
    # Keep the historical 4..1 buckets, but grow past 4 instead of raising
    # NoMethodError when a longer phoneme is present.
    longest = [4, *grouped.keys].max
    # This relies on the implicit stable key ordering of Hash objects in Ruby
    # 2+ to keep the keys in descending order.
    longest.downto(1).each_with_object({}) do |size, acc|
      acc[size] = Set.new(grouped.fetch(size, []))
    end
  end
end

Instance Method Details

#each_phoneme ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/phonetics/distances.rb', line 27

# Walk the string from left to right, emitting each recognised phoneme.
#
# At every position the candidate lengths offered by
# self.class.phonemes_by_length are tried in that hash's iteration order, and
# the first candidate found in its set is emitted. A character that does not
# start any known phoneme is skipped silently.
#
# @return [Enumerator] yields each phoneme as a String. The enumerator can be
#   iterated any number of times; every run starts at the beginning.
def each_phoneme
  Enumerator.new do |y|
    # Snapshot the characters once per enumeration instead of recomputing
    # them on every loop test and slice.
    symbols = chars
    # The cursor must live inside the block: a cursor shared across runs would
    # leave a second enumeration starting where the previous one stopped.
    idx = 0
    while idx < symbols.length
      found = false
      self.class.phonemes_by_length.each do |size, phonemes|
        next unless idx + size <= symbols.length

        candidate = symbols[idx, size].join
        next unless phonemes.include?(candidate)

        y.yield candidate
        idx += size
        found = true
        break
      end
      # No phoneme begins here; step over the unknown character.
      idx += 1 unless found
    end
  end
end