Class: Pocketsphinx::SpeechRecognizer

Inherits:
Object
  • Object
show all
Defined in:
lib/pocketsphinx/speech_recognizer.rb

Overview

Reads audio data from a recordable interface and decodes it into utterances.

Essentially orchestrates interaction between Recordable and Decoder, and detects new utterances.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(configuration = nil) ⇒ SpeechRecognizer



11
12
13
# File 'lib/pocketsphinx/speech_recognizer.rb', line 11

# Creates a recognizer, remembering the given configuration.
#
# @param configuration [Object, nil] configuration to store; nil (the
#   default) leaves @configuration unset-equivalent, so the #configuration
#   reader falls back to Configuration.default on first access
def initialize(configuration = nil)
  @configuration = configuration
end

Instance Attribute Details

#configuration ⇒ Object



23
24
25
# File 'lib/pocketsphinx/speech_recognizer.rb', line 23

# Returns the configuration in use.
#
# When no truthy configuration has been stored yet, Configuration.default is
# fetched, cached in @configuration, and returned.
#
# @return [Object] the stored configuration, or the cached default
def configuration
  return @configuration if @configuration

  @configuration = Configuration.default
end

#decoder ⇒ Object



19
20
21
# File 'lib/pocketsphinx/speech_recognizer.rb', line 19

# Returns the decoder, building it on first use.
#
# The decoder is constructed from #configuration and cached in @decoder, so
# later calls return the same instance.
#
# @return [Object] the cached decoder
def decoder
  return @decoder if @decoder

  @decoder = Decoder.new(configuration)
end

#recordable ⇒ Object



15
16
17
# File 'lib/pocketsphinx/speech_recognizer.rb', line 15

# Returns the recordable interface that audio is read from.
#
# @return [Object] the value stored in @recordable
# @raise [RuntimeError] when @recordable is nil or false
def recordable
  return @recordable if @recordable

  raise "A SpeechRecognizer must have a recordable interface"
end

Instance Method Details

#in_speech? ⇒ Boolean



71
72
73
74
# File 'lib/pocketsphinx/speech_recognizer.rb', line 71

# Reports whether speech is currently being detected.
#
# This is the predicate #recognize consults to decide whether audio belongs
# to an utterance.
#
# @return [Object] whatever decoder.in_speech? returns
def in_speech?
  # Use Pocketsphinx's implementation by default
  decoder.in_speech?
end

#recognize(max_samples = 4096) ⇒ Object

Recognize utterances and yield hypotheses in an infinite loop

Splits speech into utterances by detecting silence between them. By default this uses Pocketsphinx’s internal Voice Activity Detection (VAD) which can be configured by adjusting the vad_postspeech, vad_prespeech, and vad_threshold settings.



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/pocketsphinx/speech_recognizer.rb', line 47

# Recognize utterances and yield hypotheses in a loop.
#
# Starts an utterance on the decoder, then records from the recordable
# interface, feeding audio through #process_audio. While #in_speech? is
# false, audio is simply processed; the loop ends when #process_audio
# returns a falsy value in that state. Once #in_speech? turns true, audio is
# processed until #in_speech? turns false again (or #process_audio returns a
# falsy value), after which #get_hypothesis is called and its result yielded
# if truthy.
#
# Speech boundaries are detected exclusively through #in_speech?, so
# overriding that predicate changes segmentation consistently.
#
# @param max_samples [Integer] number of :int16 elements allocated for the
#   buffer; also passed to #process_audio on every call
# @yield [hypothesis] each truthy value returned by #get_hypothesis
def recognize(max_samples = 4096)
  decoder.start_utterance
  @recognizing = true

  recordable.record do
    FFI::MemoryPointer.new(:int16, max_samples) do |buffer|
      loop do
        if in_speech?
          # Go through the #in_speech? hook here as well; asking the decoder
          # directly would ignore an overridden predicate and could finalize
          # an utterance without consuming any of its audio.
          while in_speech?
            break unless process_audio(buffer, max_samples)
          end

          hypothesis = get_hypothesis
          yield hypothesis if hypothesis
        else
          break unless process_audio(buffer, max_samples)
        end
      end
    end
  end
ensure
  # Always clear the flag, including when the loop exits via an exception.
  @recognizing = false
end

#recognizing? ⇒ Boolean



76
77
78
# File 'lib/pocketsphinx/speech_recognizer.rb', line 76

# Tells whether #recognize is currently running.
#
# Only the literal value true counts; nil, false and any other value stored
# in @recognizing yield false.
#
# @return [Boolean]
def recognizing?
  @recognizing.equal?(true)
end

#reconfigure(configuration = nil) ⇒ Object

Reinitialize the decoder with updated configuration.

See Decoder#reconfigure



33
34
35
36
37
38
# File 'lib/pocketsphinx/speech_recognizer.rb', line 33

# Reinitialize the decoder with updated configuration.
#
# A truthy configuration is first stored on the recognizer; the argument is
# then handed to decoder.reconfigure as given (including nil). If recognition
# is in progress, a new utterance is started on the decoder afterwards.
#
# @param configuration [Object, nil] replacement configuration, if any
# @return [Object, nil] the result of decoder.start_utterance when
#   recognizing, otherwise nil
def reconfigure(configuration = nil)
  if configuration
    self.configuration = configuration
  end

  decoder.reconfigure(configuration)
  return unless recognizing?

  decoder.start_utterance
end