Class: Diarize::Speaker
- Inherits:
-
Object
- Object
- Diarize::Speaker
- Includes:
- JBLAS, ToRdf
- Defined in:
- lib/diarize/speaker.rb
Constant Summary collapse
- @@log_likelihood_threshold =
-33
- @@detection_threshold =
0.2
- @@speakers =
{}
Instance Attribute Summary collapse
-
#gender ⇒ Object
readonly
Returns the value of attribute gender.
-
#model ⇒ Object
Returns the value of attribute model.
-
#model_uri ⇒ Object
Returns the value of attribute model_uri.
-
#normalized ⇒ Object
Returns the value of attribute normalized.
Class Method Summary collapse
- .detection_threshold ⇒ Object
- .detection_threshold=(threshold) ⇒ Object
- .divergence(speaker1, speaker2) ⇒ Object
- .divergence_lium(speaker1, speaker2) ⇒ Object
- .divergence_ruby(speaker1, speaker2) ⇒ Object
- .find_or_create(uri, gender) ⇒ Object
- .load_model(filename) ⇒ Object
- .match(speakers) ⇒ Object
- .match_sets(speakers1, speakers2) ⇒ Object
- .ubm ⇒ Object
Instance Method Summary collapse
-
#initialize(uri = nil, gender = nil, model_file = nil) ⇒ Speaker
constructor
A new instance of Speaker.
- #mean_log_likelihood ⇒ Object
- #mean_log_likelihood=(mll) ⇒ Object
- #namespaces ⇒ Object
- #normalize! ⇒ Object
- #rdf_mapping ⇒ Object
- #same_speaker_as(other) ⇒ Object
- #save_model(filename) ⇒ Object
- #supervector ⇒ Object
- #type_uri ⇒ Object
- #uri ⇒ Object
Constructor Details
#initialize(uri = nil, gender = nil, model_file = nil) ⇒ Speaker
Returns a new instance of Speaker.
35 36 37 38 39 40 |
# File 'lib/diarize/speaker.rb', line 35
# Builds a speaker. Every argument is optional.
#
# uri        - stored in @uri
# gender     - stored in @gender
# model_file - when truthy, handed to Speaker.load_model and the result is kept in @model
def initialize(uri = nil, gender = nil, model_file = nil)
  @uri = uri
  @gender = gender
  # A new speaker always starts out as not normalized.
  @normalized = false
  # With no model file, @model simply stays nil.
  if model_file
    @model = Speaker.load_model(model_file)
  end
end
Instance Attribute Details
#gender ⇒ Object (readonly)
Returns the value of attribute gender.
33 34 35 |
# Reader for the @gender instance variable; this page lists the attribute as read-only.
# File 'lib/diarize/speaker.rb', line 33 def gender @gender end |
#model ⇒ Object
Returns the value of attribute model.
32 33 34 |
# Reader for the @model instance variable (the object produced by Speaker.load_model, when one was loaded).
# File 'lib/diarize/speaker.rb', line 32 def model @model end |
#model_uri ⇒ Object
Returns the value of attribute model_uri.
32 33 34 |
# Reader for the @model_uri instance variable; rdf_mapping emits it under the 'ws:model' key.
# File 'lib/diarize/speaker.rb', line 32 def model_uri @model_uri end |
#normalized ⇒ Object
Returns the value of attribute normalized.
32 33 34 |
# Reader for the @normalized flag: false after construction, true once normalize! has run.
# File 'lib/diarize/speaker.rb', line 32 def normalized @normalized end |
Class Method Details
.detection_threshold ⇒ Object
66 67 68 |
# Returns the class-wide @@detection_threshold that same_speaker_as compares its detection score against.
# File 'lib/diarize/speaker.rb', line 66 def self.detection_threshold @@detection_threshold end |
.detection_threshold=(threshold) ⇒ Object
62 63 64 |
# Replaces the class-wide @@detection_threshold; the new value applies to every later same_speaker_as call.
# File 'lib/diarize/speaker.rb', line 62 def self.detection_threshold=(threshold) @@detection_threshold = threshold end |
.divergence(speaker1, speaker2) ⇒ Object
79 80 81 82 83 84 85 |
# File 'lib/diarize/speaker.rb', line 79
# Distance between the models of two speakers.
# Returns nil when either speaker has no model; otherwise the result of divergence_lium.
def self.divergence(speaker1, speaker2)
  # TODO bundle in mean_log_likelihood to weight down unlikely models?
  if speaker1.model && speaker2.model
    # MAP Gaussian divergence
    # See "A model space framework for efficient speaker detection", Interspeech'05
    divergence_lium(speaker1, speaker2)
  end
end
.divergence_lium(speaker1, speaker2) ⇒ Object
87 88 89 |
# Hands both speakers' models to Distance.GDMAP and returns its result.
# NOTE(review): the fr.lium.spkDiarization... chain presumably resolves through a JRuby Java bridge - confirm.
# File 'lib/diarize/speaker.rb', line 87 def self.divergence_lium(speaker1, speaker2) fr.lium.spkDiarization.libModel.Distance.GDMAP(speaker1.model, speaker2.model) end |
.divergence_ruby(speaker1, speaker2) ⇒ Object
91 92 93 |
# Alternative to divergence_lium: calls SuperVector.divergence on the two speakers' supervectors.
# File 'lib/diarize/speaker.rb', line 91 def self.divergence_ruby(speaker1, speaker2) SuperVector.divergence(speaker1.supervector, speaker2.supervector) end |
.find_or_create(uri, gender) ⇒ Object
74 75 76 77 |
# File 'lib/diarize/speaker.rb', line 74
# Returns the speaker registered under uri in @@speakers, creating and registering
# a new one (with the given gender) the first time a uri is seen.
# The gender argument is ignored when the uri is already registered.
def self.find_or_create(uri, gender)
  @@speakers[uri] ||= Speaker.new(uri, gender)
end
.load_model(filename) ⇒ Object
70 71 72 |
# Reads a model from filename via read_gmm (defined outside this page) and returns its result.
# File 'lib/diarize/speaker.rb', line 70 def self.load_model(filename) read_gmm(filename) end |
.match(speakers) ⇒ Object
105 106 107 |
# File 'lib/diarize/speaker.rb', line 105
# Tests every unordered pair drawn from speakers and returns the pairs
# (two-element arrays) for which the first reports same_speaker_as the second.
def self.match(speakers)
  candidate_pairs = speakers.combination(2)
  candidate_pairs.select do |first, second|
    first.same_speaker_as(second)
  end
end
.match_sets(speakers1, speakers2) ⇒ Object
95 96 97 98 99 100 101 102 103 |
# File 'lib/diarize/speaker.rb', line 95
# Compares every speaker of the first collection with every speaker of the second
# and returns the [ s1, s2 ] pairs for which s1.same_speaker_as(s2) is truthy.
# Pairs come out ordered by speakers1 first, then by speakers2.
def self.match_sets(speakers1, speakers2)
  speakers1.flat_map do |left|
    hits = speakers2.select { |right| left.same_speaker_as(right) }
    hits.map { |right| [ left, right ] }
  end
end
.ubm ⇒ Object
42 43 44 45 46 47 |
# File 'lib/diarize/speaker.rb', line 42
# Builds a speaker that wraps the 'ubm.gmm' model stored next to this source file.
# The speaker is flagged as already normalized, so normalize! leaves it untouched.
# A fresh speaker (and a fresh model load) is produced on every call.
def self.ubm
  speaker = Speaker.new
  speaker.normalized = true
  # Absolute path of the directory holding this file, so the lookup does not
  # depend on the process's current working directory.
  model_dir = File.expand_path(File.dirname(__FILE__))
  speaker.model = Speaker.load_model(File.join(model_dir, 'ubm.gmm'))
  speaker
end
Instance Method Details
#mean_log_likelihood ⇒ Object
49 50 51 |
# File 'lib/diarize/speaker.rb', line 49
# Returns the value set through mean_log_likelihood= when there is one,
# otherwise falls back to the figure reported by the model.
def mean_log_likelihood
  # Will be NaN if model was loaded from somewhere
  @mean_log_likelihood || model.mean_log_likelihood
end
#mean_log_likelihood=(mll) ⇒ Object
53 54 55 |
# Stores an explicit value that #mean_log_likelihood then returns in place of the model's own figure.
# File 'lib/diarize/speaker.rb', line 53 def mean_log_likelihood=(mll) @mean_log_likelihood = mll end |
#namespaces ⇒ Object
145 146 147 |
# Adds the 'ws' prefix to the namespace map returned by the inherited implementation.
# File 'lib/diarize/speaker.rb', line 145 def namespaces super.merge 'ws' => 'http://wsarchive.prototype0.net/ontology/' end |
#normalize! ⇒ Object
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
# File 'lib/diarize/speaker.rb', line 109
# Normalizes the speaker's model in place, once. Later calls are no-ops.
#
# Applies M-Norm from "D-MAP: a Distance-Normalized MAP Estimation of Speaker Models for
# Automatic Speaker Verification" to the associated GMM, placing it on a unit hyper-sphere
# with a UBM centre (model will be at distance one from the UBM according to GDMAP).
# Using supervectors:
#   vector = (1.0 / distance_to_ubm) * vector + (1.0 - 1.0 / distance_to_ubm) * ubm_vector
#
# Returns the (truthy) normalized flag.
def normalize!
  return @normalized if @normalized

  speaker_ubm = Speaker.ubm
  distance_to_ubm = Math.sqrt(Speaker.divergence(self, speaker_ubm))
  # Both weights are the same for every mean, so compute the scale once.
  scale = 1.0 / distance_to_ubm
  ubm_components = speaker_ubm.model.components

  model.nb_of_components.times do |k|
    gaussian = model.components.get(k)
    ubm_gaussian = ubm_components.get(k)
    gaussian.dim.times do |i|
      normalized_mean = scale * gaussian.mean(i) + (1.0 - scale) * ubm_gaussian.mean(i)
      gaussian.set_mean(i, normalized_mean)
    end
  end

  # The means were just rewritten in place, so a supervector memoized before this
  # point would describe the old model; drop it so #supervector rebuilds it.
  # NOTE(review): assumes SuperVector.generate_from_model reads the component means - confirm.
  @supervector = nil
  @normalized = true
end
#rdf_mapping ⇒ Object
157 158 159 |
# File 'lib/diarize/speaker.rb', line 157
# Property/value pairs describing this speaker, keyed by 'ws:'-prefixed names.
# Note that the likelihood is read straight from the model, not from #mean_log_likelihood.
def rdf_mapping
  mapping = {}
  mapping['ws:gender'] = gender
  mapping['ws:model'] = model_uri
  mapping['ws:mean_log_likelihood'] = model.mean_log_likelihood
  mapping['ws:supervector_hash'] = supervector.hash.to_s
  mapping
end
#same_speaker_as(other) ⇒ Object
129 130 131 132 133 134 135 136 |
# File 'lib/diarize/speaker.rb', line 129
# Decides whether this speaker and other are the same person.
#
# Returns nil when the lower of the two mean log-likelihoods does not exceed
# @@log_likelihood_threshold. Otherwise both speakers are normalized (in place)
# and the result is whether the detection score exceeds @@detection_threshold.
def same_speaker_as(other)
  # Detection score defined in Ben2005
  weakest_likelihood = [ mean_log_likelihood, other.mean_log_likelihood ].min
  return unless weakest_likelihood > @@log_likelihood_threshold

  normalize!
  other.normalize!
  score = 1.0 - Speaker.divergence(other, self)
  score > @@detection_threshold
end
#save_model(filename) ⇒ Object
57 58 59 60 |
# Writes @model to filename through write_gmm (defined outside this page); no check is made on whether the model was normalized.
# File 'lib/diarize/speaker.rb', line 57 def save_model(filename) # TODO perhaps a warning if a normalised model is being saved? write_gmm(filename, @model) end |
#supervector ⇒ Object
138 139 140 141 |
# Builds the supervector from the current model on first use and memoizes it in @supervector.
# NOTE(review): the memoized value is reused as-is on later calls, whatever the model's state (see the TODO).
# File 'lib/diarize/speaker.rb', line 138 def supervector # TODO: cache only when normalized @supervector ||= SuperVector.generate_from_model(model) end |
#type_uri ⇒ Object
153 154 155 |
# Constant type identifier for speakers, written with the 'ws' prefix declared in #namespaces.
# NOTE(review): presumably consumed by the ToRdf mixin - confirm.
# File 'lib/diarize/speaker.rb', line 153 def type_uri 'ws:Speaker' end |
#uri ⇒ Object
149 150 151 |
# Reader for the @uri instance variable assigned by the constructor.
# File 'lib/diarize/speaker.rb', line 149 def uri @uri end |