Class: MachineLearningWorkbench::Compressor::VectorQuantization

Inherits:
Object
  • Object
show all
Defined in:
lib/machine_learning_workbench/compressor/vector_quantization.rb

Overview

Standard Vector Quantization

Direct Known Subclasses

OnlineVectorQuantization

Constant Summary collapse

Verification =
MachineLearningWorkbench::Tools::Verification

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ncentrs:, dims:, vrange:, lrate:, rseed: Random.new_seed) ⇒ VectorQuantization

Returns a new instance of VectorQuantization.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 8

# Builds a vector quantizer holding `ncentrs` freshly generated centroids.
#
# @param ncentrs [Integer] number of centroids to create
# @param dims [Integer, Array<Integer>] centroid shape, normalized through `Array()`
# @param vrange [Array, Range] bounds handed to `new_centr`; stored as two Floats
# @param lrate [Numeric] learning rate, validated by `check_lrate`
# @param rseed [Integer] seed for this instance's `Random` generator
# @raise [ArgumentError] if `vrange` is an Array whose size is not 2, or is
#   neither an Array nor a Range (plus whatever `check_lrate` raises)
def initialize(ncentrs:, dims:, vrange:, lrate:, rseed: Random.new_seed)
  @rng = Random.new(rseed)
  @ncentrs = ncentrs
  @dims = Array(dims)
  # Validation lives in its own method so that online_vq can override it
  check_lrate(lrate)
  @lrate = lrate
  bounds =
    case vrange
    when Array
      raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2
      vrange
    when Range
      [vrange.first, vrange.last]
    else
      raise ArgumentError, "vrange: unrecognized type: #{vrange.class}"
    end
  @vrange = bounds.map { |bound| Float(bound) }
  @centrs = ncentrs.times.map { new_centr }
  # Per-centroid training counters: useful to understand what happens
  @ntrains = [0] * ncentrs
end

Instance Attribute Details

#centrsObject (readonly)

Returns the value of attribute centrs.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Read-only accessor for the centroid list built by the constructor.
# @return [Array] the current centroids, one entry per centroid
def centrs; @centrs; end

#dimsObject (readonly)

Returns the value of attribute dims.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Read-only accessor for the centroid shape (stored by the constructor via `Array(dims)`).
# @return [Array] the centroid dimensions
def dims; @dims; end

#lrateObject (readonly)

Returns the value of attribute lrate.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Read-only accessor for the learning rate given to the constructor.
# @return [Numeric] the learning rate
def lrate; @lrate; end

#ncentrsObject (readonly)

Returns the value of attribute ncentrs.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Read-only accessor for the number of centroids requested at construction.
# @return [Integer] the number of centroids
def ncentrs; @ncentrs; end

#ntrainsObject (readonly)

Returns the value of attribute ntrains.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Read-only accessor for the per-centroid training counters
# (initialized to zeros by the constructor, incremented by `train`).
# @return [Array<Integer>] how many times each centroid has been trained
def ntrains; @ntrains; end

#rngObject (readonly)

Returns the value of attribute rng.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Read-only accessor for the `Random` instance created from `rseed` by the constructor.
# @return [Random] this instance's random number generator
def rng; @rng; end

#vrangeObject (readonly)

Returns the value of attribute vrange.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Read-only accessor for the value bounds, stored by the constructor as two Floats.
# @return [Array<Float>] the two bounds of the value range
def vrange; @vrange; end

Instance Method Details

#check_lrate(lrate) ⇒ Object

Verify lrate to be present and within unit bounds. As a separate method only so it can be overloaded in online_vq.

Raises:

  • (ArgumentError)


28
29
30
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 28

# Verifies that `lrate` is present and within unit bounds (0 and 1 included).
# Kept as a separate method only so that it can be overridden in online_vq.
#
# @param lrate [Numeric, nil] candidate learning rate
# @return [nil] when `lrate` is acceptable
# @raise [ArgumentError] if `lrate` is nil, cannot be range-checked, or lies
#   outside the [0, 1] interval
def check_lrate(lrate)
  # A safe-navigation call would only guard against nil: values such as
  # `false` or an Array do not respond to `between?` and would leak a
  # NoMethodError instead of the ArgumentError documented above.
  in_bounds = lrate.respond_to?(:between?) && lrate.between?(0, 1)
  raise ArgumentError, "Pass a `lrate` between 0 and 1" unless in_bounds
end

#encode(vec, type: :most_similar) ⇒ Object

Encode a vector



46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 46

# Encodes a vector based on its similarities with the centroids.
#
# @param vec vector to encode, passed as-is to `similarities`
# @param type [Symbol] encoding flavor:
#   - `:most_similar`: index of the first centroid with the highest similarity
#   - `:ensemble`: the raw list of similarities
#   - `:ensemble_norm`: similarities divided by their total
# @return [Integer, Array] the code, depending on `type`
# @raise [ArgumentError] if `type` is not one of the above
def encode(vec, type: :most_similar)
  scores = similarities(vec)
  case type
  when :most_similar then scores.index(scores.max)
  when :ensemble then scores
  when :ensemble_norm
    total = scores.inject(:+)
    scores.map { |score| score / total }
  else
    raise ArgumentError, "unrecognized encode type: #{type}"
  end
end

#most_similar_centr(vec) ⇒ Array<Integer, Float>

Returns index and similarity of the most similar centroid to vector

Returns:

  • (Array<Integer, Float>)

    the index of the most similar centroid, followed by the corresponding similarity



77
78
79
80
81
82
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 77

# Finds the centroid most similar to a vector.
#
# @param vec vector to compare, passed as-is to `similarities`
# @return [Array<Integer, Float>] the index of the most similar centroid
#   (the first one in case of ties), followed by the corresponding similarity
def most_similar_centr(vec)
  scores = similarities(vec)
  best = scores.max
  [scores.index(best), best]
end

#new_centrObject

Creates a new (random) centroid



33
34
35
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 33

# Creates a new (random) centroid: an NArray of shape `dims`, filled by its
# `rand` method called with the `vrange` bounds.
#
# NOTE(review): `rng` is not referenced here, so `rseed` does not appear to
# influence centroid generation -- confirm whether that is intended.
#
# @return [NArray] the freshly generated centroid
def new_centr
  blank = NArray.new(*dims)
  blank.rand(*vrange)
end

#reconstr_error(vec) ⇒ NArray

Per-pixel errors in reconstructing vector

Returns:



86
87
88
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 86

# Per-pixel (signed) errors in reconstructing a vector: the reconstruction
# obtained from the vector's own code, minus the vector itself.
#
# @param vec [NArray] vector to encode and then reconstruct
# @param type [Symbol] encoding type, forwarded to both `encode` and
#   `reconstruction` so that the two stay consistent
# @return [NArray] difference between the reconstruction and `vec`
def reconstr_error(vec, type: :most_similar)
  # `reconstruction` expects a code (as produced by `encode`), not the raw
  # vector: the vector must be encoded first.
  code = encode(vec, type: type)
  reconstruction(code, type: type) - vec
end

#reconstruction(code, type: :most_similar) ⇒ Object

Reconstruct vector from its code (encoding)



61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 61

# Reconstructs a vector from its code (encoding).
#
# @param code [Integer, Array] a code matching `type`: a centroid index for
#   `:most_similar`, one weight per centroid for the ensemble types
# @param type [Symbol] how to interpret `code`:
#   - `:most_similar`: returns the centroid at index `code`
#   - `:ensemble`: sum of the centroids weighted by `code`, divided by the total weight
#   - `:ensemble_norm`: sum of the centroids weighted by `code`, used as-is
# @return the reconstructed vector
# @raise [ArgumentError] if `type` is not one of the above
def reconstruction(code, type: :most_similar)
  case type
  when :most_similar then centrs[code]
  when :ensemble
    total = code.reduce(:+)
    weighted = centrs.zip(code).map { |centroid, weight| centroid * weight / total }
    weighted.reduce(:+)
  when :ensemble_norm
    weighted = centrs.zip(code).map { |centroid, weight| centroid * weight }
    weighted.reduce(:+)
  else
    raise ArgumentError, "unrecognized reconstruction type: #{type}"
  end
end

#similarities(vec) ⇒ Object

Computes similarities between vector and all centroids

Raises:

  • (NotImplementedError)


38
39
40
41
42
43
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 38

# Computes the similarity (dot product) between a vector and every centroid.
#
# @param vec vector responding to `shape`; only shapes with at most one
#   dimension are supported
# @return [Array] one similarity per centroid, in centroid order
# @raise [NotImplementedError] if `vec` has more than one dimension
#   (note: in Ruby this is a ScriptError, so a bare `rescue` will not catch it)
def similarities(vec)
  multi_dim = vec.shape.size > 1
  raise NotImplementedError if multi_dim

  # A parallel variant (through the `parallel` gem) was sketched in the past
  # but is not enabled.
  centrs.map { |centroid| centroid.dot(vec) }
end

#train(vec_lst, debug: false) ⇒ Object

Train on vector list



103
104
105
106
107
108
109
110
111
112
113
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 103

# Trains on a list of vectors, one vector at a time, counting in `ntrains`
# how many times each centroid gets trained.
#
# Two ways here:
# - Batch: canonical, centrs updated with each vec
# - Parallel: could be parallel either on simils or on training (?)
# Unsure on the correctness of either Parallel, so Batch is used.
#
# @param vec_lst [#each] the vectors to train on
# @param debug [Boolean] when true, prints a dot after each trained vector
# @return whatever `vec_lst.each` returns (the list itself for an Array)
def train(vec_lst, debug: false)
  # Plain `each`: the position of the vector in the list is never needed
  vec_lst.each do |vec|
    trained_idx = train_one(vec)
    print '.' if debug
    ntrains[trained_idx] += 1
  end
end

#train_one(vec) ⇒ Integer

Train on one vector

Returns:

  • (Integer)

    index of trained centroid



92
93
94
95
96
97
98
99
100
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 92

# Trains on one vector: the most similar centroid is replaced by a blend of
# itself (weight `1 - lrate`) and the vector (weight `lrate`).
#
# @param vec vector to train on
# @return [Integer] index of the trained centroid
def train_one(vec)
  winner, _score = most_similar_centr(vec)
  keep = 1 - lrate
  # note: this looks like a dot product `[c[i], vec].dot([1-lrate, lrate])`,
  #   which might be optimizable
  centrs[winner] = centrs[winner] * keep + vec * lrate
  # A range verification of the updated centroid was checked to be unnecessary
  winner
end