Class: MachineLearningWorkbench::Compressor::VectorQuantization

Inherits:
Object
  • Object
show all
Defined in:
lib/machine_learning_workbench/compressor/vector_quantization.rb

Overview

Standard Vector Quantization

Direct Known Subclasses

CopyVQ, DecayingLearningRateVQ

Constant Summary collapse

# Similarity functions, keyed by name. Each entry takes a centroid and a
# vector and returns a score where larger means "more similar".
SIMIL = {
  # Plain dot product between centroid and vector.
  dot: lambda { |centr, vec| centr.dot(vec) },
  # Negated sum of squared differences, divided by the centroid size.
  mse: lambda { |centr, vec| -((centr - vec)**2).sum / centr.size }
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ncentrs:, dims:, vrange:, lrate:, simil_type: nil, encoding_type: nil, init_centr_vrange: nil, rseed: Random.new_seed) ⇒ VectorQuantization

Returns a new instance of VectorQuantization.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 9

# Builds the quantizer state and draws the initial centroids.
#
# @param ncentrs [Integer] number of centroids to create
# @param dims centroid dimensions; wrapped with `Array()` before storing
# @param vrange [Array, Range] value range, either a two-element Array or a
#   Range; stored as a two-element Array of Floats
# @param lrate learning rate, validated through `check_lrate`
# @param simil_type [Symbol, nil] similarity key; `:dot` when nil
# @param encoding_type [Symbol, nil] encoding key; `:ensemble_norm` when nil
# @param init_centr_vrange value range used when drawing random centroids;
#   falls back to the `vrange` argument (as passed) when nil
# @param rseed seed for the instance RNG
# @raise [ArgumentError] if `vrange` is neither a two-element Array nor a Range
def initialize ncentrs:, dims:, vrange:, lrate:, simil_type: nil, encoding_type: nil, init_centr_vrange: nil, rseed: Random.new_seed

  @rng = Random.new rseed # TODO: RNG CURRENTLY NOT USED!!

  @ncentrs = ncentrs
  @dims = Array(dims)
  check_lrate lrate # hack: so that we can overload it in dlr_vq
  @lrate = lrate
  @simil_type = simil_type || :dot
  @encoding_type = encoding_type || :ensemble_norm
  # Honor the caller-supplied range. The previous `@init_centr_vrange ||= vrange`
  # never read the keyword argument, so it was silently ignored.
  @init_centr_vrange = init_centr_vrange || vrange
  @vrange = case vrange
    when Array
      raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2
      vrange.map { |bound| Float(bound) }
    when Range
      [vrange.first, vrange.last].map { |bound| Float(bound) }
    else raise ArgumentError, "vrange: unrecognized type: #{vrange.class}"
  end
  init_centrs
  @ntrains = [0]*ncentrs # useful to understand what happens
  @utility = NArray.zeros [ncentrs] # trace how 'useful' are centroids to encodings
  @ncodes = 0
end

Instance Attribute Details

#centrsObject (readonly)

Returns the value of attribute centrs.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @centrs, the list of centroids built by `init_centrs`.
def centrs
  @centrs
end

#dimsObject (readonly)

Returns the value of attribute dims.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @dims, the `dims` constructor argument wrapped with `Array()`.
def dims
  @dims
end

#encoding_typeObject (readonly)

Returns the value of attribute encoding_type.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @encoding_type; the constructor sets it to :ensemble_norm when
# no encoding type is passed.
def encoding_type
  @encoding_type
end

#init_centr_vrangeObject (readonly)

Returns the value of attribute init_centr_vrange.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @init_centr_vrange, the value range that `new_centr` splats into
# `rand` when drawing a random centroid.
def init_centr_vrange
  @init_centr_vrange
end

#lrateObject (readonly)

Returns the value of attribute lrate.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @lrate, the learning rate stored by the constructor after
# `check_lrate`; `train_one` uses it as the blending weight for the input.
def lrate
  @lrate
end

#ncentrsObject (readonly)

Returns the value of attribute ncentrs.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @ncentrs, the number of centroids requested at construction.
def ncentrs
  @ncentrs
end

#ncodesObject

Returns the value of attribute ncodes.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @ncodes, a counter that starts at 0 and is incremented by each
# successful `encode` call.
def ncodes
  @ncodes
end

#ntrainsObject (readonly)

Returns the value of attribute ntrains.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @ntrains, one counter per centroid (all starting at 0), bumped
# by `train` for the centroid that `train_one` reports as trained.
def ntrains
  @ntrains
end

#rngObject (readonly)

Returns the value of attribute rng.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @rng, the Random instance seeded with `rseed` by the constructor
# (flagged there as not yet used).
def rng
  @rng
end

#simil_typeObject (readonly)

Returns the value of attribute simil_type.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @simil_type, the default key into SIMIL used by `similarities`;
# the constructor sets it to :dot when no similarity type is passed.
def simil_type
  @simil_type
end

#utilityObject

Returns the value of attribute utility.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @utility, the per-centroid usage statistics updated by `encode`.
def utility
  @utility
end

#vrangeObject (readonly)

Returns the value of attribute vrange.



5
6
7
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Reader for @vrange, which the constructor normalizes to a two-element Array
# of Floats.
def vrange
  @vrange
end

Instance Method Details

#check_lrate(lrate) ⇒ Object

Verifies that `lrate` is present and within unit bounds (between 0 and 1). It is a separate method only so that it can be overridden in `DecayingLearningRateVQ`.

Raises:

  • (ArgumentError)


36
37
38
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 36

# Validates the learning rate: it must be non-nil and lie in [0, 1].
# Kept as its own method so subclasses can replace the check.
#
# @param lrate the candidate learning rate
# @raise [ArgumentError] if `lrate` is nil or outside [0, 1]
def check_lrate lrate
  in_bounds = lrate.nil? ? nil : lrate.between?(0, 1)
  raise ArgumentError, "Pass a `lrate` between 0 and 1" unless in_bounds
end

#encode(vec, type: encoding_type) ⇒ Object

Encodes a vector and tracks the utility of centroids based on how much they contribute to the encoding. TODO: `encode = Encodings.const_get(type)` in `initialize`. NOTE: hashes of lambdas or modules cannot access `ncodes` and `utility`.



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 75

# Encodes a vector against the current centroids and updates the usage
# statistics (@ncodes, @utility).
#
# - :most_similar  returns the index of the best-scoring centroid and adds 1
#                  to that centroid's utility entry.
# - :ensemble      returns the raw similarities; utility tracks the
#                  similarities divided by their sum.
# - :ensemble_norm returns the similarities divided by their sum; utility
#                  tracks that same normalized code.
#
# @param vec the vector to encode
# @param type [Symbol] encoding kind, defaults to `encoding_type`
# @raise [ArgumentError] for any other `type`
def encode vec, type: encoding_type
  sims = similarities vec
  unless %i[most_similar ensemble ensemble_norm].include? type
    raise ArgumentError, "Unrecognized encode #{type}"
  end

  if type == :most_similar
    winner = sims.max_index
    @ncodes += 1
    @utility[winner] += 1
    return winner
  end

  # Both ensemble flavours share the same normalization and utility update;
  # they only differ in what is handed back to the caller.
  total = sims.sum
  total = 1 if total < 1e-5  # HACK: avoid division by zero
  normalized = sims / total
  @ncodes += 1
  @utility += (normalized - utility) / ncodes # cumulative moving average
  type == :ensemble ? sims : normalized
end

#init_centrs(nc: ncentrs, base: nil, proport: nil) ⇒ Object

Initializes a list of centroids



41
42
43
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 41

# (Re)builds @centrs as a list of `nc` centroids, each produced by
# `new_centr base, proport`.
#
# @param nc [Integer] how many centroids to create, defaults to `ncentrs`
# @param base forwarded to `new_centr`
# @param proport forwarded to `new_centr`
# @return the new centroid list
def init_centrs nc: ncentrs, base: nil, proport: nil
  fresh = []
  nc.times { fresh << new_centr(base, proport) }
  @centrs = fresh
end

#most_similar_centr(vec) ⇒ Array<Integer, Float>

Returns the index and similarity of the centroid most similar to the vector

Returns:

  • (Array<Integer, Float>)

    the index of the most similar centroid, followed by the corresponding similarity



119
120
121
122
123
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 119

# Finds the centroid that scores highest against `vec`.
#
# @param vec the vector to compare against every centroid
# @return [Array] two elements: the winning centroid's index, then its
#   similarity score
def most_similar_centr vec
  scores = similarities vec
  best = scores.max_index
  return best, scores[best]
end

#new_centr(base = nil, proport = nil) ⇒ Object

Creates a new (random) centroid. If a base is passed, it is meshed with the random centroid according to `proport`. This is done to facilitate distributing the training across centroids. TODO: USE RNG HERE!!

Raises:

  • (ArgumentError)


49
50
51
52
53
54
55
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 49

# Draws a random centroid of shape `dims` with values from
# `init_centr_vrange`. When both `base` and `proport` are given, the random
# centroid is blended with `base`: random * (1 - proport) + base * proport.
#
# @param base optional centroid to blend in
# @param proport optional weight given to `base` in the blend
# @raise [ArgumentError] if only one of `base` / `proport` is nil
def new_centr base=nil, proport=nil
  raise ArgumentError, "Either both or none" unless base.nil? == proport.nil?
  fresh = NArray.new(*dims).rand(*init_centr_vrange)
  return fresh unless base && proport
  fresh * (1 - proport) + base * proport
end

#reconstr_error(vec, code: nil, type: encoding_type) ⇒ NArray

Total absolute error in reconstructing the vector (the sum of the per-element absolute differences)

Returns:



127
128
129
130
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 127

# Measures how far the reconstruction of `vec` is from `vec` itself: the
# absolute differences are summed into a single value.
#
# @param vec the original vector
# @param code a precomputed encoding; when nil/false, `vec` is encoded here
# @param type [Symbol] encoding kind, defaults to `encoding_type`
# @return the sum of absolute differences between `vec` and its reconstruction
def reconstr_error vec, code: nil, type: encoding_type
  code = encode(vec, type: type) unless code
  rebuilt = reconstruction(code, type: type)
  residual = vec - rebuilt
  residual.abs.sum
end

#reconstruction(code, type: encoding_type) ⇒ Object

Reconstruct vector from its code (encoding)



103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 103

# Rebuilds a vector from its code.
#
# - :most_similar  the code is a centroid index; that centroid is returned.
# - :ensemble      centroids are weighted by the code entries divided by the
#                  code's total, then added together.
# - :ensemble_norm centroids are weighted by the code entries as they are,
#                  then added together.
#
# @param code the encoding to invert
# @param type [Symbol] encoding kind, defaults to `encoding_type`
# @raise [ArgumentError] for any other `type`
def reconstruction code, type: encoding_type
  if type == :most_similar
    centrs[code]
  elsif type == :ensemble
    total = code.reduce :+
    parts = centrs.zip(code).map { |centr, weight| centr * weight / total }
    parts.reduce :+
  elsif type == :ensemble_norm
    parts = centrs.zip(code).map { |centr, weight| centr * weight }
    parts.reduce :+
  else
    raise ArgumentError, "unrecognized reconstruction type: #{type}"
  end
end

#similarities(vec, type: simil_type) ⇒ Object

Computes similarities between vector and all centroids

Raises:

  • (NotImplementedError)


63
64
65
66
67
68
69
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 63

# Scores `vec` against every centroid with the similarity function selected
# by `type`, and packs the scores into an NArray (one entry per centroid, in
# centroid order).
#
# @param vec the vector to score; only inputs whose shape has at most one
#   dimension are accepted
# @param type [Symbol] key into SIMIL, defaults to `simil_type`
# @raise [NotImplementedError] if `vec.shape` has more than one dimension
# @raise [ArgumentError] if `type` is not a key of SIMIL
def similarities vec, type: simil_type
  raise NotImplementedError if vec.shape.size > 1
  simil_fn = SIMIL[type]
  raise ArgumentError, "Unrecognized simil #{type}" unless simil_fn
  scores = centrs.map { |centr| simil_fn.call centr, vec }
  NArray[*scores]
  # A parallel variant was sketched here in the past:
  # require 'parallel'
  # NArray[*Parallel.map(centrs) { |c| c.dot(vec).first }]
end

#train(vec_lst, debug: false) ⇒ Object

Train on vector list



143
144
145
146
147
148
149
150
151
152
153
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 143

# Trains on every vector of `vec_lst`, one at a time and in order, so each
# vector sees the centroids as updated by the previous ones. The per-centroid
# counter in @ntrains is bumped for whichever centroid `train_one` reports.
#
# @param vec_lst the vectors to train on
# @param debug [Boolean] when true, prints a '.' after each vector
# @return `vec_lst` itself
def train vec_lst, debug: false
  # Other options (parallelizing similarities or training) were considered
  # but their correctness is unclear, hence the sequential pass.
  vec_lst.each do |vec|
    winner = train_one vec
    print '.' if debug
    @ntrains[winner] += 1
  end
end

#train_one(vec) ⇒ Integer

Train on one vector

Returns:

  • (Integer)

    index of trained centroid



134
135
136
137
138
139
140
# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 134

# Trains on a single vector: the centroid most similar to `vec` is replaced
# by a blend of itself and `vec`, weighted (1 - lrate) and lrate respectively.
#
# @param vec the training vector
# @return the index of the centroid that was updated
def train_one vec
  winner, = most_similar_centr(vec)
  previous = centrs[winner]
  # note: this is effectively a dot product, `[c[i], vec].dot([1-lrate, lrate])`
  centrs[winner] = previous * (1 - lrate) + vec * lrate
  winner
end