Class: MachineLearningWorkbench::Compressor::VectorQuantization

Inherits:

Object

Object
MachineLearningWorkbench::Compressor::VectorQuantization

show all

Defined in: lib/machine_learning_workbench/compressor/vector_quantization.rb

Overview

Standard Vector Quantization

Direct Known Subclasses

CopyVQ, DecayingLearningRateVQ

Constant Summary collapse

# Similarity functions selectable through `simil_type`.
# Each entry takes a centroid and a vector and returns a score where
# larger means "more similar".
SIMIL = {
  # Plain dot product between centroid and vector.
  dot: lambda { |centr, vec| centr.dot(vec) },
  # Negated mean squared error, so that a smaller error ranks higher.
  mse: lambda { |centr, vec|
    sq_diff = (centr - vec)**2
    -(sq_diff.sum) / centr.size
  }
}

Instance Attribute Summary collapse

#centrs ⇒ Object readonly

Returns the value of attribute centrs.
#dims ⇒ Object readonly

Returns the value of attribute dims.
#encoding_type ⇒ Object readonly

Returns the value of attribute encoding_type.
#init_centr_vrange ⇒ Object readonly

Returns the value of attribute init_centr_vrange.
#lrate ⇒ Object readonly

Returns the value of attribute lrate.
#ncentrs ⇒ Object readonly

Returns the value of attribute ncentrs.
#ncodes ⇒ Object

Returns the value of attribute ncodes.
#ntrains ⇒ Object readonly

Returns the value of attribute ntrains.
#rng ⇒ Object readonly

Returns the value of attribute rng.
#simil_type ⇒ Object readonly

Returns the value of attribute simil_type.
#utility ⇒ Object

Returns the value of attribute utility.
#vrange ⇒ Object readonly

Returns the value of attribute vrange.

Instance Method Summary collapse

#check_lrate(lrate) ⇒ Object

Verify lrate to be present and within unit bounds. As a separate method only so it can be overloaded in `DecayingLearningRateVQ`.
#encode(vec, type: encoding_type) ⇒ Object

Encode a vector. Tracks the utility of centroids based on how much they contribute to the encoding. TODO: `encode = Encodings.const_get(type)` in `initialize`. NOTE: hashes of lambdas or modules cannot access ncodes and utility.
#init_centrs(nc: ncentrs, base: nil, proport: nil) ⇒ Object

Initializes a list of centroids.
#initialize(ncentrs:, dims:, vrange:, lrate:, simil_type: nil, encoding_type: nil, init_centr_vrange: nil, rseed: Random.new_seed) ⇒ VectorQuantization constructor

A new instance of VectorQuantization.
#most_similar_centr(vec) ⇒ Array<Integer, Float>

Returns index and similitude of most similar centroid to vector.
#new_centr(base = nil, proport = nil) ⇒ Object

Creates a new (random) centroid. If a base is passed, this is meshed with the random centroid.
#reconstr_error(vec, code: nil, type: encoding_type) ⇒ NArray

Total absolute error in reconstructing vector (per-pixel absolute residuals, summed).
#reconstruction(code, type: encoding_type) ⇒ Object

Reconstruct vector from its code (encoding).
#similarities(vec, type: simil_type) ⇒ Object

Computes similarities between vector and all centroids.
#train(vec_lst, debug: false) ⇒ Object

Train on vector list.
#train_one(vec) ⇒ Integer

Train on one vector.

Constructor Details

#initialize(ncentrs:, dims:, vrange:, lrate:, simil_type: nil, encoding_type: nil, init_centr_vrange: nil, rseed: Random.new_seed) ⇒ `VectorQuantization`

Returns a new instance of VectorQuantization.

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 9

# Builds a vector quantizer and randomly initializes its centroids.
#
# @param ncentrs [Integer] number of centroids to create
# @param dims [Integer, Array<Integer>] centroid shape; stored wrapped in `Array()`
# @param vrange [Array, Range] value range; stored as `[first, last]` Floats
# @param lrate [Numeric] learning rate, validated through `check_lrate`
# @param simil_type [Symbol, nil] key into SIMIL; defaults to `:dot`
# @param encoding_type [Symbol, nil] encoding scheme; defaults to `:ensemble_norm`
# @param init_centr_vrange [Object, nil] bounds splatted into `rand` when
#   drawing random centroids; falls back to `vrange` (as passed) when nil
# @param rseed [Integer] seed for the instance RNG
# @raise [ArgumentError] if `vrange` is an Array whose size is not 2, or is
#   neither an Array nor a Range (and whatever `check_lrate` raises)
def initialize ncentrs:, dims:, vrange:, lrate:, simil_type: nil, encoding_type: nil, init_centr_vrange: nil, rseed: Random.new_seed

  @rng = Random.new rseed # TODO: RNG CURRENTLY NOT USED!!

  @ncentrs = ncentrs
  @dims = Array(dims)
  check_lrate lrate # hack: so that we can overload it in dlr_vq
  @lrate = lrate
  @simil_type = simil_type || :dot
  @encoding_type = encoding_type || :ensemble_norm
  # Honor the caller's initialization range. The previous
  # `@init_centr_vrange ||= vrange` never read the keyword argument, so an
  # explicit `init_centr_vrange:` was silently replaced by `vrange`.
  @init_centr_vrange = init_centr_vrange || vrange
  @vrange = case vrange
    when Array
      raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2
      vrange.map &method(:Float)
    when Range
      [vrange.first, vrange.last].map &method(:Float)
    else raise ArgumentError, "vrange: unrecognized type: #{vrange.class}"
  end
  # Must run after dims and the init range are set: new centroids read both.
  init_centrs
  @ntrains = [0]*ncentrs # useful to understand what happens
  @utility = NArray.zeros [ncentrs] # trace how 'useful' are centroids to encodings
  @ncodes = 0
end

Instance Attribute Details

#centrs ⇒ `Object` (readonly)

Returns the value of attribute centrs.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns the current list of centroids. The list is built by `init_centrs`,
# and `train_one` replaces individual entries as it trains.
def centrs
  @centrs
end

#dims ⇒ `Object` (readonly)

Returns the value of attribute dims.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns the centroid dimensions as an Array (the constructor wraps its
# `dims` argument with `Array()`); `new_centr` splats it into `NArray.new`.
def dims
  @dims
end

#encoding_type ⇒ `Object` (readonly)

Returns the value of attribute encoding_type.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns the default encoding scheme (`:ensemble_norm` unless one was given
# to the constructor). It is the default `type:` of `encode`,
# `reconstruction` and `reconstr_error`.
def encoding_type
  @encoding_type
end

#init_centr_vrange ⇒ `Object` (readonly)

Returns the value of attribute init_centr_vrange.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns the bounds that `new_centr` splats into `rand` when drawing a
# random centroid.
def init_centr_vrange
  @init_centr_vrange
end

#lrate ⇒ `Object` (readonly)

Returns the value of attribute lrate.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns the learning rate: the weight `train_one` gives to the input vector
# when blending it into the winning centroid (the centroid keeps `1 - lrate`).
def lrate
  @lrate
end

#ncentrs ⇒ `Object` (readonly)

Returns the value of attribute ncentrs.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns the number of centroids requested at construction; it is also the
# default count used by `init_centrs`.
def ncentrs
  @ncentrs
end

#ncodes ⇒ `Object`

Returns the value of attribute ncodes.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns how many vectors have been encoded so far: starts at 0 and is
# incremented by every `encode` call that recognizes its encoding type.
def ncodes
  @ncodes
end

#ntrains ⇒ `Object` (readonly)

Returns the value of attribute ntrains.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns the per-centroid training counters: an Array with one entry per
# centroid, where `train` increments the entry of each trained centroid.
def ntrains
  @ntrains
end

#rng ⇒ `Object` (readonly)

Returns the value of attribute rng.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns the Random instance seeded with the constructor's `rseed`.
# Per the constructor's TODO it is currently not used.
def rng
  @rng
end

#simil_type ⇒ `Object` (readonly)

Returns the value of attribute simil_type.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns the key into SIMIL naming the default similarity function
# (`:dot` unless one was given to the constructor).
def simil_type
  @simil_type
end

#utility ⇒ `Object`

Returns the value of attribute utility.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns the per-centroid utility trace maintained by `encode`: a hit count
# under `:most_similar`, a cumulative moving average of each centroid's
# contribution under the ensemble encodings. Starts as all zeros.
def utility
  @utility
end

#vrange ⇒ `Object` (readonly)

Returns the value of attribute vrange.



5
6
7

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 5

# Returns the value range as a two-element Array of Floats `[first, last]`,
# normalized by the constructor from the Array or Range it was given.
def vrange
  @vrange
end

Instance Method Details

#check_lrate(lrate) ⇒ `Object`

Verify lrate to be present and within unit bounds. As a separate method only so it can be overloaded in `DecayingLearningRateVQ`.

Raises:

(ArgumentError)



36
37
38

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 36

# Validates the learning rate: it must be present and lie within [0, 1].
# Kept as its own method so subclasses can override the check.
#
# @param lrate [Numeric, nil] candidate learning rate
# @return [nil] when the value is acceptable
# @raise [ArgumentError] when `lrate` is nil or outside [0, 1]
def check_lrate lrate
  return if lrate&.between?(0, 1)

  raise ArgumentError, "Pass a `lrate` between 0 and 1"
end

#encode(vec, type: encoding_type) ⇒ `Object`

Encode a vector. Tracks the utility of centroids based on how much they contribute to the encoding. TODO: `encode = Encodings.const_get(type)` in `initialize`. NOTE: hashes of lambdas or modules cannot access ncodes and utility.

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 75

# Encodes a vector against the centroids and updates the usage statistics
# (`ncodes`, `utility`).
#
# - `:most_similar`  -> index of the best-matching centroid; its utility
#   entry is bumped by one.
# - `:ensemble`      -> the raw similarities; utility follows the normalized
#   contributions as a cumulative moving average.
# - `:ensemble_norm` -> the similarities divided by their sum; utility follows
#   that code as a cumulative moving average.
#
# @param vec the vector to encode (passed to `similarities`)
# @param type [Symbol] encoding scheme, defaults to `encoding_type`
# @return the code, whose shape depends on `type` as described above
# @raise [ArgumentError] on an unknown `type`
def encode vec, type: encoding_type
  simils = similarities vec
  case type
  when :most_similar
    winner = simils.max_index
    @ncodes += 1
    @utility[winner] += 1
    winner
  when :ensemble, :ensemble_norm
    total = simils.sum
    # HACK: avoid division by zero
    # NOTE(review): any total below 1e-5, negative sums included, is replaced
    # by 1 -- confirm this is intended for similarities that can be negative.
    total = 1 if total < 1e-5
    shares = simils / total
    @ncodes += 1
    @utility += (shares - utility) / ncodes # cumulative moving average
    # Only the normalized flavour returns the shares; plain ensemble hands
    # back the untouched similarities.
    type == :ensemble ? simils : shares
  else
    raise ArgumentError, "Unrecognized encode #{type}"
  end
end

#init_centrs(nc: ncentrs, base: nil, proport: nil) ⇒ `Object`

Initializes a list of centroids



41
42
43

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 41

# (Re)builds the centroid list from scratch.
#
# @param nc [Integer] how many centroids to create, defaults to `ncentrs`
# @param base forwarded to `new_centr` for every centroid
# @param proport forwarded to `new_centr` for every centroid
# @return [Array] the freshly created centroids (also stored in `@centrs`)
def init_centrs nc: ncentrs, base: nil, proport: nil
  fresh = []
  nc.times { fresh << new_centr(base, proport) }
  # Assign only once every centroid has been built.
  @centrs = fresh
end

#most_similar_centr(vec) ⇒ `Array<Integer, Float>`

Returns index and similitude of most similar centroid to vector

Returns:

(Array<Integer, Float>) —

the index of the most similar centroid, followed by the corresponding similarity

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 119

# Finds the centroid that best matches a vector.
#
# @param vec the vector to compare (passed to `similarities`)
# @return [Array<Integer, Float>] index of the most similar centroid,
#   followed by its similarity score
def most_similar_centr vec
  scores = similarities(vec)
  best = scores.max_index
  [best, scores[best]]
end

#new_centr(base = nil, proport = nil) ⇒ `Object`

Creates a new (random) centroid. If a base is passed, this is meshed with the random centroid. This is done to facilitate distributing the training across centroids. TODO: USE RNG HERE!!

Raises:

(ArgumentError)

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 49

# Creates a new random centroid of shape `dims`, drawn with the bounds in
# `init_centr_vrange`. When both `base` and `proport` are given, the result
# is the blend `random * (1 - proport) + base * proport`.
#
# @param base [Object, nil] centroid to mesh the random one with
# @param proport [Numeric, nil] weight given to `base` in the blend
# @return the new centroid
# @raise [ArgumentError] if only one of `base` / `proport` is given
def new_centr base=nil, proport=nil
  # Both must be given or both omitted.
  raise ArgumentError, "Either both or none" unless base.nil? == proport.nil?

  fresh = NArray.new(*dims).rand(*init_centr_vrange)
  return fresh unless base && proport

  fresh * (1 - proport) + base * proport
end

#reconstr_error(vec, code: nil, type: encoding_type) ⇒ `NArray`

Total absolute error in reconstructing vector (per-pixel absolute residuals, summed)

Returns:

(NArray) —

residuals

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 127

# Measures how well a vector is rebuilt from its code: the absolute
# differences between `vec` and its reconstruction, summed.
#
# @param vec the original vector
# @param code precomputed encoding of `vec`; computed with `encode` when
#   omitted (note that `encode` updates the usage statistics)
# @param type [Symbol] encoding scheme, defaults to `encoding_type`
# @return the sum of the absolute residuals
def reconstr_error vec, code: nil, type: encoding_type
  code = encode(vec, type: type) unless code
  rebuilt = reconstruction(code, type: type)
  (vec - rebuilt).abs.sum
end

#reconstruction(code, type: encoding_type) ⇒ `Object`

Reconstruct vector from its code (encoding)

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 103

# Rebuilds a vector from its code.
#
# - `:most_similar`  -> the centroid at index `code`
# - `:ensemble`      -> centroids weighted by `code`, each term divided by
#   the sum of the code
# - `:ensemble_norm` -> centroids weighted by `code` as-is
#
# @param code the encoding, as produced by `encode` with the same `type`
# @param type [Symbol] encoding scheme, defaults to `encoding_type`
# @return the reconstructed vector
# @raise [ArgumentError] on an unknown `type`
def reconstruction code, type: encoding_type
  return centrs[code] if type == :most_similar

  weighted =
    case type
    when :ensemble
      total = code.reduce(:+)
      centrs.zip(code).map { |centr, contr| centr * contr / total }
    when :ensemble_norm
      centrs.zip(code).map { |centr, contr| centr * contr }
    else
      raise ArgumentError, "unrecognized reconstruction type: #{type}"
    end
  weighted.reduce(:+)
end

#similarities(vec, type: simil_type) ⇒ `Object`

Computes similarities between vector and all centroids

Raises:

(NotImplementedError)

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 63

# Scores a vector against every centroid.
#
# @param vec the vector to compare; only vectors whose `shape` has a single
#   dimension are supported
# @param type [Symbol] key into SIMIL, defaults to `simil_type`
# @return [NArray] one similarity per centroid, in centroid order
# @raise [NotImplementedError] if `vec` has more than one dimension
# @raise [ArgumentError] if `type` is not a key of SIMIL
def similarities vec, type: simil_type
  raise NotImplementedError if vec.shape.size > 1

  simil_fn = SIMIL[type]
  raise(ArgumentError, "Unrecognized simil #{type}") unless simil_fn

  scores = centrs.map { |centr| simil_fn.call(centr, vec) }
  NArray[*scores]
end

#train(vec_lst, debug: false) ⇒ `Object`

Train on vector list

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 143

# Trains on a list of vectors, one at a time (batch style: the centroids are
# updated after each vector, so later vectors see the effect of earlier
# ones). Parallel variants were considered but left out because their
# correctness is unclear.
#
# @param vec_lst the vectors to train on
# @param debug [Boolean] print a dot after each trained vector
# @return `vec_lst` itself
def train vec_lst, debug: false
  vec_lst.each do |vec|
    winner = train_one(vec)
    print '.' if debug
    # Keep track of how often each centroid got trained.
    @ntrains[winner] += 1
  end
  vec_lst
end

#train_one(vec) ⇒ `Integer`

Train on one vector

Returns:

(Integer) —

index of trained centroid

# File 'lib/machine_learning_workbench/compressor/vector_quantization.rb', line 134

# Trains on a single vector: the most similar centroid is moved towards the
# vector, keeping `1 - lrate` of its old value and taking `lrate` of `vec`.
#
# @param vec the training vector
# @return [Integer] index of the trained centroid
def train_one vec
  winner, _score = most_similar_centr vec
  # note: this is a weighted sum, so it could also be written as a dot
  #   product: `[c[i], vec].dot([1-lrate, lrate])` -- maybe faster?
  retained = centrs[winner] * (1 - lrate)
  centrs[winner] = retained + vec * lrate
  winner
end

Class: MachineLearningWorkbench::Compressor::VectorQuantization

Overview

Direct Known Subclasses

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ncentrs:, dims:, vrange:, lrate:, simil_type: nil, encoding_type: nil, init_centr_vrange: nil, rseed: Random.new_seed) ⇒ VectorQuantization

Instance Attribute Details

#centrs ⇒ Object (readonly)

#dims ⇒ Object (readonly)

#encoding_type ⇒ Object (readonly)

#init_centr_vrange ⇒ Object (readonly)

#lrate ⇒ Object (readonly)

#ncentrs ⇒ Object (readonly)

#ncodes ⇒ Object

#ntrains ⇒ Object (readonly)

#rng ⇒ Object (readonly)

#simil_type ⇒ Object (readonly)

#utility ⇒ Object

#vrange ⇒ Object (readonly)

Instance Method Details

#check_lrate(lrate) ⇒ Object

#encode(vec, type: encoding_type) ⇒ Object

#init_centrs(nc: ncentrs, base: nil, proport: nil) ⇒ Object

#most_similar_centr(vec) ⇒ Array<Integer, Float>

#new_centr(base = nil, proport = nil) ⇒ Object

#reconstr_error(vec, code: nil, type: encoding_type) ⇒ NArray

#reconstruction(code, type: encoding_type) ⇒ Object

#similarities(vec, type: simil_type) ⇒ Object

#train(vec_lst, debug: false) ⇒ Object

#train_one(vec) ⇒ Integer

#initialize(ncentrs:, dims:, vrange:, lrate:, simil_type: nil, encoding_type: nil, init_centr_vrange: nil, rseed: Random.new_seed) ⇒ `VectorQuantization`

#centrs ⇒ `Object` (readonly)

#dims ⇒ `Object` (readonly)

#encoding_type ⇒ `Object` (readonly)

#init_centr_vrange ⇒ `Object` (readonly)

#lrate ⇒ `Object` (readonly)

#ncentrs ⇒ `Object` (readonly)

#ncodes ⇒ `Object`

#ntrains ⇒ `Object` (readonly)

#rng ⇒ `Object` (readonly)

#simil_type ⇒ `Object` (readonly)

#utility ⇒ `Object`

#vrange ⇒ `Object` (readonly)

#check_lrate(lrate) ⇒ `Object`

#encode(vec, type: encoding_type) ⇒ `Object`

#init_centrs(nc: ncentrs, base: nil, proport: nil) ⇒ `Object`

#most_similar_centr(vec) ⇒ `Array<Integer, Float>`

#new_centr(base = nil, proport = nil) ⇒ `Object`

#reconstr_error(vec, code: nil, type: encoding_type) ⇒ `NArray`

#reconstruction(code, type: encoding_type) ⇒ `Object`

#similarities(vec, type: simil_type) ⇒ `Object`

#train(vec_lst, debug: false) ⇒ `Object`

#train_one(vec) ⇒ `Integer`