Module: KMeansClustering

Defined in:
lib/kmeans-clustering.rb

Defined Under Namespace

Classes: Job

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.calcAverage ⇒ Object

Returns the value of attribute calcAverage.



7
8
9
# File 'lib/kmeans-clustering.rb', line 7

# Reader for the calcAverage attribute.
#
# KMeansClustering.run invokes the stored object as
# `calcAverage.call(sum, nb_elements)` to turn a per-center sum and element
# count into a new center.
#
# @return [Object] whatever is currently stored in @calcAverage (nil if unset)
def calcAverage
  instance_variable_get(:@calcAverage)
end

.calcDistanceSquared ⇒ Object

Returns the value of attribute calcDistanceSquared.



8
9
10
# File 'lib/kmeans-clustering.rb', line 8

# Reader for the calcDistanceSquared attribute.
#
# NOTE(review): presumably a callable returning the squared distance between
# an element and a center, consumed by Job — not visible here, confirm.
#
# @return [Object] whatever is currently stored in @calcDistanceSquared (nil if unset)
def calcDistanceSquared
  instance_variable_get(:@calcDistanceSquared)
end

.calcSum ⇒ Object

Returns the value of attribute calcSum.



6
7
8
# File 'lib/kmeans-clustering.rb', line 6

# Reader for the calcSum attribute.
#
# KMeansClustering.run invokes the stored object as `calcSum.call(sums)`,
# passing the list of partial :sum values collected for one center.
#
# @return [Object] whatever is currently stored in @calcSum (nil if unset)
def calcSum
  instance_variable_get(:@calcSum)
end

Class Method Details

.run(centers, elements, nb_iterations, nb_jobs) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/kmeans-clustering.rb', line 28

# Runs the k-means refinement loop and returns the final centers.
#
# Each iteration:
#   1. splits +elements+ into +nb_jobs+ slices and builds one Job per slice,
#   2. runs all jobs through a Cabiri::JobQueue,
#   3. groups the per-job partial results by center,
#   4. reduces them with KMeansClustering.calcSum and derives the new centers
#      with KMeansClustering.calcAverage.
#
# Each job result is iterated as (center, data) pairs where data responds to
# [:sum] and [:nb_elements]. Only centers that appear in at least one job
# result produce a new center, so the returned list can be shorter than the
# one passed in.
#
# @param centers [Array] initial centers
# @param elements [Array] elements to cluster
# @param nb_iterations [Integer] number of refinement passes (0 returns +centers+ untouched)
# @param nb_jobs [Integer] number of jobs the elements are divided across
# @return [Array] the centers computed by the last iteration
def self.run(centers, elements, nb_iterations, nb_jobs)
  nb_iterations.times do
    # one job per slice of the elements, all sharing the current centers
    slices = split_array_into_parts(elements, nb_jobs)
    jobs = nb_jobs.times.map { |index| Job.new(centers, slices[index]) }

    # hand every job to the queue and let it process them
    queue = Cabiri::JobQueue.new
    jobs.each_with_index do |job, index|
      queue.add(index) { job.run }
    end
    queue.start(nb_jobs)

    # gather the partial results of all jobs under their center
    partials_by_center = Hash.new { |hash, key| hash[key] = [] }
    queue.finished_jobs.values.each do |finished_job|
      finished_job.result.each do |center, partial|
        partials_by_center[center] << partial
      end
    end

    # first pass: total sum and element count per center
    totals = partials_by_center.map do |_center, partials|
      [
        KMeansClustering.calcSum.call(partials.collect { |partial| partial[:sum] }),
        partials.collect { |partial| partial[:nb_elements] }.inject(0, :+)
      ]
    end

    # second pass: the averages become the centers for the next iteration
    centers = totals.map do |total, count|
      KMeansClustering.calcAverage.call(total, count)
    end
  end

  centers
end

.split_array_into_parts(array, nb_parts) ⇒ Object

Splits an array into several parts whose sizes differ by at most one element. Adapted from apidock.com/rails/v3.2.8/Array/in_groups.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/kmeans-clustering.rb', line 13

# Splits +array+ into +nb_parts+ consecutive slices whose sizes differ by at
# most one element; the leading slices receive the extra elements. When there
# are fewer elements than parts, the trailing slices are empty arrays.
#
# @param array [Array] the elements to distribute
# @param nb_parts [Integer] number of slices to produce
# @return [Array<Array>] the slices, in original element order
# @raise [ZeroDivisionError] if +nb_parts+ is 0
def self.split_array_into_parts(array, nb_parts)
  base_size, remainder = array.size.divmod(nb_parts)
  offset = 0

  nb_parts.times.map do |part_index|
    # the first `remainder` parts each take one extra element
    part_size = part_index < remainder ? base_size + 1 : base_size
    part = array.slice(offset, part_size)
    offset += part_size
    part
  end
end