Module: KMeansClustering
- Defined in:
- lib/kmeans-clustering.rb
Defined Under Namespace
Classes: Job
Class Attribute Summary collapse
-
.calcAverage ⇒ Object
Returns the value of attribute calcAverage.
-
.calcDistanceSquared ⇒ Object
Returns the value of attribute calcDistanceSquared.
-
.calcSum ⇒ Object
Returns the value of attribute calcSum.
Class Method Summary collapse
- .run(centers, elements, nb_iterations, nb_jobs) ⇒ Object
-
.split_array_into_parts(array, nb_parts) ⇒ Object
Splits an array into several nearly equal-sized parts (sizes differ by at most one). Adapted from apidock.com/rails/v3.2.8/Array/in_groups.
Class Attribute Details
.calcAverage ⇒ Object
Returns the value of attribute calcAverage.
7 8 9 |
# File 'lib/kmeans-clustering.rb', line 7
# Reader for the calcAverage attribute.
#
# Returns whatever is currently stored in @calcAverage (nil when nothing has
# been assigned). KMeansClustering.run invokes the stored object as
# calcAverage.call(sum, nb_elements).
def calcAverage
  @calcAverage
end
.calcDistanceSquared ⇒ Object
Returns the value of attribute calcDistanceSquared.
8 9 10 |
# File 'lib/kmeans-clustering.rb', line 8
# Reader for the calcDistanceSquared attribute.
#
# Returns whatever is currently stored in @calcDistanceSquared (nil when
# nothing has been assigned).
def calcDistanceSquared
  @calcDistanceSquared
end
.calcSum ⇒ Object
Returns the value of attribute calcSum.
6 7 8 |
# File 'lib/kmeans-clustering.rb', line 6
# Reader for the calcSum attribute.
#
# Returns whatever is currently stored in @calcSum (nil when nothing has been
# assigned). KMeansClustering.run invokes the stored object as
# calcSum.call(list_of_partial_sums).
def calcSum
  @calcSum
end
Class Method Details
.run(centers, elements, nb_iterations, nb_jobs) ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/kmeans-clustering.rb', line 28
# Refines the centers over nb_iterations rounds and returns the centers
# produced by the last round.
#
# Every round:
#   1. splits elements into nb_jobs slices and builds one Job per slice,
#   2. adds the jobs to a Cabiri::JobQueue and starts it,
#   3. groups the per-job results by center,
#   4. combines the partial :sum / :nb_elements values per center,
#   5. asks calcAverage for the new center of each group.
#
# centers       - starting centers; handed to every Job of the first round.
# elements      - collection that is sliced with split_array_into_parts.
# nb_iterations - number of rounds; with 0 the given centers are returned as is.
# nb_jobs       - number of slices/jobs; also the argument to queue.start.
#
# Returns the centers computed by the final round.
def self.run(centers, elements, nb_iterations, nb_jobs)
  nb_iterations.times do
    # one Job per slice of the elements
    slices = split_array_into_parts(elements, nb_jobs)
    jobs = (0...nb_jobs).map { |index| Job.new(centers, slices[index]) }

    # queue every job under its index, then start the queue
    # (nb_jobs is presumably the degree of parallelism -- confirm against Cabiri)
    queue = Cabiri::JobQueue.new
    jobs.each_with_index do |job, index|
      queue.add(index) { job.run }
    end
    queue.start(nb_jobs)

    # collect, per center, the partial results reported by each finished job
    partials_by_center = {}
    queue.finished_jobs.values.each do |finished_job|
      finished_job.result.each do |center, partial|
        (partials_by_center[center] ||= []) << partial
      end
    end

    # first pass: overall sum and element count for every center
    totals = partials_by_center.map do |_center, partials|
      total = KMeansClustering::calcSum.call(partials.map { |partial| partial[:sum] })
      count = partials.map { |partial| partial[:nb_elements] }.inject(0, :+)
      [total, count]
    end

    # second pass: the averages become the centers for the next round
    centers = totals.map do |total, count|
      KMeansClustering::calcAverage.call(total, count)
    end
  end

  centers
end
.split_array_into_parts(array, nb_parts) ⇒ Object
Splits an array into several nearly equal-sized parts (sizes differ by at most one). Adapted from apidock.com/rails/v3.2.8/Array/in_groups.
13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/kmeans-clustering.rb', line 13
# Splits array into nb_parts consecutive groups whose sizes differ by at most
# one; the leading groups receive the extra elements. Adapted from
# apidock.com/rails/v3.2.8/Array/in_groups.
#
# array    - the Array to split (it is not modified).
# nb_parts - number of groups to produce; must be greater than zero.
#
# Returns an Array of nb_parts Arrays. Groups are empty once the elements run
# out (e.g. two elements split into three parts gives [[a], [b], []]).
# Raises ArgumentError if nb_parts is zero or negative. Zero previously
# surfaced as a ZeroDivisionError, and a negative value silently returned [],
# dropping every element.
def self.split_array_into_parts(array, nb_parts)
  unless nb_parts > 0
    raise ArgumentError, "nb_parts must be greater than zero (got #{nb_parts.inspect})"
  end

  base_size, remainder = array.size.divmod(nb_parts)
  start = 0

  Array.new(nb_parts) do |index|
    # the first `remainder` groups absorb one leftover element each
    length = index < remainder ? base_size + 1 : base_size
    part = array.slice(start, length)
    start += length
    part
  end
end