Module: EverydayCliUtils::Kmeans

Defined in:
lib/everyday-cli-utils/safe/kmeans.rb

Class Method Summary

Class Method Details

.kmeans(collection, k) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 119

# Repeats run_kmean, starting from evenly spaced centers, until a pass
# returns the same centers it was given.
#
# The i-th starting center (i = 1..k) is min + i * (max - min) / (k + 1.0).
#
# @param collection [#min, #max] values to cluster
# @param k [Integer] number of starting centers
# @return [Array] the centers produced by the last run_kmean pass
def self.kmeans(collection, k)
  lowest  = collection.min
  highest = collection.max
  step    = (highest - lowest) / (k + 1.0)
  centers = (1..k).map { |i| lowest + (i * step) }
  # Always runs at least one pass; stops once a pass leaves the centers unchanged.
  loop do
    previous, centers = run_kmean(collection, centers)
    break if centers == previous
  end
  centers
end

.nmeans(collection, max_k = 10, threshold = 0.05) ⇒ Object



132
133
134
135
136
137
138
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 132

# Entry point that picks a set of centers without a fixed k.
#
# Converts the input with MapUtil.floats, gathers the k = 1 values from
# nmeans_setup_1, and returns the k = 1 centers straight away when the
# collection holds exactly one element. Otherwise the k = 2 values from
# nmeans_setup_2 are handed to run_nmeans, which makes the final choice.
#
# @param collection [Enumerable] raw values
# @param max_k [Integer] upper bound on k passed through to run_nmeans
# @param threshold [Numeric] cut-off passed through to run_nmeans
# @return [Object] the centers chosen by nmeans_setup_1 or run_nmeans
def self.nmeans(collection, max_k = 10, threshold = 0.05)
  floats = EverydayCliUtils::MapUtil.floats(collection)
  mean, size, single = nmeans_setup_1(floats)
  return single if size == 1

  ftest, ftest2_single, ftest2_pair, pair = nmeans_setup_2(floats, mean, size, single)
  run_nmeans(mean, size, floats, ftest, ftest2_single, ftest2_pair, pair, single, max_k, threshold)
end

.nmeans_setup_1(collection) ⇒ Object



73
74
75
76
77
78
79
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 73

# Collects the values nmeans needs for the single-center case.
#
# @param collection [Enumerable] values to summarise
# @return [Array] a three-element array: MapUtil.sum(collection) divided by
#   the element count, the element count, and kmeans(collection, 1)
def self.nmeans_setup_1(collection)
  total = EverydayCliUtils::MapUtil.sum(collection)
  size  = collection.count
  [total / size, size, kmeans(collection, 1)]
end

.nmeans_setup_2(collection, avg, cnt, ks1) ⇒ Object



81
82
83
84
85
86
87
88
89
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 81

# Collects the values nmeans needs to compare one center against two.
#
# Calls KmeansUtil.f_test2 on the clusters for ks1, then runs
# kmeans(collection, 2) and calls KmeansUtil.f_test and KmeansUtil.f_test2
# on the clusters for those two centers.
#
# @param collection [Enumerable] values being clustered
# @param avg [Numeric] forwarded to KmeansUtil.f_test
# @param cnt [Integer] forwarded to KmeansUtil.f_test and KmeansUtil.f_test2
# @param ks1 [Array] centers from the single-center run
# @return [Array] f_test for k = 2, f_test2 for ks1, f_test2 for k = 2, and
#   the k = 2 centers, in that order
def self.nmeans_setup_2(collection, avg, cnt, ks1)
  util = EverydayCliUtils::KmeansUtil

  single_clusters = util.get_clusters(collection, ks1)
  ft1             = util.f_test2(single_clusters, ks1, cnt)

  pair          = kmeans(collection, 2)
  pair_clusters = util.get_clusters(collection, pair)
  [
    util.f_test(pair_clusters, pair, cnt, avg),
    ft1,
    util.f_test2(pair_clusters, pair, cnt),
    pair
  ]
end

.outliers(collection, sensitivity = 0.5, k = nil) ⇒ Object



140
141
142
143
144
145
146
147
148
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 140

# Gathers the outliers reported by KmeansUtil.find_outliers for every center.
#
# Centers come from nmeans when k is nil and from kmeans(collection, k)
# otherwise. The per-center results are concatenated in center order.
#
# @param collection [Enumerable] values to inspect
# @param sensitivity [Numeric] forwarded unchanged to KmeansUtil.find_outliers
# @param k [Integer, nil] fixed number of centers, or nil to let nmeans choose
# @return [Array] concatenation of the find_outliers results
def self.outliers(collection, sensitivity = 0.5, k = nil)
  centers =
    if k.nil?
      nmeans(collection)
    else
      kmeans(collection, k)
    end
  clusters = EverydayCliUtils::KmeansUtil.get_clusters(collection, centers)

  centers.each_with_index.inject([]) do |found, (center, index)|
    found + EverydayCliUtils::KmeansUtil.find_outliers(center, clusters, index, sensitivity)
  end
end

.run_kmean(collection, ks) ⇒ Object



110
111
112
113
114
115
116
117
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 110

# Performs one refinement pass: assigns the collection to the given centers
# via KmeansUtil.get_clusters and recomputes each center as the mean of its
# cluster. A cluster with no members keeps its previous center.
#
# The new centers are returned in ascending order. Means are computed with
# floating-point division, so Integer data is not truncated.
#
# @param collection [Enumerable] values being clustered
# @param ks [Array<Numeric>] current centers
# @return [Array] a two-element array: the centers passed in (same object)
#   and the recomputed, sorted centers
def self.run_kmean(collection, ks)
  clusters = EverydayCliUtils::KmeansUtil.get_clusters(collection, ks)
  updated  = clusters.each_with_index.map do |members, index|
    size = members.count
    # fdiv keeps Float results unchanged and avoids Integer truncation.
    size.zero? ? ks[index] : members.sum.fdiv(size)
  end
  # Array#sort is non-destructive, so its result must be the value returned.
  [ks, updated.sort]
end

.run_nmean(collection, avg, cnt, ft, ft2, k, ks) ⇒ Object



91
92
93
94
95
96
97
98
99
100
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 91

# Runs kmeans for one value of k and pairs the fresh results with the
# previous ones so the caller can compare them.
#
# @param collection [Enumerable] values being clustered
# @param avg [Numeric] forwarded to KmeansUtil.f_test
# @param cnt [Integer] forwarded to KmeansUtil.f_test and KmeansUtil.f_test2
# @param ft [Object] previous f_test value, returned unchanged
# @param ft2 [Object] previous f_test2 value, returned unchanged
# @param k [Integer] number of centers for this run
# @param ks [Array] previous centers, returned unchanged
# @return [Array] new f_test, new f_test2, previous f_test, previous f_test2,
#   new centers, previous centers, in that order
def self.run_nmean(collection, avg, cnt, ft, ft2, k, ks)
  centers  = kmeans(collection, k)
  util     = EverydayCliUtils::KmeansUtil
  clusters = util.get_clusters(collection, centers)
  [
    util.f_test(clusters, centers, cnt, avg),
    util.f_test2(clusters, centers, cnt),
    ft,
    ft2,
    centers,
    ks
  ]
end

.run_nmeans(avg, cnt, collection, ft, ft1, ft2, ks, ks1, max_k, threshold) ⇒ Object



102
103
104
105
106
107
108
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 102

# Tries k = 3 up to the smaller of max_k and cnt, and decides which set of
# centers to return.
#
# After each run_nmean call, the previous centers are returned at once when
# the relative change in f_test, (new - previous) / previous, is below
# threshold and the previous f_test2 is below ft1. If no k triggers that,
# ks1 is returned when the latest f_test2 is at least ft1; otherwise the
# latest centers are returned.
#
# @param avg [Numeric] forwarded to run_nmean
# @param cnt [Integer] element count; also caps k
# @param collection [Enumerable] values being clustered
# @param ft [Numeric] f_test value for the starting centers
# @param ft1 [Numeric] f_test2 value for the single-center result
# @param ft2 [Numeric] f_test2 value for the starting centers
# @param ks [Array] starting centers
# @param ks1 [Array] single-center result
# @param max_k [Integer] largest k to try
# @param threshold [Numeric] cut-off for the relative f_test change
# @return [Array] the chosen centers
def self.run_nmeans(avg, cnt, collection, ft, ft1, ft2, ks, ks1, max_k, threshold)
  upper = [max_k, cnt].min
  (3..upper).each do |k|
    ft, ft2, prev_ft, prev_ft2, ks, prev_ks = run_nmean(collection, avg, cnt, ft, ft2, k, ks)
    gain = (ft - prev_ft) / prev_ft
    return prev_ks if gain < threshold && prev_ft2 < ft1
  end
  return ks1 if ft2 >= ft1

  ks
end