-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkmpp.js
306 lines (236 loc) · 6.97 KB
/
kmpp.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
var kmpp = (function () {
var ABS = Math.abs;
var POW = Math.pow;
var SQRT = Math.sqrt;
var RANDOM = Math.random;
var ROUND = Math.round;
var LOG = Math.log;
var ERR_K_IS_ZERO = 'k cannot be zero';
var reduce = null;
if (typeof Array.prototype.reduce === 'function') {
reduce = function (arr) {
var args = [].slice.apply(arguments);
arr = args.shift();
return Array.prototype.reduce.apply(arr, args);
};
} else if (typeof _ === 'function') {
reduce = _.reduce;
} else {
/* Thanks to madrobby and Karnash */
reduce = function(t,c) {
var u;
for (var i = (v=t[0],1); i < t.length;)
v = c(v,t[i],i++,t);
i<2 & u && u();
return v;
};
}
/**
* @method kmpp
* @constructor
*/
function kmpp() {
this.kmpp = true;
this.iterations = 0;
this.converged = false;
this.maxIterations = -1;
this.k = 0;
}
/**
* Resets k-means.
* @method reset
*/
kmpp.prototype.reset = function () {
this.iterations = 0;
this.converged = false;
this.points = [];
this.centroids = [];
};
/**
* Measures the Manhattan distance between two points.
* @method distance
* @param {object} a
* @param {object} b
* @returns {number} distance
*/
kmpp.prototype.distance = function(a, b) {
return ABS(a.x - b.x) + ABS(a.y - b.y);
};
/**
* Resets k-means and sets initial points.
* @method setPoints
* @param {object[]} points
*/
kmpp.prototype.setPoints = function (points) {
this.reset();
this.points = points;
};
/**
* Guess the amount of centroids to be found by the rule of thumb
* @method guessK
*/
kmpp.prototype.guessK = function () {
this.k = ~~(SQRT(this.points.length*0.5));
};
/**
* Chooses random centroids.
* @method chooseRandomCentroids
*/
kmpp.prototype.chooseRandomCentroids = function () {
for (var i = 0; i < this.k; ++i) {
var point = ~~(RANDOM() * this.points.length);
var centroid = {
centroid : i,
x : this.points[point].x,
y : this.points[point].y,
items : 0
};
this.centroids[i] = centroid;
}
};
/**
* Clusters the current set of points.
* @method cluster
* @param {function} callback
*/
kmpp.prototype.cluster = function (callback) {
if (this.k === 0) {
if (typeof callback === 'function') {
callback(new Error(ERR_K_IS_ZERO));
} else {
throw new Error(ERR_K_IS_ZERO);
}
return;
}
/** Iterate until converged or the maximum amount of iterations is reached. */
while (!this.converged || (this.maxIterations > 0 && this.iterations > this.maxIterations)) {
this.iterate();
}
if (typeof callback === 'function') callback(null, this.centroids);
};
/**
* Iterates over the provided points one time.
* @method iterate
*/
kmpp.prototype.iterate = function () {
var i, j;
/** When the result doesn't change anymore, the final result has been found. */
if (this.converged === true) return;
this.converged = true;
++this.iterations;
/** Prepares the array of the */
var sums = new Array(this.k);
for (i = 0; i < this.k; ++i) sums[i] = [0, 0, 0]; // x, y, item count
/** Find the closest centroid for each point */
for (i = 0, l = this.points.length; i < l; ++i) {
/** Finds the centroid with the closest distance to the current point */
var centroid = 0;
var minDist = this.distance(this.centroids[0], this.points[i]);
for(j = 1; j < this.centroids.length; j++){
var dist = this.distance(this.centroids[j], this.points[i]);
if(dist < minDist){
minDist = dist;
centroid = j;
}
}
/**
* When the point is not attached to a centroid or the point was
* attached to some other centroid before, the result differs from the
* previous iteration.
*/
if (typeof this.points[i].centroid !== 'number' || this.points[i].centroid !== centroid) {
this.converged = false;
}
/** Attach the point to the centroid */
this.points[i].centroid = centroid;
/** Add the points' coordinates to the sum of its centroid */
sums[centroid][0] += this.points[i].x;
sums[centroid][1] += this.points[i].y;
++sums[centroid][2];
}
/** Re-calculate the center of the centroid. */
for (i = 0; i < this.k; ++i) {
if (sums[i][2] > 0) {
this.centroids[i].x = sums[i][0] / sums[i][2];
this.centroids[i].y = sums[i][1] / sums[i][2];
}
this.centroids[i].items = sums[i][2];
}
};
/**
* Initializes the cendroids using k-means++.
* @method initCentroids
*/
kmpp.prototype.initCentroids = function () {
var i, k,cmp1, cmp2;
var addIterator = function (x,y) { return x+y; };
/**
* When k-means++ is disabled, choose random centroids.
*/
if (this.kmpp !== true) {
this.chooseRandomCentroids();
return;
}
/** K-Means++ initialization */
/** determine the amount of tries */
var D = [];
var ntries = 2 + ROUND(LOG(this.k));
/** 1. Choose one center uniformly at random from the data points. */
var l = this.points.length;
var p0 = this.points[ ~~(RANDOM() * l) ];
p0.centroid = 0;
this.centroids = [ p0 ];
/**
* 2. For each data point x, compute D(x), the distance between x and
* the nearest center that has already been chosen.
*/
for (i = 0; i < l; ++i) D[i] = this.distance(p0, this.points[i]);
var Dsum = reduce(D, addIterator);
/**
* 3. Choose one new data point at random as a new center, using a
* weighted probability distribution where a point x is chosen with
* probability proportional to D(x)2.
* (Repeated until k centers have been chosen.)
*/
for (k = 1; k < this.k; ++k) {
var bestDsum = -1, bestIdx = -1;
for (i = 0; i < ntries; ++i) {
var rndVal = ~~(RANDOM() * Dsum);
var n = 0;
for (; n < l; ++n) {
if (rndVal <= D[n]) {
break;
} else {
rndVal -= D[n];
}
}
var tmpD = [];
for (var m = 0; m < l; ++m) {
cmp1 = D[m];
cmp2 = this.distance(this.points[m],this.points[n]);
tmpD[m] = cmp1 > cmp2 ? cmp2 : cmp1;
}
var tmpDsum = reduce(tmpD, addIterator);
if (bestDsum < 0 || tmpDsum < bestDsum) {
bestDsum = tmpDsum;
bestIdx = n;
}
}
Dsum = bestDsum;
var centroid = {
x : this.points[bestIdx].x,
y : this.points[bestIdx].y,
centroid : k,
items : 0
};
this.centroids.push(centroid);
for (i = 0; i < l; ++i) {
cmp1 = D[i];
cmp2 = this.distance(this.points[bestIdx],this.points[i]);
D[i] = cmp1 > cmp2 ? cmp2 : cmp1;
}
}
};
return kmpp;
})();
if (typeof module === 'object') module.exports = kmpp;