/**
 * Folds one input vector into the running {@code outputVector}, weighted by
 * the factor returned from {@code scale(v)}.
 *
 * <ul>
 *   <li>A weight of exactly {@code 0.0} leaves {@code outputVector} untouched.</li>
 *   <li>A weight of exactly {@code 1.0} combines the vector using {@code Functions.PLUS}.</li>
 *   <li>Any other weight combines it using {@code Functions.plusMult(weight)}.</li>
 * </ul>
 *
 * @param key     the record key; not read by this method
 * @param v       the writable wrapping the vector to accumulate
 * @param context the task context; not used by this method
 * @throws IOException          declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void map(T key, VectorWritable v, Context context) throws IOException, InterruptedException {
  double weight = scale(v);

  // A zero weight contributes nothing, so skip the accumulation entirely.
  if (weight == 0.0) {
    return;
  }

  if (weight == 1.0) {
    // Unit weight: use the plain addition function instead of a scaled one.
    outputVector.assign(v.get(), Functions.PLUS);
  } else {
    outputVector.assign(v.get(), Functions.plusMult(weight));
  }
}
@Test public void testInitialization() { // start with super clusterable data List<? extends WeightedVector> data = cubishTestData(0.01); // just do initialization of ball k-means. This should drop a point into each of the clusters BallKMeans r = new BallKMeans(new BruteSearch(new EuclideanDistanceMeasure()), 6, 20); r.cluster(data); // put the centroids into a matrix Matrix x = new DenseMatrix(6, 5); int row = 0; for (Centroid c : r) { x.viewRow(row).assign(c.viewPart(0, 5)); row++; } // verify that each column looks right. Should contain zeros except for a single 6. final Vector columnNorms = x.aggregateColumns( new VectorFunction() { @Override public double apply(Vector f) { // return the sum of three discrepancy measures return Math.abs(f.minValue()) + Math.abs(f.maxValue() - 6) + Math.abs(f.norm(1) - 6); } }); // verify all errors are nearly zero assertEquals(0, columnNorms.norm(1) / columnNorms.size(), 0.1); // verify that the centroids are a permutation of the original ones SingularValueDecomposition svd = new SingularValueDecomposition(x); Vector s = svd.getS().viewDiagonal().assign(Functions.div(6)); assertEquals(5, s.getLengthSquared(), 0.05); assertEquals(5, s.norm(1), 0.05); }