Beispiel #1
0
    @Override
    protected void map(T key, VectorWritable v, Context context)
        throws IOException, InterruptedException {

      double d = scale(v);
      if (d == 1.0) {
        outputVector.assign(v.get(), Functions.PLUS);
      } else if (d != 0.0) {
        outputVector.assign(v.get(), Functions.plusMult(d));
      }
    }
Beispiel #2
0
  @Test
  public void testInitialization() {
    // start with super clusterable data
    List<? extends WeightedVector> data = cubishTestData(0.01);

    // just do initialization of ball k-means.  This should drop a point into each of the clusters
    BallKMeans r = new BallKMeans(new BruteSearch(new EuclideanDistanceMeasure()), 6, 20);
    r.cluster(data);

    // put the centroids into a matrix
    Matrix x = new DenseMatrix(6, 5);
    int row = 0;
    for (Centroid c : r) {
      x.viewRow(row).assign(c.viewPart(0, 5));
      row++;
    }

    // verify that each column looks right.  Should contain zeros except for a single 6.
    final Vector columnNorms =
        x.aggregateColumns(
            new VectorFunction() {
              @Override
              public double apply(Vector f) {
                // return the sum of three discrepancy measures
                return Math.abs(f.minValue())
                    + Math.abs(f.maxValue() - 6)
                    + Math.abs(f.norm(1) - 6);
              }
            });
    // verify all errors are nearly zero
    assertEquals(0, columnNorms.norm(1) / columnNorms.size(), 0.1);

    // verify that the centroids are a permutation of the original ones
    SingularValueDecomposition svd = new SingularValueDecomposition(x);
    Vector s = svd.getS().viewDiagonal().assign(Functions.div(6));
    assertEquals(5, s.getLengthSquared(), 0.05);
    assertEquals(5, s.norm(1), 0.05);
  }