/**
  * Builds up a consistently random (same seed every time) sparse matrix, with sometimes repeated
  * rows.
  *
  * @param numRows total number of rows in the resulting matrix
  * @param nonNullRows number of rows to populate; when equal to {@code numRows} every row is
  *     filled in order, otherwise rows are assigned to random slots (so some may collide/repeat)
  * @param numCols number of columns in the resulting matrix
  * @param entriesPerRow number of random entries drawn per row (column collisions may leave fewer)
  * @param entryMean scale factor applied to each Gaussian-sampled entry
  * @return a deterministic {@code numRows x numCols} sparse matrix seeded with 1234L
  */
 public static Matrix randomSequentialAccessSparseMatrix(
     int numRows, int nonNullRows, int numCols, int entriesPerRow, double entryMean) {
   SparseRowMatrix m = new SparseRowMatrix(new int[] {numRows, numCols});
   // fixed seed so every call produces the same "random" matrix
   Random r = new Random(1234L);
   for (int i = 0; i < nonNullRows; i++) {
     SequentialAccessSparseVector v = new SequentialAccessSparseVector(numCols);
     for (int j = 0; j < entriesPerRow; j++) {
       int col = r.nextInt(numCols);
       double val = r.nextGaussian();
       v.set(col, val * entryMean);
     }
     int c = r.nextInt(numRows);
     if (r.nextBoolean() || numRows == nonNullRows) {
       // when every row must be non-null, fill sequentially; otherwise drop into a random slot
       m.assignRow(numRows == nonNullRows ? i : c, v);
     } else {
       // occasionally duplicate an already-populated row to exercise repeated-row handling
       Vector other = m.getRow(r.nextInt(numRows));
       if (other != null && other.getLengthSquared() > 0) {
         m.assignRow(c, other.clone());
       }
     }
   }
   return m;
 }
 /**
  * Asserts that the first {@code numEigensToCheck} rows of {@code eigens} are approximate
  * eigenvectors of the corpus: multiplying each vector through the corpus must leave it
  * (nearly) parallel to itself.
  *
  * @param eigens matrix whose rows are candidate eigenvectors
  * @param corpus the corpus to multiply the candidate eigenvectors against
  * @param numEigensToCheck how many leading rows of {@code eigens} to verify
  * @param errorMargin maximum allowed deviation of (1 - cosine similarity) from zero
  * @param isSymmetric if true use {@code corpus.times(e)}; otherwise use
  *     {@code corpus.timesSquared(e)} (i.e. multiply by A'A)
  */
 public static void assertEigen(
     Matrix eigens,
     VectorIterable corpus,
     int numEigensToCheck,
     double errorMargin,
     boolean isSymmetric) {
   for (int i = 0; i < numEigensToCheck; i++) {
     Vector e = eigens.getRow(i);
     if (e.getLengthSquared() == 0) {
       continue; // skip rows that were never populated
     }
     Vector afterMultiply = isSymmetric ? corpus.times(e) : corpus.timesSquared(e);
     double dot = afterMultiply.dot(e);
     double afterNorm = afterMultiply.getLengthSquared();
     // error is 1 - cos(angle) between e and its image under the corpus
     double error = 1 - dot / Math.sqrt(afterNorm * e.getLengthSquared());
     assertTrue(
         "Error margin: " + error + " too high! (for eigen " + i + ')',
         Math.abs(error) < errorMargin);
   }
 }
// Example #3
  @Test
  public void testInitialization() {
    // generate data that is trivially clusterable
    List<? extends WeightedVector> data = cubishTestData(0.01);

    // run ball k-means initialization only; one point should land in each cluster
    BallKMeans clustering = new BallKMeans(new BruteSearch(new EuclideanDistanceMeasure()), 6, 20);
    clustering.cluster(data);

    // collect the resulting centroids as the rows of a matrix
    Matrix centroids = new DenseMatrix(6, 5);
    int next = 0;
    for (Centroid centroid : clustering) {
      centroids.viewRow(next++).assign(centroid.viewPart(0, 5));
    }

    // each column should be all zeros except for a single 6; score each column by
    // summing three discrepancy measures that are all zero in that case
    final Vector columnNorms =
        centroids.aggregateColumns(
            new VectorFunction() {
              @Override
              public double apply(Vector column) {
                double minError = Math.abs(column.minValue());
                double maxError = Math.abs(column.maxValue() - 6);
                double normError = Math.abs(column.norm(1) - 6);
                return minError + maxError + normError;
              }
            });
    // the average per-column discrepancy must be essentially zero
    assertEquals(0, columnNorms.norm(1) / columnNorms.size(), 0.1);

    // a permutation of the original centroids has all singular values equal to 6,
    // so after dividing by 6 the spectrum should be five ones
    SingularValueDecomposition decomposition = new SingularValueDecomposition(centroids);
    Vector singularValues = decomposition.getS().viewDiagonal().assign(Functions.div(6));
    assertEquals(5, singularValues.getLengthSquared(), 0.05);
    assertEquals(5, singularValues.norm(1), 0.05);
  }
// Example #4
 /**
  * Decides whether this cluster has converged by measuring how far its current centroid
  * lies from its center.
  *
  * @param measure The distance measure to use for cluster-point comparisons.
  * @param convergenceDelta the convergence delta to use for stopping.
  * @return if the cluster is converged
  */
 public boolean computeConvergence(DistanceMeasure measure, double convergenceDelta) {
   Vector centroid = computeCentroid();
   double centroidToCenter =
       measure.distance(centroid.getLengthSquared(), centroid, getCenter());
   // converged once the centroid has stopped moving (within the given tolerance)
   converged = centroidToCenter <= convergenceDelta;
   return converged;
 }