Esempio n. 1
0
  /**
   * Runs single-linkage row clustering on the Kumar sample data and checks the proximity
   * matrix dimensions, the dendrogram leaf count, the root height and the number of distinct
   * node heights.
   */
  @Test
  public void testSingleLinkage() {
    Dataset<? extends Instance> dataset = FakeClustering.kumarData();
    assertEquals(6, dataset.size());

    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);
    // Presumably makes the result retain the proximity matrix inspected below.
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);

    HierarchicalResult result = subject.hierarchy(dataset, pref);
    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);
    // Row clustering: the matrix must be (instances x instances).
    // JUnit expects (expected, actual); keeping that order makes failure messages accurate.
    assertEquals(dataset.size(), similarityMatrix.rowsCount());
    assertEquals(dataset.size(), similarityMatrix.columnsCount());

    System.out.println("kumar - single");
    DendroTreeData tree = result.getTreeData();
    tree.print();
    assertEquals(dataset.size(), tree.numLeaves());
    DendroNode root = tree.getRoot();
    assertEquals(0.21587033144922904, root.getHeight(), DELTA);

    int levels = tree.distinctHeights(1e-7);
    // TODO: in this example nodes #7 and #8 are on different levels but have the same
    // height; should they be counted as distinct levels?
    assertEquals(4, levels);
  }
Esempio n. 2
0
  /**
   * Computes a scalar spread measure of the dataset.
   *
   * <p>Each row of the dataset matrix is centered by subtracting the vector of per-attribute
   * {@code StatsNum.MEAN} statistics; the dot products of every centered row with itself are
   * summed and the total is divided by {@code rows - 1}. Despite the method name, no
   * covariance matrix is built: the result is a single number (presumably the trace of the
   * sample covariance matrix, assuming {@code StatsNum.MEAN} is the arithmetic mean of the
   * attribute).
   *
   * <p>There is no guard for datasets with fewer than two rows; the divisor is then zero or
   * negative.
   *
   * @param dataset the data to evaluate; its matrix rows are treated as observations
   * @return the sum of squared deviations from the attribute means divided by
   *     {@code rows - 1}
   */
  protected double covariance(Dataset<E> dataset) {
    Matrix m = dataset.asMatrix();

    // Collect the per-attribute mean into one vector so rows can be centered in one step.
    DenseVector mean = new DenseVector(dataset.attributeCount());
    for (int i = 0; i < mean.size(); i++) {
      mean.set(i, dataset.getAttribute(i).statistics(StatsNum.MEAN));
    }

    // Accumulate the squared length of every centered row.
    double sum = 0.0;
    for (int i = 0; i < m.rowsCount(); i++) {
      Vector centered = m.getRowVector(i).minus(mean);
      sum += centered.dot(centered);
    }

    // Sample (n - 1) normalization.
    return sum / (m.rowsCount() - 1);
  }
Esempio n. 3
0
 /**
  * Stores the given proximity matrix after checking that it is square.
  *
  * @param m the proximity matrix; must have as many rows as columns
  * @throws IllegalArgumentException if the row and column counts of {@code m} differ
  */
 @Override
 public void setProximityMatrix(Matrix m) {
   if (m.rowsCount() != m.columnsCount()) {
     // IllegalArgumentException is a RuntimeException, so callers that catch the
     // broader type keep working while the failure cause is stated precisely.
     throw new IllegalArgumentException(
         "expected square matrix, got " + m.rowsCount() + " x " + m.columnsCount());
   }
   this.proximity = m;
 }
Esempio n. 4
0
  /**
   * Runs single-linkage column clustering on the school sample data and checks that the
   * proximity matrix has one row and one column per attribute, then prints the dendrogram.
   *
   * @throws IOException declared by the original signature; no visible call in this method
   *     throws it directly
   */
  @Test
  public void testColumnClustering() throws IOException {
    Dataset<? extends Instance> dataset = FakeClustering.schoolData();

    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.COLUMNS_CLUSTERING);
    // Presumably makes the result retain the proximity matrix inspected below.
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);

    HierarchicalResult result = subject.hierarchy(dataset, pref);
    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);
    // Column clustering: the matrix must be (attributes x attributes).
    // JUnit expects (expected, actual); keeping that order makes failure messages accurate.
    assertEquals(dataset.attributeCount(), similarityMatrix.rowsCount());
    assertEquals(dataset.attributeCount(), similarityMatrix.columnsCount());

    result.getTreeData().print();
  }