Exemplo n.º 1
0
  /**
   * Runs single-linkage clustering over the rows of the Kumar dataset with
   * {@code KEEP_PROXIMITY} enabled and checks the proximity matrix dimensions,
   * the number of leaves, the root height and the number of distinct heights
   * of the resulting dendrogram.
   */
  @Test
  public void testSingleLinkage() {
    Dataset<? extends Instance> dataset = FakeClustering.kumarData();
    assertEquals(6, dataset.size());
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);
    // expected value goes first so that failure messages read correctly;
    // for row clustering the matrix is expected to be (rows x rows)
    assertEquals(dataset.size(), similarityMatrix.rowsCount());
    assertEquals(dataset.size(), similarityMatrix.columnsCount());
    System.out.println("kumar - single");
    DendroTreeData tree = result.getTreeData();
    tree.print();
    assertEquals(dataset.size(), tree.numLeaves());
    DendroNode root = tree.getRoot();
    assertEquals(0.21587033144922904, root.getHeight(), DELTA);

    // count heights that differ by more than the given tolerance
    int levels = tree.distinctHeights(1e-7);
    // TODO: in this example nodes #7 and #8 are on different levels,
    // but their height is the same. Should we consider those as different?
    assertEquals(4, levels);
  }
Exemplo n.º 2
0
  /**
   * Runs single-linkage clustering over the columns (attributes) of the school
   * dataset with {@code KEEP_PROXIMITY} enabled and checks that the proximity
   * matrix has one row and one column per attribute. The resulting tree is
   * printed but not asserted on.
   *
   * @throws IOException declared by the original signature; kept for compatibility
   */
  @Test
  public void testColumnClustering() throws IOException {
    Dataset<? extends Instance> dataset = FakeClustering.schoolData();
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.COLUMNS_CLUSTERING);
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);
    // expected value goes first so that failure messages read correctly;
    // for column clustering the matrix is expected to be (attributes x attributes)
    assertEquals(dataset.attributeCount(), similarityMatrix.rowsCount());
    assertEquals(dataset.attributeCount(), similarityMatrix.columnsCount());

    result.getTreeData().print();
  }
Exemplo n.º 3
0
  /**
   * Single-linkage clustering of the rows of the school dataset: checks the
   * dendrogram's leaf count, root height, node count, number of distinct
   * heights and number of tree levels.
   */
  @Test
  public void testSingleLinkageSchool() {
    final Dataset<? extends Instance> school = FakeClustering.schoolData();
    assertEquals(17, school.size());

    // configure single linkage over rows
    final Props params = new Props();
    params.put(AlgParams.LINKAGE, SingleLinkage.name);
    params.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);

    final HierarchicalResult clustering = subject.hierarchy(school, params);
    System.out.println("school - single");
    final DendroTreeData dendrogram = clustering.getTreeData();
    dendrogram.print();

    // one leaf per instance
    assertEquals(school.size(), dendrogram.numLeaves());
    assertEquals(32.542734980330046, dendrogram.getRoot().getHeight(), DELTA);
    // expected node count is 2n - 1 for n instances
    assertEquals(2 * school.size() - 1, dendrogram.numNodes());

    assertEquals(16, dendrogram.distinctHeights());
    assertEquals(8, dendrogram.treeLevels());
  }
Exemplo n.º 4
0
  /**
   * Single-linkage clustering of the rows of the Kumar dataset with
   * {@code SMALLEST_FIRST} disabled: checks the dendrogram's leaf count,
   * root height, number of distinct heights and number of tree levels.
   */
  @Test
  public void testInverseSorting() {
    final Dataset<? extends Instance> kumar = FakeClustering.kumarData();
    assertEquals(6, kumar.size());

    final Props params = new Props();
    params.put(AlgParams.LINKAGE, SingleLinkage.name);
    params.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);
    // switch to the inverse ordering
    params.put(AlgParams.SMALLEST_FIRST, false);

    final HierarchicalResult clustering = subject.hierarchy(kumar, params);
    System.out.println("kumar - inverse");
    final DendroTreeData dendrogram = clustering.getTreeData();
    dendrogram.print();

    // one leaf per instance
    assertEquals(kumar.size(), dendrogram.numLeaves());
    assertEquals(0.10198039027185574, dendrogram.getRoot().getHeight(), DELTA);

    assertEquals(5, dendrogram.distinctHeights());
    assertEquals(4, dendrogram.treeLevels());
  }