@Test public void testSingleLinkage() { Dataset<? extends Instance> dataset = FakeClustering.kumarData(); assertEquals(6, dataset.size()); Props pref = new Props(); pref.put(AlgParams.LINKAGE, SingleLinkage.name); pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING); pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true); HierarchicalResult result = subject.hierarchy(dataset, pref); Matrix similarityMatrix = result.getProximityMatrix(); assertNotNull(similarityMatrix); assertEquals(similarityMatrix.rowsCount(), dataset.size()); assertEquals(similarityMatrix.columnsCount(), dataset.size()); System.out.println("kumar - single"); DendroTreeData tree = result.getTreeData(); tree.print(); assertEquals(dataset.size(), tree.numLeaves()); DendroNode root = tree.getRoot(); assertEquals(0.21587033144922904, root.getHeight(), DELTA); int levels = tree.distinctHeights(1e-7); // TODO: in this example nodes #7 and #8 are on different level, // but their height is the same. should we consider those as different assertEquals(4, levels); }
/**
 * Looks up the mapped id of an index in the current tree data.
 *
 * @param idx index handed over to {@code treeData.getMappedId}
 * @return the value reported by the tree data for {@code idx}
 * @throws RuntimeException when no tree data is available
 */
@Override
public int getMappedIndex(int idx) {
    if (treeData != null) {
        return treeData.getMappedId(idx);
    }
    throw new RuntimeException("Empty tree data");
}
/**
 * Number of levels reported by the tree data.
 *
 * @return {@code treeData.treeLevels()}, or 0 when no tree data is set
 */
@Override
public int treeLevels() {
    return treeData == null ? 0 : treeData.treeLevels();
}
/**
 * Cuts the dendrogram at the given level: resolves the level to a cutoff
 * via {@code findLevel}, starting at the tree root, stores the clustering
 * produced by {@code updateCutoff} and hands the cutoff back.
 *
 * @param level tree level passed to {@code findLevel}
 * @return the cutoff value that was applied
 */
@Override
public double cutTreeByLevel(int level) {
    final double height = findLevel(treeData.getRoot(), level);
    this.clustering = updateCutoff(height);
    return height;
}
/**
 * Fetches the leaf registered in the tree data for {@code idx}; when the
 * tree data has none, a fresh {@code DTreeNode} carrying that id is
 * returned instead.
 *
 * @param idx leaf index / node id
 * @return existing leaf, or a newly created node with id {@code idx}
 */
private DendroNode getNode(int idx) {
    DendroNode existing = treeData.getLeaf(idx);
    if (existing != null) {
        return existing;
    }
    DendroNode created = new DTreeNode();
    created.setId(idx);
    return created;
}
/**
 * Height of the dendrogram root. When no tree data exists yet, the tree is
 * first built with {@code constructTree()}.
 *
 * TODO: this is hardly correct
 *
 * @return height of the root node
 */
@Override
public double getMaxTreeHeight() {
    if (treeData == null) {
        LOG.info("constructing tree");
        constructTree();
    }
    DendroNode root = treeData.getRoot();
    return root.getHeight();
}
/**
 * Return leaves mapping to indexes in dataset.
 *
 * @return the mapping held by the tree data, or {@code null} when no tree
 *         data is set
 */
@Override
public int[] getMapping() {
    return treeData == null ? null : treeData.getMapping();
}
private void constructTree() { if (merges == null) { throw new RuntimeException("merges empty!"); } LOG.info("constructing tree, merge size:{}", merges.size()); treeData = new DynamicTreeData(); DendroNode[] nodes = new DendroNode[merges.size() + 1]; DendroNode current = null; DendroNode prev = null; // for (Merge m : getMerges()) { Merge m; for (int i = merges.size() - 1; i >= 0; i--) { m = merges.get(i); current = new DTreeNode(); // bottom level if (prev == null) { prev = getNode(m.remainingCluster()); } current.setLeft(prev); current.setRight(getNode(m.mergedCluster())); current.setHeight(m.similarity()); prev = current; // System.out.println("merge: " + m.mergedCluster() + " remain: " + m.remainingCluster() + " // similarity = " + m.similarity()); } numNodes = 0; // number leaves, so that we can compute it's position numberLeaves(current); treeData.updatePositions(current); treeData.setRoot(current); treeData.setLeaves(nodes); LOG.info("max tree height: {}", current.getHeight()); }
/**
 * Puts everything below {@code node} into cluster {@code c} and records
 * the cluster id in {@code assign}.
 *
 * Inner nodes are descended left first, then right. A leaf contributes
 * either all instances it holds (when the tree data contains clusters) or
 * its single data item.
 *
 * @param node   subtree root
 * @param c      cluster receiving the instances
 * @param assign assignment array written with the id of {@code c}
 */
private void subtreeToCluster(DendroNode node, Cluster c, int[] assign) {
    if (!node.isLeaf()) {
        subtreeToCluster(node.getLeft(), c, assign);
        subtreeToCluster(node.getRight(), c, assign);
        return;
    }

    if (!treeData.containsClusters()) {
        // plain leaf: one data item, indexed by the node id
        c.add(((DendroLeaf) node).getData());
        assign[node.getId()] = c.getClusterId();
        return;
    }

    // leaf wrapping several instances, each indexed by its own index
    DClusterLeaf<E> clusterLeaf = (DClusterLeaf) node;
    for (E member : clusterLeaf.getInstances()) {
        c.add(member);
        assign[member.getIndex()] = c.getClusterId();
    }
}
/**
 * Runs single-linkage row clustering on the dataset returned by
 * {@code FakeClustering.schoolData()} and checks leaf count, root height,
 * node count, distinct heights and tree levels of the dendrogram.
 */
@Test
public void testSingleLinkageSchool() {
    Dataset<? extends Instance> school = FakeClustering.schoolData();
    assertEquals(17, school.size());

    Props params = new Props();
    params.put(AlgParams.LINKAGE, SingleLinkage.name);
    params.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);

    HierarchicalResult hierarchy = subject.hierarchy(school, params);
    System.out.println("school - single");

    DendroTreeData dendrogram = hierarchy.getTreeData();
    dendrogram.print();
    assertEquals(school.size(), dendrogram.numLeaves());

    DendroNode top = dendrogram.getRoot();
    assertEquals(32.542734980330046, top.getHeight(), DELTA);

    assertEquals(2 * school.size() - 1, dendrogram.numNodes());
    assertEquals(16, dendrogram.distinctHeights());
    assertEquals(8, dendrogram.treeLevels());
}
@Override public Clustering updateCutoff(double cutoff) { this.cutoff = cutoff; int[] assign = new int[dataset.size()]; int estClusters = (int) Math.sqrt(dataset.size()); colorGenerator.reset(); num = 0; // human readable Clustering clusters = new ClusterList(estClusters); DendroNode root = treeData.getRoot(); if (root != null) { checkCutoff(root, cutoff, clusters, assign); if (clusters.size() > 0) { mapping = assign; } else { LOG.info("failed to cutoff dendrogram, cut = {}", cutoff); } } // add input dataset to clustering lookup if (noise != null) { Cluster clust = new BaseCluster<>(noise.size()); clust.setColor(colorGenerator.next()); clust.setClusterId(num++); clust.setParent(getDataset()); clust.setName("Noise"); clust.setAttributes(getDataset().getAttributes()); for (Instance ins : noise) { clust.add(ins); mapping[ins.getIndex()] = num - 1; } clusters.add(clust); } clusters.lookupAdd(dataset); if (dendroMapping != null) { clusters.lookupAdd(dendroMapping); } clusters.lookupAdd(this); return clusters; }
private void checkCutoff(DendroNode node, double cutoff, Clustering clusters, int[] assign) { if (node.isLeaf()) { if (treeData.containsClusters()) { DClusterLeaf<E> leaf = (DClusterLeaf) node; Cluster clust = makeCluster(clusters); for (E instance : leaf.getInstances()) { clust.add(instance); assign[instance.getIndex()] = clust.getClusterId(); } } return; } if (node.getHeight() == cutoff) { // both branches goes to the same cluster Cluster clust = makeCluster(clusters); subtreeToCluster(node, clust, assign); } else if (node.getLeft().getHeight() < cutoff || node.getRight().getHeight() < cutoff) { Cluster clust; if (node.getLeft().getHeight() < cutoff && node.getRight().getHeight() < cutoff) { clust = makeCluster(clusters); subtreeToCluster(node.getLeft(), clust, assign); clust = makeCluster(clusters); subtreeToCluster(node.getRight(), clust, assign); } else if (node.getRight().getHeight() < cutoff) { clust = makeCluster(clusters); subtreeToCluster(node.getRight(), clust, assign); checkCutoff(node.getLeft(), cutoff, clusters, assign); } else if (node.getLeft().getHeight() < cutoff) { clust = makeCluster(clusters); subtreeToCluster(node.getLeft(), clust, assign); checkCutoff(node.getRight(), cutoff, clusters, assign); } } else { checkCutoff(node.getLeft(), cutoff, clusters, assign); checkCutoff(node.getRight(), cutoff, clusters, assign); } }
@Test public void testInverseSorting() { Dataset<? extends Instance> dataset = FakeClustering.kumarData(); assertEquals(6, dataset.size()); Props pref = new Props(); pref.put(AlgParams.LINKAGE, SingleLinkage.name); pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING); // inverse ordering pref.put(AlgParams.SMALLEST_FIRST, false); HierarchicalResult result = subject.hierarchy(dataset, pref); System.out.println("kumar - inverse"); DendroTreeData tree = result.getTreeData(); tree.print(); assertEquals(dataset.size(), tree.numLeaves()); DendroNode root = tree.getRoot(); assertEquals(0.10198039027185574, root.getHeight(), DELTA); assertEquals(5, tree.distinctHeights()); assertEquals(4, tree.treeLevels()); }
/**
 * Replaces the tree data of this result and asks the new tree data to
 * update its positions starting at its root.
 *
 * @param treeData dendrogram tree to use from now on
 */
@Override
public void setTreeData(DendroTreeData treeData) {
    this.treeData = treeData;
    DendroNode root = treeData.getRoot();
    treeData.updatePositions(root);
}
/**
 * Resolves a tree level to a height by delegating to
 * {@code findLevelHeight}, starting at the tree root.
 *
 * @param level tree level to look up
 * @return the height reported by {@code findLevelHeight}
 */
@Override
public double getHeightByLevel(int level) {
    DendroNode root = treeData.getRoot();
    return findLevelHeight(root, level);
}