@Test public void testSingleLinkage() { Dataset<? extends Instance> dataset = FakeClustering.kumarData(); assertEquals(6, dataset.size()); Props pref = new Props(); pref.put(AlgParams.LINKAGE, SingleLinkage.name); pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING); pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true); HierarchicalResult result = subject.hierarchy(dataset, pref); Matrix similarityMatrix = result.getProximityMatrix(); assertNotNull(similarityMatrix); assertEquals(similarityMatrix.rowsCount(), dataset.size()); assertEquals(similarityMatrix.columnsCount(), dataset.size()); System.out.println("kumar - single"); DendroTreeData tree = result.getTreeData(); tree.print(); assertEquals(dataset.size(), tree.numLeaves()); DendroNode root = tree.getRoot(); assertEquals(0.21587033144922904, root.getHeight(), DELTA); int levels = tree.distinctHeights(1e-7); // TODO: in this example nodes #7 and #8 are on different level, // but their height is the same. should we consider those as different assertEquals(4, levels); }
/** * Sample covariance * * @param dataset * @return */ protected double covariance(Dataset<E> dataset) { Matrix m = dataset.asMatrix(); Matrix cov = new SymmetricMatrixDiag(m.columnsCount()); DenseVector mean = new DenseVector(dataset.attributeCount()); for (int i = 0; i < mean.size(); i++) { mean.set(i, dataset.getAttribute(i).statistics(StatsNum.MEAN)); } Vector v; double res, sum = 0.0; for (int i = 0; i < m.rowsCount(); i++) { v = m.getRowVector(i).minus(mean); res = v.dot(v); sum += res; } return sum / (m.rowsCount() - 1); /* for (int i = 0; i < m.columnsCount(); i++) { mean = dataset.getAttribute(i).statistics(StatsNum.AVG); cov.set(i, i, dataset.getAttribute(i).statistics(StatsNum.VARIANCE)); for (int j = 0; j < i; j++) { //cov.set(i, j, mean); } } */ // return cov; }
/**
 * Stores the given matrix as this object's proximity matrix.
 *
 * @param m matrix to store; its row count must equal its column count
 * @throws IllegalArgumentException if {@code m} is not square
 */
@Override
public void setProximityMatrix(Matrix m) {
    if (m.rowsCount() != m.columnsCount()) {
        // IllegalArgumentException is a RuntimeException, so callers that
        // catch RuntimeException keep working
        throw new IllegalArgumentException(
                "expected square matrix, got " + m.rowsCount() + " x " + m.columnsCount());
    }
    this.proximity = m;
}
/**
 * Runs column clustering with single linkage on the school sample data and
 * checks that the returned proximity matrix has one row and one column per
 * dataset attribute.
 *
 * @throws IOException declared by the test signature
 */
@Test
public void testColumnClustering() throws IOException {
    Dataset<? extends Instance> dataset = FakeClustering.schoolData();

    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.COLUMNS_CLUSTERING);
    // presumably asks the algorithm to retain the proximity matrix so that
    // getProximityMatrix() below is non-null -- confirm against AlgParams
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);
    HierarchicalResult result = subject.hierarchy(dataset, pref);

    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);
    // expected value goes first so a failure reports expected/actual correctly
    assertEquals(dataset.attributeCount(), similarityMatrix.rowsCount());
    assertEquals(dataset.attributeCount(), similarityMatrix.columnsCount());

    result.getTreeData().print();
}