/**
 * Runs the hierarchical clustering under test on the columns of the school
 * data set using single linkage, then checks that the proximity matrix
 * returned with the result is present and has as many rows and columns as
 * the data set has attributes. Finally prints the resulting tree data.
 *
 * @throws IOException declared by the test signature; which call may raise
 *                     it is not visible from this method alone
 */
@Test
public void testColumnClustering() throws IOException {
    Dataset<? extends Instance> dataset = FakeClustering.schoolData();

    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.COLUMNS_CLUSTERING);
    // presumably asks the algorithm to retain the proximity matrix so that
    // getProximityMatrix() below is non-null -- confirm against AlgParams
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);

    HierarchicalResult result = subject.hierarchy(dataset, pref);
    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);

    // Expected value goes first (JUnit's assertEquals(expected, actual)
    // convention) so that a failure message labels the two sides correctly.
    assertEquals(dataset.attributeCount(), similarityMatrix.rowsCount());
    assertEquals(dataset.attributeCount(), similarityMatrix.columnsCount());

    result.getTreeData().print();
}
/** * Sample covariance * * @param dataset * @return */ protected double covariance(Dataset<E> dataset) { Matrix m = dataset.asMatrix(); Matrix cov = new SymmetricMatrixDiag(m.columnsCount()); DenseVector mean = new DenseVector(dataset.attributeCount()); for (int i = 0; i < mean.size(); i++) { mean.set(i, dataset.getAttribute(i).statistics(StatsNum.MEAN)); } Vector v; double res, sum = 0.0; for (int i = 0; i < m.rowsCount(); i++) { v = m.getRowVector(i).minus(mean); res = v.dot(v); sum += res; } return sum / (m.rowsCount() - 1); /* for (int i = 0; i < m.columnsCount(); i++) { mean = dataset.getAttribute(i).statistics(StatsNum.AVG); cov.set(i, i, dataset.getAttribute(i).statistics(StatsNum.VARIANCE)); for (int j = 0; j < i; j++) { //cov.set(i, j, mean); } } */ // return cov; }
/**
 * Returns the number of items that were clustered, which depends on
 * {@code resultType}: the attribute count of the data set for column
 * clustering, or the data set's size for row clustering.
 *
 * @return {@code dataset.attributeCount()} for {@code COLUMNS_CLUSTERING},
 *         {@code dataset.size()} for {@code ROWS_CLUSTERING}
 * @throws IllegalStateException if {@code resultType} is any other value
 */
@Override
public int size() {
    switch (resultType) {
        case COLUMNS_CLUSTERING:
            return dataset.attributeCount();
        case ROWS_CLUSTERING:
            return dataset.size();
        default:
            // IllegalStateException is a RuntimeException, so callers that
            // caught the previous exception type keep working.
            throw new IllegalStateException(
                    "Don't know whether to cluster rows or columns: " + resultType);
    }
}