@Override public Clustering getClustering(Dataset<E> parent) { setDataset(parent); int estClusters = (int) Math.sqrt(dataset.size()); Clustering result = new ClusterList(estClusters); // estimated capacity int perCluster = (int) (parent.size() / (float) estClusters); int[] assign = getMapping(); if (assign != null) { int id; Cluster clust; for (int i = 0; i < assign.length; i++) { id = assign[i]; clust = result.createCluster(id, perCluster); clust.add(dataset.get(i)); } } else { // try some cutoff method? throw new RuntimeException("don't know how to get clusters.."); } // proximity.printLower(5, 2); // similarity.print(4, 2); result.lookupAdd(dataset); if (props != null) { result.setParams(props); } return result; }
/**
 * Scores a two-cluster partition of eight two-dimensional points: three at
 * (0, 0) and five at (1, 1). The first four instances carry label "0" and go
 * into cluster 0, the remaining four carry label "1" and go into cluster 1.
 *
 * <p>TODO: make sure this test is correct
 */
@Ignore
public void testScore() throws ScoreException {
    Clustering clusters = new ClusterList(2);
    Dataset<? extends Instance> data = new ArrayDataset(8, 2);

    // fixture: row i of coordinates is paired with labels[i]
    final double[][] coordinates = {
        {0, 0}, {0, 0}, {0, 0}, {1, 1},
        {1, 1}, {1, 1}, {1, 1}, {1, 1}
    };
    final String[] labels = {"0", "0", "0", "0", "1", "1", "1", "1"};
    for (int row = 0; row < coordinates.length; row++) {
        data.builder().create(coordinates[row], labels[row]);
    }
    assertEquals(8, data.size());

    // first half of the dataset -> cluster 0, second half -> cluster 1
    final int half = 4;
    Cluster first = clusters.createCluster(0, half);
    Cluster second = clusters.createCluster(1, half);
    for (int offset = 0; offset < half; offset++) {
        first.add(data.get(offset));
        second.add(data.get(offset + half));
    }
    assertEquals(2, clusters.size());

    assertEquals(0.14039740914097984, subject.score(clusters), delta);
}