@Test
public void testSingleLinkage() {
    Dataset<? extends Instance> dataset = FakeClustering.kumarData();
    assertEquals(6, dataset.size());
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);
    assertEquals(dataset.size(), similarityMatrix.rowsCount());
    assertEquals(dataset.size(), similarityMatrix.columnsCount());
    System.out.println("kumar - single");
    DendroTreeData tree = result.getTreeData();
    tree.print();
    assertEquals(dataset.size(), tree.numLeaves());
    DendroNode root = tree.getRoot();
    assertEquals(0.21587033144922904, root.getHeight(), DELTA);
    int levels = tree.distinctHeights(1e-7);
    // TODO: in this example nodes #7 and #8 are on different levels,
    // but their height is the same. Should we consider those as different?
    assertEquals(4, levels);
}
@Test
public void testColumnClustering() throws IOException {
    Dataset<? extends Instance> dataset = FakeClustering.schoolData();
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.COLUMNS_CLUSTERING);
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    // when clustering columns, the proximity matrix is attribute x attribute
    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);
    assertEquals(dataset.attributeCount(), similarityMatrix.rowsCount());
    assertEquals(dataset.attributeCount(), similarityMatrix.columnsCount());
    result.getTreeData().print();
}
@Test
public void testSingleLinkageSchool() {
    Dataset<? extends Instance> dataset = FakeClustering.schoolData();
    assertEquals(17, dataset.size());
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    System.out.println("school - single");
    DendroTreeData tree = result.getTreeData();
    tree.print();
    assertEquals(dataset.size(), tree.numLeaves());
    DendroNode root = tree.getRoot();
    assertEquals(32.542734980330046, root.getHeight(), DELTA);
    // a binary dendrogram with n leaves has 2n - 1 nodes in total
    assertEquals(2 * dataset.size() - 1, tree.numNodes());
    assertEquals(16, tree.distinctHeights());
    assertEquals(8, tree.treeLevels());
}
private void init() {
    resultType = ClusteringType.parse(
            props.getObject(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING));
    // use the default cutoff strategy with the default internal evaluator
    cutoffStrategy = CutoffStrategyFactory.getInstance().getDefault();
    if (cutoffStrategy != null) {
        InternalEvaluatorFactory<E, Cluster<E>> ief = InternalEvaluatorFactory.getInstance();
        cutoffStrategy.setEvaluator(ief.getDefault());
    }
    noise = null;
}
@Override
public double score(Cluster<E> a, Cluster<E> b, Props params) {
    checkClusters(a, b);
    GraphCluster<E> x = (GraphCluster<E>) a;
    GraphCluster<E> y = (GraphCluster<E>) b;
    // relative closeness between the two clusters
    double rcl = closeness.getRCL(x, y);
    double closenessPriority = params.getDouble(Chameleon.CLOSENESS_PRIORITY, 2.0);
    // Chameleon-style merge score: relative interconnectivity weighted by
    // relative closeness raised to a user-tunable exponent
    return interconnectivity.score(a, b, params) * Math.pow(rcl, closenessPriority);
}
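/*
 * For illustration, a minimal standalone sketch of the merge score above:
 * score(a, b) = RI(a, b) * RCL(a, b)^alpha, where alpha is the closeness
 * priority. The method below is hypothetical (not part of the library);
 * it only shows how the exponent shifts emphasis between interconnectivity
 * and closeness.
 */
static double mergeScore(double relInterconnectivity, double relCloseness, double alpha) {
    // alpha > 1 favors pairs of clusters that are mutually close;
    // alpha < 1 favors pairs that are strongly interconnected
    return relInterconnectivity * Math.pow(relCloseness, alpha);
}

// e.g. mergeScore(0.8, 0.5, 2.0) = 0.8 * 0.25 = 0.2,
// while mergeScore(0.8, 0.5, 1.0) = 0.4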
@Test
public void testInverseSorting() {
    Dataset<? extends Instance> dataset = FakeClustering.kumarData();
    assertEquals(6, dataset.size());
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);
    // inverse ordering
    pref.put(AlgParams.SMALLEST_FIRST, false);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    System.out.println("kumar - inverse");
    DendroTreeData tree = result.getTreeData();
    tree.print();
    assertEquals(dataset.size(), tree.numLeaves());
    DendroNode root = tree.getRoot();
    assertEquals(0.10198039027185574, root.getHeight(), DELTA);
    assertEquals(5, tree.distinctHeights());
    assertEquals(4, tree.treeLevels());
}
@Override
public Clustering<E, C> reduce(Clustering[] clusts, Algorithm<E, C> alg,
        ColorGenerator cg, Props props) {
    int k = props.getInt(KMeans.K);
    Clustering<E, C> result = new ClusterList<>(k);
    // reducer - find consensus: each base clustering votes on the final
    // cluster assignment of every instance
    E curr;
    Iterator<E> it = clusts[0].instancesIterator();
    Cluster<E> cluster;
    // align cluster IDs across the base clusterings
    int[][] mapping = findMapping(clusts, k, alg.getDistanceFunction());
    if (cg != null) {
        cg.reset();
    }
    int idx;
    while (it.hasNext()) {
        curr = it.next();
        int[] assign = new int[k];
        for (int i = 0; i < clusts.length; i++) {
            cluster = clusts[i].assignedCluster(curr);
            if (i > 0) {
                // translate the i-th clustering's ID into the reference (0th) ID space
                assign[map(mapping, i, cluster.getClusterId())]++;
            } else {
                assign[cluster.getClusterId()]++;
            }
        }
        // majority vote decides the final cluster
        idx = findMax(assign);
        // check if the cluster already exists
        if (!result.hasAt(idx)) {
            result.createCluster(idx);
            if (cg != null) {
                result.get(idx).setColor(cg.next());
            }
        }
        // final cluster assignment
        result.get(idx).add(curr);
    }
    result.compact();
    return result;
}
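/*
 * The helpers referenced above are not shown in this section. A minimal
 * sketch of what they plausibly do (both bodies are assumptions, inferred
 * from the call sites, not the library's actual code): findMax returns the
 * index of the largest vote count, and map looks up the remapped cluster ID
 * for the i-th clustering.
 */
static int findMax(int[] votes) {
    int best = 0;
    for (int i = 1; i < votes.length; i++) {
        if (votes[i] > votes[best]) {
            best = i;
        }
    }
    return best;
}

static int map(int[][] mapping, int clustering, int clusterId) {
    // mapping[clustering][clusterId] holds the matching cluster ID
    // in the reference (0th) clustering
    return mapping[clustering][clusterId];
}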
@Override
public String runMetis(Graph graph, int k, Props params) {
    long current = System.currentTimeMillis();
    File file = new File("inputGraph-" + current);
    String filename = file.getName();
    Process p;
    try {
        graph.hMetisExport(file, false);
        if (metisFile == null) {
            // fetch the binary just once
            metisFile = getBinary();
            System.out.println("metis path: " + metisFile.getAbsolutePath());
        }
        // assemble the metis command line
        String space = " ";
        StringBuilder sb = new StringBuilder(metisFile.getAbsolutePath());
        sb.append(space).append(filename)
                .append(space).append(k)
                .append(space).append("-ufactor=").append(params.getDouble(UFACTOR, 5.0))
                .append(space).append("-nruns=").append(params.getInt(NRUNS, 10))
                .append(space).append("-ptype=").append(params.get(PTYPE, "rb"))
                .append(space).append("-otype=").append(params.get(OTYPE, "cut"))
                .append(space);
        if (params.containsKey(CTYPE)) {
            sb.append("-ctype=").append(params.get(CTYPE)).append(space);
        }
        if (params.containsKey(RTYPE)) {
            sb.append("-rtype=").append(params.get(RTYPE)).append(space);
        }
        p = Runtime.getRuntime().exec(sb.toString());
        p.waitFor();
        if (debug) {
            System.out.println("cmd: " + sb.toString());
            System.out.println("exit code: " + p.exitValue());
            helper.readStdout(p);
            helper.readStderr(p);
        }
        file.delete();
    } catch (IOException | InterruptedException ex) {
        Exceptions.printStackTrace(ex);
    }
    return filename;
}
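/*
 * For reference, with the defaults above the assembled command looks
 * roughly like this (the binary path, timestamped file name, and k are
 * illustrative, not taken from the source):
 *
 *   /path/to/metis-binary inputGraph-1400000000000 4 -ufactor=5.0 -nruns=10 -ptype=rb -otype=cut
 */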
@Override
public Clustering<E, C> reduce(Clustering[] clusts, Algorithm<E, C> alg,
        ColorGenerator cg, Props props) {
    Graph graph = createGraph(clusts);
    // node degree
    double df;
    double w, attain;
    EdgeIterable neigh;
    PriorityQueue<DoubleElem<Node>> pq = new PriorityQueue<>(graph.getNodeCount());
    DoubleElem<Node> elem;
    // for each node compute the attainment score: average weight of its edges
    for (Node node : graph.getNodes()) {
        neigh = graph.getEdges(node);
        df = neigh.size();
        w = 0.0;
        for (Edge ne : neigh) {
            w += ne.getWeight();
        }
        attain = w / df;
        elem = new DoubleElem<>(node, attain);
        pq.add(elem);
    }
    // the number of clusters is just a hint
    int k = props.getInt(KMeans.K, 5);
    double relax = props.getDouble(RELAX, 0.5);
    Clustering<E, C> result = new ClusterList<>(k);
    Dataset<? extends Instance> dataset = clusts[0].getLookup().lookup(Dataset.class);
    result.lookupAdd(dataset);
    ObjectOpenHashSet<Node> blacklist = new ObjectOpenHashSet<>();
    Node node, other;
    Cluster curr;
    double maxW;
    while (!pq.isEmpty()) {
        elem = pq.poll();
        node = elem.getElem();
        if (!blacklist.contains(node)) {
            blacklist.add(node);
            curr = result.createCluster();
            if (cg != null) {
                curr.setColor(cg.next());
            }
            curr.add(node.getInstance());
            EdgeIterable iter = graph.getEdges(node);
            maxW = -1;
            for (Edge ne : iter) {
                if (ne.getWeight() > maxW) {
                    maxW = ne.getWeight();
                }
            }
            // add immediate neighbours with (nearly) maximal weight to the same cluster
            if (maxW >= 0.0) {
                for (Edge ne : iter) {
                    // when relax is set to 0.0, only items with the maximum weight
                    // will be added to the same cluster
                    w = ne.getWeight() + relax * ne.getWeight();
                    if (w >= maxW) {
                        if (!node.equals(ne.getSource())) {
                            other = ne.getSource();
                        } else {
                            other = ne.getTarget();
                        }
                        if (!blacklist.contains(other)) {
                            curr.add(other.getInstance());
                            blacklist.add(other);
                        }
                    }
                }
            }
        }
    }
    // TODO merge some clusters
    return result;
}
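/*
 * A tiny worked example of the attainment score and the relax threshold
 * (numbers are illustrative): a node with edge weights {0.8, 0.6, 0.4}
 * has attainment (0.8 + 0.6 + 0.4) / 3 = 0.6 and maxW = 0.8. With
 * relax = 0.5, a neighbour edge of weight w joins the cluster when
 * w + 0.5 * w >= maxW, i.e. w >= 0.8 / 1.5 ≈ 0.533, so the 0.6 edge
 * qualifies while the 0.4 edge does not.
 */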