/**
 * Creates a new cluster inside the given clustering and initializes its
 * bookkeeping: generated color, sequential name/id, parent dataset and
 * the dataset's attribute definitions.
 *
 * @param clusters clustering that will own the newly created cluster
 * @return the freshly initialized cluster
 */
private Cluster makeCluster(Clustering clusters) {
    Cluster cluster = clusters.createCluster();
    cluster.setColor(colorGenerator.next());
    // display name is 1-based for readability; the id stays 0-based
    cluster.setName("cluster " + (num + 1));
    cluster.setClusterId(num);
    num++;
    cluster.setParent(getDataset());
    cluster.setAttributes(getDataset().getAttributes());
    return cluster;
}
@Override public Clustering<E, C> reduce( Clustering[] clusts, Algorithm<E, C> alg, ColorGenerator cg, Props props) { int k = props.getInt(KMeans.K); Clustering<E, C> result = new ClusterList<>(k); // reducer - find consensus // vote about final result E curr; Iterator<E> it = clusts[0].instancesIterator(); Cluster<E> cluster; int[][] mapping = findMapping(clusts, k, alg.getDistanceFunction()); if (cg != null) { cg.reset(); } int idx; while (it.hasNext()) { curr = it.next(); int[] assign = new int[k]; for (int i = 0; i < clusts.length; i++) { cluster = clusts[i].assignedCluster(curr); if (i > 0) { assign[map(mapping, i, cluster.getClusterId())]++; } else { assign[cluster.getClusterId()]++; } } idx = findMax(assign); // check if cluster already exists if (!result.hasAt(idx)) { result.createCluster(idx); if (cg != null) { result.get(idx).setColor(cg.next()); } } // final cluster assignment result.get(idx).add(curr); } result.compact(); return result; }
@Override public Clustering updateCutoff(double cutoff) { this.cutoff = cutoff; int[] assign = new int[dataset.size()]; int estClusters = (int) Math.sqrt(dataset.size()); colorGenerator.reset(); num = 0; // human readable Clustering clusters = new ClusterList(estClusters); DendroNode root = treeData.getRoot(); if (root != null) { checkCutoff(root, cutoff, clusters, assign); if (clusters.size() > 0) { mapping = assign; } else { LOG.info("failed to cutoff dendrogram, cut = {}", cutoff); } } // add input dataset to clustering lookup if (noise != null) { Cluster clust = new BaseCluster<>(noise.size()); clust.setColor(colorGenerator.next()); clust.setClusterId(num++); clust.setParent(getDataset()); clust.setName("Noise"); clust.setAttributes(getDataset().getAttributes()); for (Instance ins : noise) { clust.add(ins); mapping[ins.getIndex()] = num - 1; } clusters.add(clust); } clusters.lookupAdd(dataset); if (dendroMapping != null) { clusters.lookupAdd(dendroMapping); } clusters.lookupAdd(this); return clusters; }
@Override public Clustering<E, C> reduce( Clustering[] clusts, Algorithm<E, C> alg, ColorGenerator cg, Props props) { Graph graph = createGraph(clusts); // degree of freedom double df; double w, attain; EdgeIterable neigh; PriorityQueue<DoubleElem> pq = new PriorityQueue<>(graph.getNodeCount()); DoubleElem<Node> elem; // for each node compute attainment score for (Node node : graph.getNodes()) { neigh = graph.getEdges(node); df = neigh.size(); w = 0.0; for (Edge ne : neigh) { w += ne.getWeight(); } attain = w / df; elem = new DoubleElem<>(node, attain); pq.add(elem); } // number of clusters is just a hint int k = props.getInt(KMeans.K, 5); double relax = props.getDouble(RELAX, 0.5); Clustering<E, C> result = new ClusterList(k); Dataset<? extends Instance> dataset = clusts[0].getLookup().lookup(Dataset.class); result.lookupAdd(dataset); ObjectOpenHashSet<Node> blacklist = new ObjectOpenHashSet(); Node node, other; Cluster curr; double maxW; while (!pq.isEmpty()) { elem = pq.poll(); node = elem.getElem(); if (!blacklist.contains(node)) { blacklist.add(node); curr = result.createCluster(); if (cg != null) { curr.setColor(cg.next()); } curr.add(node.getInstance()); EdgeIterable iter = graph.getEdges(node); maxW = -1; for (Edge ne : iter) { if (ne.getWeight() > maxW) { maxW = ne.getWeight(); } } // add immediate neighbours with max weight to same cluster if (maxW >= 0.0) { for (Edge ne : iter) { // when relax set to 0.0, only items with maximum weight // will be added to the same cluster w = ne.getWeight() + relax * ne.getWeight(); if (w >= maxW) { if (!node.equals(ne.getSource())) { other = ne.getSource(); } else { other = ne.getTarget(); } if (!blacklist.contains(other)) { curr.add(other.getInstance()); blacklist.add(other); } } } } } } // TODO merge some clusters return result; }