public Graph createGraph(Clustering[] clusts) { Clustering c = clusts[0]; // total number of items int n = c.instancesCount(); Graph graph = new AdjListGraph(); Object2LongOpenHashMap<Instance> mapping = new Object2LongOpenHashMap(); Instance a, b; Node na, nb; // cluster membership int ca, cb; int x = 0; Edge edge; // accumulate evidence for (Clustering clust : clusts) { System.out.println("reducing " + (x++)); for (int i = 1; i < n; i++) { a = clust.instance(i); na = fetchNode(graph, mapping, a); ca = clust.assignedCluster(a.getIndex()); for (int j = 0; j < i; j++) { b = clust.instance(j); nb = fetchNode(graph, mapping, b); // for each pair of instances check if placed in the same cluster cb = clust.assignedCluster(b.getIndex()); if (ca == cb) { edge = graph.getEdge(na, nb); // check if exists if (edge == null) { edge = graph.getFactory().newEdge(na, nb, 0, 0, false); graph.addEdge(edge); } // increase weight by 1 edge.setWeight(edge.getWeight() + 1.0); } } } } return graph; }
@Override public Clustering<E, C> reduce( Clustering[] clusts, Algorithm<E, C> alg, ColorGenerator cg, Props props) { Graph graph = createGraph(clusts); // degree of freedom double df; double w, attain; EdgeIterable neigh; PriorityQueue<DoubleElem> pq = new PriorityQueue<>(graph.getNodeCount()); DoubleElem<Node> elem; // for each node compute attainment score for (Node node : graph.getNodes()) { neigh = graph.getEdges(node); df = neigh.size(); w = 0.0; for (Edge ne : neigh) { w += ne.getWeight(); } attain = w / df; elem = new DoubleElem<>(node, attain); pq.add(elem); } // number of clusters is just a hint int k = props.getInt(KMeans.K, 5); double relax = props.getDouble(RELAX, 0.5); Clustering<E, C> result = new ClusterList(k); Dataset<? extends Instance> dataset = clusts[0].getLookup().lookup(Dataset.class); result.lookupAdd(dataset); ObjectOpenHashSet<Node> blacklist = new ObjectOpenHashSet(); Node node, other; Cluster curr; double maxW; while (!pq.isEmpty()) { elem = pq.poll(); node = elem.getElem(); if (!blacklist.contains(node)) { blacklist.add(node); curr = result.createCluster(); if (cg != null) { curr.setColor(cg.next()); } curr.add(node.getInstance()); EdgeIterable iter = graph.getEdges(node); maxW = -1; for (Edge ne : iter) { if (ne.getWeight() > maxW) { maxW = ne.getWeight(); } } // add immediate neighbours with max weight to same cluster if (maxW >= 0.0) { for (Edge ne : iter) { // when relax set to 0.0, only items with maximum weight // will be added to the same cluster w = ne.getWeight() + relax * ne.getWeight(); if (w >= maxW) { if (!node.equals(ne.getSource())) { other = ne.getSource(); } else { other = ne.getTarget(); } if (!blacklist.contains(other)) { curr.add(other.getInstance()); blacklist.add(other); } } } } } } // TODO merge some clusters return result; }