예제 #1
0
파일: COMUSA.java 프로젝트: deric/clueminer
  /**
   * Builds a co-association graph from a set of clusterings.
   *
   * <p>For every clustering, each pair of instances assigned to the same cluster gets an edge
   * (created on first use with weight 0) whose weight is then increased by 1. An edge weight is
   * therefore the number of clusterings in which the two instances were placed together.
   *
   * <p>Nodes are obtained through {@code fetchNode}, which is defined elsewhere in this class and
   * is presumably responsible for creating and registering a node the first time an instance is
   * seen (TODO confirm).
   *
   * @param clusts clusterings to accumulate; the number of instances is taken from the first one
   * @return the accumulated graph; an empty graph when {@code clusts} is {@code null} or empty
   */
  public Graph createGraph(Clustering[] clusts) {
    Graph graph = new AdjListGraph();
    // nothing to accumulate: return the empty graph instead of failing on clusts[0]
    if (clusts == null || clusts.length == 0) {
      return graph;
    }

    // total number of items
    int n = clusts[0].instancesCount();
    Object2LongOpenHashMap<Instance> mapping = new Object2LongOpenHashMap<>();

    if (n == 1) {
      // The pairwise loops below never execute for a single instance, which would leave it
      // without a node; fetch it explicitly so it is not silently dropped.
      fetchNode(graph, mapping, clusts[0].instance(0));
    }

    // accumulate evidence
    // NOTE(review): progress is reported on stdout; consider routing it through a logger.
    for (int x = 0; x < clusts.length; x++) {
      Clustering clust = clusts[x];
      System.out.println("reducing " + x);
      for (int i = 1; i < n; i++) {
        Instance a = clust.instance(i);
        Node na = fetchNode(graph, mapping, a);
        // cluster membership of the first item of the pair
        int ca = clust.assignedCluster(a.getIndex());
        for (int j = 0; j < i; j++) {
          Instance b = clust.instance(j);
          Node nb = fetchNode(graph, mapping, b);
          int cb = clust.assignedCluster(b.getIndex());
          // only pairs placed in the same cluster contribute evidence
          if (ca != cb) {
            continue;
          }
          Edge edge = graph.getEdge(na, nb);
          if (edge == null) {
            // first co-occurrence of this pair: start from weight 0
            edge = graph.getFactory().newEdge(na, nb, 0, 0, false);
            graph.addEdge(edge);
          }
          // increase weight by 1
          edge.setWeight(edge.getWeight() + 1.0);
        }
      }
    }
    return graph;
  }
예제 #2
0
파일: COMUSA.java 프로젝트: deric/clueminer
  /**
   * Reduces several clusterings into a single one using the co-association graph built by
   * {@link #createGraph(Clustering[])}.
   *
   * <p>Each node gets an attainment score: the sum of the weights of its edges divided by the
   * number of its edges (0 for a node without edges). Nodes are then polled from a priority
   * queue (ordering is defined by {@code DoubleElem}, not visible here). Every polled node that
   * has not been assigned yet starts a new cluster and pulls in those unassigned direct
   * neighbours whose relaxed edge weight {@code weight + relax * weight} reaches the maximum
   * edge weight of that node.
   *
   * @param clusts clusterings to combine; the dataset is looked up from the first one
   * @param alg not used by this method
   * @param cg optional colour generator for the created clusters, may be {@code null}
   * @param props configuration; reads {@code KMeans.K} (default 5, only a size hint) and
   *     {@code RELAX} (default 0.5)
   * @return the resulting clustering
   * @throws IllegalArgumentException when {@code clusts} is {@code null} or empty
   */
  @Override
  public Clustering<E, C> reduce(
      Clustering[] clusts, Algorithm<E, C> alg, ColorGenerator cg, Props props) {
    if (clusts == null || clusts.length == 0) {
      throw new IllegalArgumentException("at least one clustering is required");
    }
    Graph graph = createGraph(clusts);

    // PriorityQueue rejects an initial capacity below 1, which an empty graph would produce
    PriorityQueue<DoubleElem<Node>> pq = new PriorityQueue<>(Math.max(1, graph.getNodeCount()));
    // for each node compute attainment score
    for (Node candidate : graph.getNodes()) {
      EdgeIterable neigh = graph.getEdges(candidate);
      // degree of freedom
      double df = neigh.size();
      double total = 0.0;
      for (Edge ne : neigh) {
        total += ne.getWeight();
      }
      // a node without edges would yield 0.0 / 0 = NaN and corrupt the queue ordering
      double attain = df > 0 ? total / df : 0.0;
      pq.add(new DoubleElem<>(candidate, attain));
    }

    // number of clusters is just a hint
    int k = props.getInt(KMeans.K, 5);
    double relax = props.getDouble(RELAX, 0.5);
    Clustering<E, C> result = new ClusterList(k);
    Dataset<? extends Instance> dataset = clusts[0].getLookup().lookup(Dataset.class);
    result.lookupAdd(dataset);

    // nodes that already belong to some cluster
    ObjectOpenHashSet<Node> blacklist = new ObjectOpenHashSet<>();
    while (!pq.isEmpty()) {
      Node node = pq.poll().getElem();
      if (!blacklist.add(node)) {
        // already absorbed into an earlier cluster
        continue;
      }
      Cluster curr = result.createCluster();
      if (cg != null) {
        curr.setColor(cg.next());
      }
      curr.add(node.getInstance());

      EdgeIterable iter = graph.getEdges(node);
      double maxW = -1;
      for (Edge ne : iter) {
        if (ne.getWeight() > maxW) {
          maxW = ne.getWeight();
        }
      }
      if (maxW < 0.0) {
        // no edges: the node stays alone in its cluster
        continue;
      }
      // add immediate neighbours with max weight to same cluster
      for (Edge ne : iter) {
        // when relax set to 0.0, only items with maximum weight
        // will be added to the same cluster
        double relaxed = ne.getWeight() + relax * ne.getWeight();
        if (relaxed < maxW) {
          continue;
        }
        // pick the endpoint of the edge that is not the current node
        Node other = node.equals(ne.getSource()) ? ne.getTarget() : ne.getSource();
        if (blacklist.add(other)) {
          curr.add(other.getInstance());
        }
      }
    }
    // TODO merge some clusters

    return result;
  }