Example #1
0
 /**
  * Creates a new cluster inside the given clustering and initialises it.
  *
  * <p>The cluster is given the next color from {@code colorGenerator}, the
  * current value of the {@code num} counter as its id (the counter is then
  * incremented), a one-based display name of the form {@code "cluster N"},
  * and the dataset returned by {@code getDataset()} as its parent together
  * with that dataset's attributes.
  *
  * @param clusters clustering in which the new cluster is created
  * @return the newly created, initialised cluster
  */
 private Cluster makeCluster(Clustering clusters) {
   final Cluster created = clusters.createCluster();
   created.setColor(colorGenerator.next());

   // Take the id once; the display name is one-based, the id zero-based.
   final int id = num++;
   created.setName("cluster " + (id + 1));
   created.setClusterId(id);

   created.setParent(getDataset());
   created.setAttributes(getDataset().getAttributes());
   return created;
 }
Example #2
0
  /**
   * Combines several clusterings into one by letting every input clustering
   * vote on the cluster of each instance.
   *
   * <p>Instances are taken from the first clustering ({@code clusts[0]}). For
   * each instance, the first clustering votes with its own cluster id, while
   * every other clustering's cluster id is first translated through the table
   * produced by {@code findMapping} (via {@code map}). The instance is added
   * to the cluster whose index {@code findMax} selects from the vote counts;
   * that cluster is created on first use and, when a color generator is
   * supplied, colored with its next color. {@code compact()} is invoked on the
   * result before it is returned.
   *
   * @param clusts clusterings to merge; the first one supplies the instances
   * @param alg algorithm whose distance function is passed to {@code findMapping}
   * @param cg color generator, reset before use; may be {@code null}, in which
   *     case no colors are assigned
   * @param props properties holding the cluster count under {@code KMeans.K}
   * @return the consensus clustering
   */
  @Override
  public Clustering<E, C> reduce(
      Clustering[] clusts, Algorithm<E, C> alg, ColorGenerator cg, Props props) {
    final int k = props.getInt(KMeans.K);
    final Clustering<E, C> consensus = new ClusterList<>(k);
    final Iterator<E> instances = clusts[0].instancesIterator();
    final int[][] mapping = findMapping(clusts, k, alg.getDistanceFunction());

    final boolean colored = cg != null;
    if (colored) {
      cg.reset();
    }

    while (instances.hasNext()) {
      final E instance = instances.next();

      // one counter per target cluster
      final int[] votes = new int[k];
      for (int i = 0; i < clusts.length; i++) {
        final Cluster<E> member = clusts[i].assignedCluster(instance);
        // the first clustering is the reference; the others are translated
        final int votedId =
            (i == 0)
                ? member.getClusterId()
                : map(mapping, i, member.getClusterId());
        votes[votedId]++;
      }

      final int winner = findMax(votes);
      // create the winning cluster lazily
      if (!consensus.hasAt(winner)) {
        consensus.createCluster(winner);
        if (colored) {
          consensus.get(winner).setColor(cg.next());
        }
      }
      consensus.get(winner).add(instance);
    }

    consensus.compact();
    return consensus;
  }
Example #3
0
 /**
  * Stores a new cutoff and rebuilds the flat clustering for it.
  *
  * <p>The color generator and the {@code num} cluster counter are reset, then
  * {@code checkCutoff} is run from the dendrogram root (if there is one) to
  * fill a fresh clustering and an assignment array sized to the dataset. The
  * assignment array replaces {@code mapping} only when at least one cluster
  * was produced; otherwise a message is logged and {@code mapping} is left
  * untouched. When {@code noise} is non-null, its instances are gathered into
  * an extra cluster named "Noise" and {@code mapping} is updated with that
  * cluster's id at each instance's index. Finally the dataset, the
  * {@code dendroMapping} (if non-null) and this object are added to the
  * clustering's lookup.
  *
  * @param cutoff new cutoff value
  * @return the clustering built for the given cutoff
  */
 @Override
 public Clustering updateCutoff(double cutoff) {
   this.cutoff = cutoff;

   final int size = dataset.size();
   final int[] assignment = new int[size];
   final int clusterEstimate = (int) Math.sqrt(size);

   colorGenerator.reset();
   num = 0; // cluster numbering restarts for every cut

   final Clustering result = new ClusterList(clusterEstimate);
   final DendroNode root = treeData.getRoot();
   if (root != null) {
     checkCutoff(root, cutoff, result, assignment);
     if (result.size() <= 0) {
       LOG.info("failed to cutoff dendrogram, cut = {}", cutoff);
     } else {
       mapping = assignment;
     }
   }

   // noise instances get a dedicated cluster appended after the regular ones
   if (noise != null) {
     final Cluster noiseCluster = new BaseCluster<>(noise.size());
     noiseCluster.setColor(colorGenerator.next());
     final int noiseId = num++;
     noiseCluster.setClusterId(noiseId);
     noiseCluster.setParent(getDataset());
     noiseCluster.setName("Noise");
     noiseCluster.setAttributes(getDataset().getAttributes());
     for (Instance ins : noise) {
       noiseCluster.add(ins);
       mapping[ins.getIndex()] = noiseId;
     }
     result.add(noiseCluster);
   }

   // expose the inputs of this clustering through its lookup
   result.lookupAdd(dataset);
   if (dendroMapping != null) {
     result.lookupAdd(dendroMapping);
   }
   result.lookupAdd(this);
   return result;
 }
Example #4
0
  /**
   * Merges several clusterings into one by greedily growing clusters on a
   * graph built from the inputs.
   *
   * <p>A graph is obtained from {@code createGraph(clusts)}. Every node gets an
   * attainment score equal to the sum of its edge weights divided by its number
   * of edges (0.0 for a node without edges) and is put into a priority queue;
   * the polling order is whatever {@code DoubleElem}'s ordering defines. Each
   * polled node that has not been assigned yet seeds a new cluster. Its direct
   * neighbours that are still unassigned join the same cluster when
   * {@code weight + relax * weight >= maxWeight}, where {@code maxWeight} is the
   * largest edge weight at the seed node.
   *
   * <p>The dataset found in the lookup of {@code clusts[0]} is added to the
   * lookup of the result.
   *
   * @param clusts clusterings to merge; the first one supplies the dataset
   * @param alg not used by this implementation
   * @param cg color generator for the created clusters; may be {@code null},
   *     in which case no colors are assigned
   * @param props properties; {@code KMeans.K} (default 5) is only a capacity
   *     hint, {@code RELAX} (default 0.5) controls how far below the maximum
   *     weight a neighbour's edge may be
   * @return the merged clustering
   */
  @Override
  public Clustering<E, C> reduce(
      Clustering[] clusts, Algorithm<E, C> alg, ColorGenerator cg, Props props) {
    Graph graph = createGraph(clusts);

    // java.util.PriorityQueue rejects an initial capacity below 1, so an empty
    // graph must not be passed through as capacity 0.
    PriorityQueue<DoubleElem<Node>> pq =
        new PriorityQueue<>(Math.max(1, graph.getNodeCount()));

    // for each node compute attainment score
    for (Node node : graph.getNodes()) {
      EdgeIterable neigh = graph.getEdges(node);
      // degree of freedom
      double df = neigh.size();
      double w = 0.0;
      for (Edge ne : neigh) {
        w += ne.getWeight();
      }
      // A node without edges would give 0.0 / 0 = NaN; score it as 0.0 so the
      // queue never has to order a NaN key.
      double attain = df > 0 ? w / df : 0.0;
      pq.add(new DoubleElem<>(node, attain));
    }

    // number of clusters is just a hint
    int k = props.getInt(KMeans.K, 5);
    double relax = props.getDouble(RELAX, 0.5);
    Clustering<E, C> result = new ClusterList<>(k);
    Dataset<? extends Instance> dataset = clusts[0].getLookup().lookup(Dataset.class);
    result.lookupAdd(dataset);

    // nodes that already belong to some cluster
    ObjectOpenHashSet<Node> blacklist = new ObjectOpenHashSet<>();
    while (!pq.isEmpty()) {
      Node node = pq.poll().getElem();
      if (blacklist.contains(node)) {
        continue;
      }
      blacklist.add(node);

      Cluster curr = result.createCluster();
      if (cg != null) {
        curr.setColor(cg.next());
      }
      curr.add(node.getInstance());

      EdgeIterable iter = graph.getEdges(node);
      double maxW = -1;
      for (Edge ne : iter) {
        if (ne.getWeight() > maxW) {
          maxW = ne.getWeight();
        }
      }
      // maxW stays negative when the node has no edge with weight >= 0
      if (maxW < 0.0) {
        continue;
      }

      // add immediate neighbours with max weight to same cluster
      for (Edge ne : iter) {
        // when relax set to 0.0, only items with maximum weight
        // will be added to the same cluster
        double relaxed = ne.getWeight() + relax * ne.getWeight();
        if (relaxed < maxW) {
          continue;
        }
        // pick the endpoint of the edge that is not the seed node
        Node other = node.equals(ne.getSource()) ? ne.getTarget() : ne.getSource();
        if (!blacklist.contains(other)) {
          curr.add(other.getInstance());
          blacklist.add(other);
        }
      }
    }
    // TODO merge some clusters

    return result;
  }