/**
 * Produces the next training example: an {@code Instance} whose first constructor argument is an
 * {@link AgglomerativeNeighbor} and whose remaining three constructor arguments are {@code null}.
 *
 * <p>While {@code positiveCount < positiveTarget} and at least one entry exists in {@code
 * nonsingletonClusters}, a positive example is drawn: a random non-singleton cluster is chosen, a
 * subset of its members is sampled, one member of that subset is split off, and the neighbor
 * pairs that single member with the rest of the subset (the clustering itself is passed as both
 * the original and the modified clustering).
 *
 * <p>Otherwise a negative example is drawn: two distinct cluster labels are chosen at random and
 * the neighbor pairs a sample from the first cluster with one sampled member of the second,
 * against a copy of the clustering in which the two clusters are merged.
 *
 * <p>{@code totalCount} is incremented on every successful call.
 *
 * @return the sampled example
 * @throws IllegalStateException if a negative example is required but the clustering has fewer
 *     than two clusters, so two distinct labels cannot be drawn
 */
public Instance next() {
    AgglomerativeNeighbor neighbor;

    if (positiveCount < positiveTarget && nonsingletonClusters.length > 0) { // Sample positive.
      positiveCount++;
      int label = nonsingletonClusters[random.nextInt(nonsingletonClusters.length)];

      int[] instances = clustering.getIndicesWithLabel(label);
      // NOTE(review): the trailing 2 is presumably the minimum sample size -- confirm against
      // sampleFromArray.
      int[] subcluster = sampleFromArray(instances, random, 2);
      int[] cluster1 = new int[] {subcluster[random.nextInt(subcluster.length)]}; // Singleton.
      int[] cluster2 = new int[subcluster.length - 1];
      int nadded = 0;
      // Everything in the sampled subset except the split-off member goes to cluster2.
      for (int i = 0; i < subcluster.length; i++) {
        if (subcluster[i] != cluster1[0]) {
          cluster2[nadded++] = subcluster[i];
        }
      }

      neighbor = new AgglomerativeNeighbor(clustering, clustering, cluster1, cluster2);
    } else { // Sample negative.
      int numClusters = clustering.getNumClusters();
      // With a single cluster the rejection loop below could never find a second, distinct label
      // and would spin forever; with none, Random.nextInt would reject the bound. Fail clearly.
      if (numClusters < 2) {
        throw new IllegalStateException(
            "Cannot sample a negative example: clustering has "
                + numClusters
                + " cluster(s), at least 2 are required");
      }

      int labeli = random.nextInt(numClusters);
      int labelj = random.nextInt(numClusters);
      // Redraw until the two labels differ.
      while (labeli == labelj) {
        labelj = random.nextInt(numClusters);
      }

      int[] ii = sampleFromArray(clustering.getIndicesWithLabel(labeli), random, 1);
      int[] ij = sampleFromArray(clustering.getIndicesWithLabel(labelj), random, 1);

      neighbor =
          new AgglomerativeNeighbor(
              clustering,
              ClusterUtils.copyAndMergeClusters(clustering, labeli, labelj),
              ii,
              new int[] {ij[random.nextInt(ij.length)]});
    }
    totalCount++;
    return new Instance(neighbor, null, null, null);
  }
Example #2
0
  /**
   * Looks up, and if necessary computes, the score for merging clusters {@code i} and {@code j}.
   *
   * <p>Scores live in {@code scoreCache}, which is created on first use and sized by {@code
   * clustering.getNumInstances()}. The cache is probed at the first member of each cluster; a
   * stored value of exactly {@code 0.0} is treated as "not yet computed". In that case the merge
   * is scored by {@code evaluator} and the result is written for every cross-cluster pair of
   * members. Note that a merge whose evaluated score is exactly {@code 0.0} is therefore
   * re-evaluated on each call.
   *
   * @param clustering the clustering that contains both clusters
   * @param i label of the first cluster
   * @param j label of the second cluster
   * @return The score for merging these two clusters.
   */
  protected double getScore(Clustering clustering, int i, int j) {
    if (scoreCache == null) {
      scoreCache = new PairwiseMatrix(clustering.getNumInstances());
    }

    final int[] membersOfI = clustering.getIndicesWithLabel(i);
    final int[] membersOfJ = clustering.getIndicesWithLabel(j);
    final int probeRow = membersOfI[0];
    final int probeCol = membersOfJ[0];

    // Cache hit: anything other than 0.0 counts as an already-computed score.
    if (scoreCache.get(probeRow, probeCol) != 0.0) {
      return scoreCache.get(probeRow, probeCol);
    }

    // Cache miss: score the hypothetical merge once ...
    final Clustering merged = ClusterUtils.copyAndMergeClusters(clustering, i, j);
    final double mergeScore =
        evaluator.evaluate(new AgglomerativeNeighbor(clustering, merged, membersOfI, membersOfJ));

    // ... and record it for every pair of instances drawn from the two clusters.
    for (int row : membersOfI) {
      for (int col : membersOfJ) {
        scoreCache.set(row, col, mergeScore);
      }
    }

    // Read back through the cache so the returned value is exactly what was stored.
    return scoreCache.get(probeRow, probeCol);
  }
Example #3
0
  /**
   * For each pair of clusters, calculate the score of the {@link
   * cc.mallet.cluster.neighbor_evaluator.Neighbor} that would result from merging the two clusters.
   * Choose the merge that obtains the highest score. If no merge improves score, return original
   * Clustering.
   *
   * <p>The {@code converged} field is set to {@code true} when the best score is below {@code
   * stoppingThreshold}, or when no candidate pair was selected at all (fewer than two clusters, or
   * no pair scored above negative infinity). In that case nothing is merged. Otherwise the score
   * matrix is updated via {@code updateScoreMatrix} and the best pair is merged.
   *
   * @param clustering the clustering to improve
   * @return the clustering after merging the best-scoring pair, or {@code clustering} as passed in
   *     if converged
   */
  public Clustering improveClustering(Clustering clustering) {
    double bestScore = Double.NEGATIVE_INFINITY;
    // -1 marks "no candidate pair selected yet".
    int bestFirst = -1;
    int bestSecond = -1;
    for (int i = 0; i < clustering.getNumClusters(); i++) {
      for (int j = i + 1; j < clustering.getNumClusters(); j++) {
        double score = getScore(clustering, i, j);
        if (score > bestScore) {
          bestScore = score;
          bestFirst = i;
          bestSecond = j;
        }
      }
    }

    // Without a selected pair there is nothing to merge. Checking this explicitly matters when
    // stoppingThreshold is negative infinity or NaN: the score comparison alone is then false and
    // would let the placeholder labels (-1, -1) through to the merge below.
    final boolean noCandidate = bestFirst < 0;
    converged = noCandidate || (bestScore < stoppingThreshold);

    if (!converged) {
      progressLogger.info(
          "Merging "
              + bestFirst
              + "("
              + clustering.size(bestFirst)
              + " nodes) and "
              + bestSecond
              + "("
              + clustering.size(bestSecond)
              + " nodes) ["
              + bestScore
              + "] numClusters="
              + clustering.getNumClusters());
      // Update the score matrix before the merge, while labels still refer to this clustering.
      updateScoreMatrix(clustering, bestFirst, bestSecond);
      clustering = ClusterUtils.mergeClusters(clustering, bestFirst, bestSecond);
    } else {
      progressLogger.info("Converged with score " + bestScore);
    }
    return clustering;
  }
Example #4
0
 /**
  * Calls {@code reset()} and then builds the starting point for clustering.
  *
  * @param instances the instances to be clustered
  * @return A singleton clustering (each Instance in its own cluster).
  */
 public Clustering initializeClustering(InstanceList instances) {
   // reset() runs first, before the initial clustering is built.
   reset();
   final Clustering singletons = ClusterUtils.createSingletonClustering(instances);
   return singletons;
 }