/**
   * Materialize all within-cluster pairwise distances, sorted ascending.
   *
   * <p>Clusters that are flagged as noise or have at most one member are skipped unless the
   * configured noise handling is {@code MERGE_NOISE}. Each unordered pair of a cluster is
   * measured exactly once.
   *
   * @param rel Data relation the vectors are read from
   * @param clusters Clusters to process
   * @param withinPairs Number of within-cluster pairs; used as the size of the result array
   * @return Sorted array of within-cluster distances
   */
protected double[] computeWithinDistances(
      Relation<? extends NumberVector> rel, List<? extends Cluster<?>> clusters, int withinPairs) {
    final double[] distances = new double[withinPairs];
    int filled = 0;
    for (Cluster<?> current : clusters) {
      if (current.size() <= 1 || current.isNoise()) {
        switch (noiseHandling) {
          case IGNORE_NOISE:
          case TREAT_NOISE_AS_SINGLETONS:
            // Either dropped entirely, or singletons: no within-cluster pairs.
            continue;
          case MERGE_NOISE:
            // Handled like a regular cluster below.
            break;
        }
      }

      DBIDIter outer = current.getIDs().iter();
      while (outer.valid()) {
        final NumberVector first = rel.get(outer);
        DBIDIter inner = current.getIDs().iter();
        while (inner.valid()) {
          // Only one orientation of each pair is kept; self-pairs are skipped.
          if (DBIDUtil.compare(outer, inner) > 0) {
            distances[filled++] = distanceFunction.distance(first, rel.get(inner));
          }
          inner.advance();
        }
        outer.advance();
      }
    }
    // The caller-supplied pair count must match what was actually produced.
    assert (filled == distances.length);
    Arrays.sort(distances);
    return distances;
  }
Beispiel #2
0
 /**
  * Filter a candidate list down to the objects within range of the query object.
  *
  * <p>The distance from the query object to every candidate is recomputed with the given
  * distance function on the vectors of {@code kernel.relation}; candidates whose distance is
  * at most {@code adjustedEps} are kept, paired with the recomputed distance.
  *
  * @param neighc Neighbor candidates
  * @param dbid Query object
  * @param df distance function
  * @param adjustedEps Epsilon range (inclusive)
  * @param kernel Kernel, providing the relation to read vectors from
  * @return Candidates within range, with their distances to the query object
  */
 private DoubleDBIDList subsetNeighborhoodQuery(
     DoubleDBIDList neighc,
     DBIDRef dbid,
     PrimitiveDistanceFunction<? super V> df,
     double adjustedEps,
     KernelDensityEstimator kernel) {
   final ModifiableDoubleDBIDList refined = DBIDUtil.newDistanceDBIDList(neighc.size());
   final V center = kernel.relation.get(dbid);
   DoubleDBIDListIter cursor = neighc.iter();
   while (cursor.valid()) {
     final DoubleDBIDPair candidate = cursor.getPair();
     final double d = df.distance(center, kernel.relation.get(candidate));
     // Written as "<=" so that a NaN distance is rejected.
     if (d <= adjustedEps) {
       refined.add(d, candidate);
     }
     cursor.advance();
   }
   return refined;
 }
  /**
   * Evaluate a single clustering.
   *
   * <p>Counts concordant and discordant pairs of (within-cluster distance, between-cluster
   * distance), derives the Gamma and Tau measures from these counts, and registers both as
   * measurements on the evaluation result attached to the clustering.
   *
   * @param db Database
   * @param rel Data relation
   * @param c Clustering
   * @return Gamma index
   * @throws AbortException if the number of within-cluster pairs does not fit into an
   *     {@code int} (the distances are materialized in an array)
   */
  public double evaluateClustering(
      Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
    List<? extends Cluster<?>> clusters = c.getAllClusters();

    // Step one, count the within-cluster pairs (and the ignored noise objects):
    int ignorednoise = 0, withinPairs = 0;
    for (Cluster<?> cluster : clusters) {
      if ((cluster.size() <= 1 || cluster.isNoise())) {
        switch (noiseHandling) {
          case IGNORE_NOISE:
            ignorednoise += cluster.size();
            continue;
          case TREAT_NOISE_AS_SINGLETONS:
            continue; // No concordant distances.
          case MERGE_NOISE:
            break; // Treat like a cluster below.
        }
      }
      // Compute in long: the product size * (size - 1) can wrap around in int arithmetic, and
      // after an unsigned shift the wrapped value is never negative, so a sign check alone
      // cannot detect the overflow.
      final long size = cluster.size();
      final long total = withinPairs + ((size * (size - 1)) >>> 1);
      if (total > Integer.MAX_VALUE) {
        throw new AbortException(
            "Integer overflow - clusters too large to compute pairwise distances.");
      }
      withinPairs = (int) total;
    }
    // Materialize within-cluster distances (sorted):
    double[] withinDistances = computeWithinDistances(rel, clusters, withinPairs);
    int[] withinTies = new int[withinDistances.length];
    // Count ties within
    countTies(withinDistances, withinTies);

    long concordantPairs = 0, discordantPairs = 0, betweenPairs = 0;
    final boolean ignoreNoise = noiseHandling == NoiseHandling.IGNORE_NOISE;

    // Step two, compute discordant distances:
    for (int i = 0; i < clusters.size(); i++) {
      Cluster<?> ocluster1 = clusters.get(i);
      if ((ocluster1.size() <= 1 || ocluster1.isNoise()) && ignoreNoise) {
        continue;
      }
      for (int j = i + 1; j < clusters.size(); j++) {
        Cluster<?> ocluster2 = clusters.get(j);
        if ((ocluster2.size() <= 1 || ocluster2.isNoise()) && ignoreNoise) {
          continue;
        }
        // Widen before multiplying, the product of two cluster sizes may exceed int range.
        betweenPairs += (long) ocluster1.size() * ocluster2.size();
        for (DBIDIter oit1 = ocluster1.getIDs().iter(); oit1.valid(); oit1.advance()) {
          NumberVector obj = rel.get(oit1);
          for (DBIDIter oit2 = ocluster2.getIDs().iter(); oit2.valid(); oit2.advance()) {
            double dist = distanceFunction.distance(obj, rel.get(oit2));
            int p = Arrays.binarySearch(withinDistances, dist);
            if (p >= 0) { // Tied distances:
              // Binary search may hit any of the equal entries; move to the first one.
              while (p > 0 && withinDistances[p - 1] >= dist) {
                --p;
              }
              // Strictly smaller within-distances are concordant; the tied ones count as
              // neither (presumably withinTies[p] is the tie count at p; see countTies).
              concordantPairs += p;
              discordantPairs += withinDistances.length - p - withinTies[p];
              continue;
            }
            // Not found: recover the insertion point from the negative result.
            p = -p - 1;
            concordantPairs += p;
            discordantPairs += withinDistances.length - p;
          }
        }
      }
    }

    // Total number of pairs possible:
    final long t = ((rel.size() - ignorednoise) * (long) (rel.size() - ignorednoise - 1)) >>> 1;
    final long tt = (t * (t - 1)) >>> 1;

    // Note: yields NaN when there are neither concordant nor discordant pairs.
    final double gamma =
        (concordantPairs - discordantPairs) / (double) (concordantPairs + discordantPairs);
    final double tau =
        computeTau(concordantPairs, discordantPairs, tt, withinDistances.length, betweenPairs);

    if (LOG.isStatistics()) {
      // NOTE(review): the ".pbm." infix looks copied from another measure; kept unchanged
      // because consumers may rely on these statistic keys - confirm before renaming.
      LOG.statistics(new StringStatistic(key + ".pbm.noise-handling", noiseHandling.toString()));
      if (ignorednoise > 0) {
        LOG.statistics(new LongStatistic(key + ".pbm.ignored", ignorednoise));
      }
      LOG.statistics(new DoubleStatistic(key + ".gamma", gamma));
      LOG.statistics(new DoubleStatistic(key + ".tau", tau));
    }

    EvaluationResult ev =
        EvaluationResult.findOrCreate(
            db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup g = ev.findOrCreateGroup("Concordance-based Evaluation");
    g.addMeasure("Gamma", gamma, -1., 1., 0., false);
    g.addMeasure("Tau", tau, -1., +1., 0., false);
    db.getHierarchy().resultChanged(ev);
    return gamma;
  }