protected double[] computeWithinDistances( Relation<? extends NumberVector> rel, List<? extends Cluster<?>> clusters, int withinPairs) { double[] concordant = new double[withinPairs]; int i = 0; for (Cluster<?> cluster : clusters) { if (cluster.size() <= 1 || cluster.isNoise()) { switch (noiseHandling) { case IGNORE_NOISE: continue; case TREAT_NOISE_AS_SINGLETONS: continue; // No concordant distances. case MERGE_NOISE: break; // Treat like a cluster below. } } for (DBIDIter it1 = cluster.getIDs().iter(); it1.valid(); it1.advance()) { NumberVector obj = rel.get(it1); for (DBIDIter it2 = cluster.getIDs().iter(); it2.valid(); it2.advance()) { if (DBIDUtil.compare(it1, it2) <= 0) { continue; } concordant[i++] = distanceFunction.distance(obj, rel.get(it2)); } } } assert (concordant.length == i); Arrays.sort(concordant); return concordant; }
/**
 * Refine a candidate neighbor list against a query object.
 * <p>
 * For every candidate, the distance to the query object is recomputed with
 * the given distance function (vectors are looked up in the relation of the
 * kernel); candidates whose distance is at most {@code adjustedEps} are kept
 * together with that recomputed distance.
 *
 * @param neighc Neighbor candidates
 * @param dbid Query object
 * @param df Distance function used for the recomputation
 * @param adjustedEps Epsilon range (inclusive upper bound on the distance)
 * @param kernel Kernel whose relation supplies the vectors
 * @return Candidates within the epsilon range, with their recomputed distances
 */
private DoubleDBIDList subsetNeighborhoodQuery(DoubleDBIDList neighc, DBIDRef dbid, PrimitiveDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
  final ModifiableDoubleDBIDList refined = DBIDUtil.newDistanceDBIDList(neighc.size());
  final V center = kernel.relation.get(dbid);
  DoubleDBIDListIter cand = neighc.iter();
  while (cand.valid()) {
    final DoubleDBIDPair pair = cand.getPair();
    final double d = df.distance(center, kernel.relation.get(pair));
    if (d <= adjustedEps) {
      refined.add(d, pair);
    }
    cand.advance();
  }
  return refined;
}
/** * Evaluate a single clustering. * * @param db Database * @param rel Data relation * @param c Clustering * @return Gamma index */ public double evaluateClustering( Database db, Relation<? extends NumberVector> rel, Clustering<?> c) { List<? extends Cluster<?>> clusters = c.getAllClusters(); int ignorednoise = 0, withinPairs = 0; for (Cluster<?> cluster : clusters) { if ((cluster.size() <= 1 || cluster.isNoise())) { switch (noiseHandling) { case IGNORE_NOISE: ignorednoise += cluster.size(); continue; case TREAT_NOISE_AS_SINGLETONS: continue; // No concordant distances. case MERGE_NOISE: break; // Treat like a cluster below. } } withinPairs += (cluster.size() * (cluster.size() - 1)) >>> 1; if (withinPairs < 0) { throw new AbortException( "Integer overflow - clusters too large to compute pairwise distances."); } } // Materialize within-cluster distances (sorted): double[] withinDistances = computeWithinDistances(rel, clusters, withinPairs); int[] withinTies = new int[withinDistances.length]; // Count ties within countTies(withinDistances, withinTies); long concordantPairs = 0, discordantPairs = 0, betweenPairs = 0; // Step two, compute discordant distances: for (int i = 0; i < clusters.size(); i++) { Cluster<?> ocluster1 = clusters.get(i); if ((ocluster1.size() <= 1 || ocluster1.isNoise()) // && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) { continue; } for (int j = i + 1; j < clusters.size(); j++) { Cluster<?> ocluster2 = clusters.get(j); if ((ocluster2.size() <= 1 || ocluster2.isNoise()) // && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) { continue; } betweenPairs += ocluster1.size() * ocluster2.size(); for (DBIDIter oit1 = ocluster1.getIDs().iter(); oit1.valid(); oit1.advance()) { NumberVector obj = rel.get(oit1); for (DBIDIter oit2 = ocluster2.getIDs().iter(); oit2.valid(); oit2.advance()) { double dist = distanceFunction.distance(obj, rel.get(oit2)); int p = Arrays.binarySearch(withinDistances, dist); if (p >= 0) { // Tied distances: while (p > 0 && 
withinDistances[p - 1] >= dist) { --p; } concordantPairs += p; discordantPairs += withinDistances.length - p - withinTies[p]; continue; } p = -p - 1; concordantPairs += p; discordantPairs += withinDistances.length - p; } } } } // Total number of pairs possible: final long t = ((rel.size() - ignorednoise) * (long) (rel.size() - ignorednoise - 1)) >>> 1; final long tt = (t * (t - 1)) >>> 1; final double gamma = (concordantPairs - discordantPairs) / (double) (concordantPairs + discordantPairs); final double tau = computeTau(concordantPairs, discordantPairs, tt, withinDistances.length, betweenPairs); if (LOG.isStatistics()) { LOG.statistics(new StringStatistic(key + ".pbm.noise-handling", noiseHandling.toString())); if (ignorednoise > 0) { LOG.statistics(new LongStatistic(key + ".pbm.ignored", ignorednoise)); } LOG.statistics(new DoubleStatistic(key + ".gamma", gamma)); LOG.statistics(new DoubleStatistic(key + ".tau", tau)); } EvaluationResult ev = EvaluationResult.findOrCreate( db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation"); MeasurementGroup g = ev.findOrCreateGroup("Concordance-based Evaluation"); g.addMeasure("Gamma", gamma, -1., 1., 0., false); g.addMeasure("Tau", tau, -1., +1., 0., false); db.getHierarchy().resultChanged(ev); return gamma; }