/** * Utility method to create a subspace cluster from a list of DBIDs and the relevant attributes. * * @param relation to compute a centroid. * @param C the cluster points. * @param D the relevant dimensions. * @return an object representing the subspace cluster. */ private Cluster<SubspaceModel> makeCluster(Relation<V> relation, DBIDs C, long[] D) { DBIDs ids = DBIDUtil.newHashSet(C); // copy, also to lose distance values! Cluster<SubspaceModel> cluster = new Cluster<>(ids); cluster.setModel( new SubspaceModel(new Subspace(D), Centroid.make(relation, ids).getArrayRef())); return cluster; }
/** * Runs the DBSCAN algorithm on the specified partition of the database in the given subspace. If * parameter {@code ids} is null DBSCAN will be applied to the whole database. * * @param relation the database holding the objects to run DBSCAN on * @param ids the IDs of the database defining the partition to run DBSCAN on - if this parameter * is null DBSCAN will be applied to the whole database * @param subspace the subspace to run DBSCAN on * @return the clustering result of the DBSCAN run */ private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace subspace) { // distance function distanceFunction.setSelectedDimensions(subspace.getDimensions()); ProxyDatabase proxy; if (ids == null) { // TODO: in this case, we might want to use an index - the proxy below // will prevent this! ids = relation.getDBIDs(); } proxy = new ProxyDatabase(ids, relation); DBSCAN<V> dbscan = new DBSCAN<>(distanceFunction, epsilon, minpts); // run DBSCAN if (LOG.isVerbose()) { LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString()); } Clustering<Model> dbsres = dbscan.run(proxy); // separate cluster and noise List<Cluster<Model>> clusterAndNoise = dbsres.getAllClusters(); List<Cluster<Model>> clusters = new ArrayList<>(); for (Cluster<Model> c : clusterAndNoise) { if (!c.isNoise()) { clusters.add(c); } } return clusters; }
protected double[] computeWithinDistances( Relation<? extends NumberVector> rel, List<? extends Cluster<?>> clusters, int withinPairs) { double[] concordant = new double[withinPairs]; int i = 0; for (Cluster<?> cluster : clusters) { if (cluster.size() <= 1 || cluster.isNoise()) { switch (noiseHandling) { case IGNORE_NOISE: continue; case TREAT_NOISE_AS_SINGLETONS: continue; // No concordant distances. case MERGE_NOISE: break; // Treat like a cluster below. } } for (DBIDIter it1 = cluster.getIDs().iter(); it1.valid(); it1.advance()) { NumberVector obj = rel.get(it1); for (DBIDIter it2 = cluster.getIDs().iter(); it2.valid(); it2.advance()) { if (DBIDUtil.compare(it1, it2) <= 0) { continue; } concordant[i++] = distanceFunction.distance(obj, rel.get(it2)); } } } assert (concordant.length == i); Arrays.sort(concordant); return concordant; }
/** * Process a database * * @param database Database to process * @param relation Relation to process * @return Histogram of ranking qualities */ public HistogramResult<DoubleVector> run(Database database, Relation<O> relation) { final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction()); final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, relation.size()); if (LOG.isVerbose()) { LOG.verbose("Preprocessing clusters..."); } // Cluster by labels Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters(); DoubleStaticHistogram hist = new DoubleStaticHistogram(numbins, 0.0, 1.0); if (LOG.isVerbose()) { LOG.verbose("Processing points..."); } FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", relation.size(), LOG) : null; MeanVariance mv = new MeanVariance(); // sort neighbors for (Cluster<?> clus : split) { for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) { KNNList knn = knnQuery.getKNNForDBID(iter, relation.size()); double result = new ROCEvaluation().evaluate(clus, knn); mv.put(result); hist.increment(result, 1. / relation.size()); LOG.incrementProcessed(progress); } } LOG.ensureCompleted(progress); // Transform Histogram into a Double Vector array. Collection<DoubleVector> res = new ArrayList<>(relation.size()); for (DoubleStaticHistogram.Iter iter = hist.iter(); iter.valid(); iter.advance()) { DoubleVector row = new DoubleVector(new double[] {iter.getCenter(), iter.getValue()}); res.add(row); } HistogramResult<DoubleVector> result = new HistogramResult<>("Ranking Quality Histogram", "ranking-histogram", res); result.addHeader("Mean: " + mv.getMean() + " Variance: " + mv.getSampleVariance()); return result; }
/** * Compute centroids. * * @param rel Data relation * @param clusters Clusters * @param centroids Output array for centroids * @return Number of ignored noise elements. */ public static int centroids( Relation<? extends NumberVector> rel, List<? extends Cluster<?>> clusters, NumberVector[] centroids, NoiseHandling noiseOption) { assert (centroids.length == clusters.size()); int ignorednoise = 0; Iterator<? extends Cluster<?>> ci = clusters.iterator(); for (int i = 0; ci.hasNext(); i++) { Cluster<?> cluster = ci.next(); if (cluster.size() <= 1 || cluster.isNoise()) { switch (noiseOption) { case IGNORE_NOISE: ignorednoise += cluster.size(); case TREAT_NOISE_AS_SINGLETONS: centroids[i] = null; continue; case MERGE_NOISE: break; // Treat as cluster below } } centroids[i] = ModelUtil.getPrototypeOrCentroid(cluster.getModel(), rel, cluster.getIDs()); } return ignorednoise; }
/**
 * Determines the {@code d}-dimensional subspace of the {@code (d+1)}-dimensional candidate with
 * minimal number of objects in the cluster.
 *
 * <p>The minimum is tracked across <em>all</em> subspaces of the candidate: the returned
 * subspace is the one that contains the smallest single cluster overall. (Previously the
 * running minimum was reset for every subspace, so the last matching subspace always won.)
 *
 * @param subspaces the list of {@code d}-dimensional subspaces containing clusters
 * @param candidate the {@code (d+1)}-dimensional candidate subspace
 * @param clusterMap the mapping of subspaces to clusters
 * @return the {@code d}-dimensional subspace of the {@code (d+1)}-dimensional candidate with
 *     minimal number of objects in the cluster, or {@code null} if no subspace of the candidate
 *     has any cluster
 */
private Subspace bestSubspace(
    List<Subspace> subspaces,
    Subspace candidate,
    TreeMap<Subspace, List<Cluster<Model>>> clusterMap) {
  Subspace bestSubspace = null;
  // Must live outside the loop, otherwise every subspace starts from scratch.
  int min = Integer.MAX_VALUE;
  for (Subspace subspace : subspaces) {
    if (!subspace.isSubspace(candidate)) {
      continue;
    }
    List<Cluster<Model>> clusters = clusterMap.get(subspace);
    if (clusters == null) {
      continue; // No clusters registered for this subspace.
    }
    for (Cluster<Model> cluster : clusters) {
      int clusterSize = cluster.size();
      if (clusterSize < min) {
        min = clusterSize;
        bestSubspace = subspace;
      }
    }
  }
  return bestSubspace;
}
/** * Performs the SUBCLU algorithm on the given database. * * @param relation Relation to process * @return Clustering result */ public Clustering<SubspaceModel> run(Relation<V> relation) { final int dimensionality = RelationUtil.dimensionality(relation); StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null; // Generate all 1-dimensional clusters LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters."); // mapping of dimensionality to set of subspaces HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>(); // list of 1-dimensional subspaces containing clusters List<Subspace> s_1 = new ArrayList<>(); subspaceMap.put(0, s_1); // mapping of subspaces to list of clusters TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator()); for (int d = 0; d < dimensionality; d++) { Subspace currentSubspace = new Subspace(d); List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace); if (LOG.isDebuggingFiner()) { StringBuilder msg = new StringBuilder(); msg.append('\n') .append(clusters.size()) .append(" clusters in subspace ") .append(currentSubspace.dimensonsToString()) .append(": \n"); for (Cluster<Model> cluster : clusters) { msg.append(" " + cluster.getIDs() + "\n"); } LOG.debugFiner(msg.toString()); } if (!clusters.isEmpty()) { s_1.add(currentSubspace); clusterMap.put(currentSubspace, clusters); } } // Generate (d+1)-dimensional clusters from d-dimensional clusters for (int d = 0; d < dimensionality - 1; d++) { if (stepprog != null) { stepprog.beginStep( d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG); } List<Subspace> subspaces = subspaceMap.get(d); if (subspaces == null || subspaces.isEmpty()) { if (stepprog != null) { for (int dim = d + 1; dim < dimensionality - 1; dim++) { stepprog.beginStep( dim + 2, "Generation of" + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + 
"-dimensional subspaces found.", LOG); } } break; } List<Subspace> candidates = generateSubspaceCandidates(subspaces); List<Subspace> s_d = new ArrayList<>(); for (Subspace candidate : candidates) { Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap); if (LOG.isDebuggingFine()) { LOG.debugFine( "best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString()); } List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace); List<Cluster<Model>> clusters = new ArrayList<>(); for (Cluster<Model> cluster : bestSubspaceClusters) { List<Cluster<Model>> candidateClusters = runDBSCAN(relation, cluster.getIDs(), candidate); if (!candidateClusters.isEmpty()) { clusters.addAll(candidateClusters); } } if (LOG.isDebuggingFine()) { StringBuilder msg = new StringBuilder(); msg.append(clusters.size() + " cluster(s) in subspace " + candidate + ": \n"); for (Cluster<Model> c : clusters) { msg.append(" " + c.getIDs() + "\n"); } LOG.debugFine(msg.toString()); } if (!clusters.isEmpty()) { s_d.add(candidate); clusterMap.put(candidate, clusters); } } if (!s_d.isEmpty()) { subspaceMap.put(d + 1, s_d); } } // build result int numClusters = 1; result = new Clustering<>("SUBCLU clustering", "subclu-clustering"); for (Subspace subspace : clusterMap.descendingKeySet()) { List<Cluster<Model>> clusters = clusterMap.get(subspace); for (Cluster<Model> cluster : clusters) { Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs()); newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs()))); newCluster.setName("cluster_" + numClusters++); result.addToplevelCluster(newCluster); } } LOG.setCompleted(stepprog); return result; }
/** * Evaluate a single clustering. * * @param db Database * @param rel Data relation * @param c Clustering * @return Gamma index */ public double evaluateClustering( Database db, Relation<? extends NumberVector> rel, Clustering<?> c) { List<? extends Cluster<?>> clusters = c.getAllClusters(); int ignorednoise = 0, withinPairs = 0; for (Cluster<?> cluster : clusters) { if ((cluster.size() <= 1 || cluster.isNoise())) { switch (noiseHandling) { case IGNORE_NOISE: ignorednoise += cluster.size(); continue; case TREAT_NOISE_AS_SINGLETONS: continue; // No concordant distances. case MERGE_NOISE: break; // Treat like a cluster below. } } withinPairs += (cluster.size() * (cluster.size() - 1)) >>> 1; if (withinPairs < 0) { throw new AbortException( "Integer overflow - clusters too large to compute pairwise distances."); } } // Materialize within-cluster distances (sorted): double[] withinDistances = computeWithinDistances(rel, clusters, withinPairs); int[] withinTies = new int[withinDistances.length]; // Count ties within countTies(withinDistances, withinTies); long concordantPairs = 0, discordantPairs = 0, betweenPairs = 0; // Step two, compute discordant distances: for (int i = 0; i < clusters.size(); i++) { Cluster<?> ocluster1 = clusters.get(i); if ((ocluster1.size() <= 1 || ocluster1.isNoise()) // && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) { continue; } for (int j = i + 1; j < clusters.size(); j++) { Cluster<?> ocluster2 = clusters.get(j); if ((ocluster2.size() <= 1 || ocluster2.isNoise()) // && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) { continue; } betweenPairs += ocluster1.size() * ocluster2.size(); for (DBIDIter oit1 = ocluster1.getIDs().iter(); oit1.valid(); oit1.advance()) { NumberVector obj = rel.get(oit1); for (DBIDIter oit2 = ocluster2.getIDs().iter(); oit2.valid(); oit2.advance()) { double dist = distanceFunction.distance(obj, rel.get(oit2)); int p = Arrays.binarySearch(withinDistances, dist); if (p >= 0) { // Tied distances: while (p > 0 && 
withinDistances[p - 1] >= dist) { --p; } concordantPairs += p; discordantPairs += withinDistances.length - p - withinTies[p]; continue; } p = -p - 1; concordantPairs += p; discordantPairs += withinDistances.length - p; } } } } // Total number of pairs possible: final long t = ((rel.size() - ignorednoise) * (long) (rel.size() - ignorednoise - 1)) >>> 1; final long tt = (t * (t - 1)) >>> 1; final double gamma = (concordantPairs - discordantPairs) / (double) (concordantPairs + discordantPairs); final double tau = computeTau(concordantPairs, discordantPairs, tt, withinDistances.length, betweenPairs); if (LOG.isStatistics()) { LOG.statistics(new StringStatistic(key + ".pbm.noise-handling", noiseHandling.toString())); if (ignorednoise > 0) { LOG.statistics(new LongStatistic(key + ".pbm.ignored", ignorednoise)); } LOG.statistics(new DoubleStatistic(key + ".gamma", gamma)); LOG.statistics(new DoubleStatistic(key + ".tau", tau)); } EvaluationResult ev = EvaluationResult.findOrCreate( db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation"); MeasurementGroup g = ev.findOrCreateGroup("Concordance-based Evaluation"); g.addMeasure("Gamma", gamma, -1., 1., 0., false); g.addMeasure("Tau", tau, -1., +1., 0., false); db.getHierarchy().resultChanged(ev); return gamma; }
/**
 * Performs the DOC or FastDOC (as configured) algorithm on the given Database.
 *
 * <p>This will run exhaustively, i.e. clusters are extracted one after another until no further
 * cluster is found or the number of remaining points is no longer above the minimum cluster
 * size. Whatever points remain are added as a noise cluster over all dimensions.
 *
 * @param database Database
 * @param relation Data relation
 * @return the clusters found, plus a noise cluster for the remaining points (if any)
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
  // Dimensionality of our set.
  final int d = RelationUtil.dimensionality(relation);

  // Modifiable copy of the DBIDs; found clusters are removed from it.
  final ArrayModifiableDBIDs remaining = DBIDUtil.newArray(relation.getDBIDs());

  // Precompute values as described in Figure 2.
  final double ratio = 2. / alpha;
  final double r = Math.abs(Math.log(d + d) / Math.log(beta * .5));
  final int rTruncated = (int) r;
  // Outer loop count.
  final int outerLoops = (int) ratio;
  // Inner loop count.
  int innerLoops = (int) (Math.pow(ratio, r) * Math.log(4));
  if (heuristics) {
    innerLoops = Math.min(Math.min(innerLoops, 1000000), d * d);
  }

  // Minimum size for a cluster for it to be accepted.
  final int minClusterSize = (int) (alpha * remaining.size());

  // All clusters found so far.
  final Clustering<SubspaceModel> clustering = new Clustering<>("DOC Clusters", "DOC");

  // Reports the number of clusters found so far to the user.
  final IndefiniteProgress cprogress =
      LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;

  // Keep extracting clusters while enough points are left.
  while (remaining.size() > minClusterSize) {
    final Cluster<SubspaceModel> found =
        heuristics
            ? runFastDOC(database, relation, remaining, d, outerLoops, innerLoops, rTruncated)
            : runDOC(
                database,
                relation,
                remaining,
                d,
                outerLoops,
                innerLoops,
                rTruncated,
                minClusterSize);
    if (found == null) {
      // Stop trying if we couldn't find a cluster.
      break;
    }
    // Remember the cluster and take its points out of the working set.
    clustering.addToplevelCluster(found);
    remaining.removeDBIDs(found.getIDs());

    if (cprogress != null) {
      cprogress.setProcessed(clustering.getAllClusters().size(), LOG);
    }
  }

  // Add the remainder as noise.
  if (remaining.size() > 0) {
    final SubspaceModel noiseModel =
        new SubspaceModel(
            new Subspace(BitsUtil.ones(d)), Centroid.make(relation, remaining).getArrayRef());
    clustering.addToplevelCluster(new Cluster<>(remaining, true, noiseModel));
  }
  LOG.setCompleted(cprogress);
  return clustering;
}
/** * Evaluate a single clustering. * * @param db Database * @param rel Data relation * @param c Clustering * @return Mean simplified silhouette */ public double evaluateClustering( Database db, Relation<? extends NumberVector> rel, Clustering<?> c) { List<? extends Cluster<?>> clusters = c.getAllClusters(); NumberVector[] centroids = new NumberVector[clusters.size()]; int ignorednoise = centroids(rel, clusters, centroids, noiseOption); MeanVariance mssil = new MeanVariance(); Iterator<? extends Cluster<?>> ci = clusters.iterator(); for (int i = 0; ci.hasNext(); i++) { Cluster<?> cluster = ci.next(); if (cluster.size() <= 1) { // As suggested in Rousseeuw, we use 0 for singletons. mssil.put(0., cluster.size()); continue; } if (cluster.isNoise()) { switch (noiseOption) { case IGNORE_NOISE: continue; // Ignore elements case TREAT_NOISE_AS_SINGLETONS: // As suggested in Rousseeuw, we use 0 for singletons. mssil.put(0., cluster.size()); continue; case MERGE_NOISE: break; // Treat as cluster below } } // Cluster center: final NumberVector center = centroids[i]; assert (center != null); for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) { NumberVector obj = rel.get(it); // a: Distance to own centroid double a = distance.distance(center, obj); // b: Distance to other clusters centroids: double min = Double.POSITIVE_INFINITY; Iterator<? extends Cluster<?>> cj = clusters.iterator(); for (int j = 0; cj.hasNext(); j++) { Cluster<?> ocluster = cj.next(); if (i == j) { continue; } NumberVector other = centroids[j]; if (other == null) { // Noise! switch (noiseOption) { case IGNORE_NOISE: continue; case TREAT_NOISE_AS_SINGLETONS: // Treat each object like a centroid! for (DBIDIter it2 = ocluster.getIDs().iter(); it2.valid(); it2.advance()) { double dist = distance.distance(rel.get(it2), obj); min = dist < min ? dist : min; } continue; case MERGE_NOISE: break; // Treat as cluster below, but should not be reachable. } } // Clusters: use centroid. 
double dist = distance.distance(other, obj); min = dist < min ? dist : min; } // One 'real' cluster only? min = min < Double.POSITIVE_INFINITY ? min : a; mssil.put((min - a) / (min > a ? min : a)); } } double penalty = 1.; // Only if {@link NoiseHandling#IGNORE_NOISE}: if (penalize && ignorednoise > 0) { penalty = (rel.size() - ignorednoise) / (double) rel.size(); } final double meanssil = penalty * mssil.getMean(); final double stdssil = penalty * mssil.getSampleStddev(); if (LOG.isStatistics()) { LOG.statistics( new StringStatistic( key + ".simplified-silhouette.noise-handling", noiseOption.toString())); if (ignorednoise > 0) { LOG.statistics(new LongStatistic(key + ".simplified-silhouette.ignored", ignorednoise)); } LOG.statistics(new DoubleStatistic(key + ".simplified-silhouette.mean", meanssil)); LOG.statistics(new DoubleStatistic(key + ".simplified-silhouette.stddev", stdssil)); } EvaluationResult ev = EvaluationResult.findOrCreate( db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation"); MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation"); g.addMeasure( "Simp. Silhouette +-" + FormatUtil.NF2.format(stdssil), meanssil, -1., 1., 0., false); db.getHierarchy().resultChanged(ev); return meanssil; }
protected void autoEvaluateOutliers(ResultHierarchy hier, Result newResult) { Collection<OutlierResult> outliers = ResultUtil.filterResults(hier, newResult, OutlierResult.class); if (LOG.isDebugging()) { LOG.debug("Number of new outlier results: " + outliers.size()); } if (outliers.size() > 0) { Database db = ResultUtil.findDatabase(hier); ResultUtil.ensureClusteringResult(db, db); Collection<Clustering<?>> clusterings = ResultUtil.filterResults(hier, db, Clustering.class); if (clusterings.size() == 0) { LOG.warning( "Could not find a clustering result, even after running 'ensureClusteringResult'?!?"); return; } Clustering<?> basec = clusterings.iterator().next(); // Find minority class label int min = Integer.MAX_VALUE; int total = 0; String label = null; if (basec.getAllClusters().size() > 1) { for (Cluster<?> c : basec.getAllClusters()) { final int csize = c.getIDs().size(); total += csize; if (csize < min) { min = csize; label = c.getName(); } } } if (label == null) { LOG.warning("Could not evaluate outlier results, as I could not find a minority label."); return; } if (min == 1) { LOG.warning( "The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column."); } if (min > 0.05 * total) { LOG.warning( "The minority class I discovered (labeled '" + label + "') has " + (min * 100. / total) + "% of objects. Outlier classes should be more rare!"); } LOG.verbose("Evaluating using minority class: " + label); Pattern pat = Pattern.compile("^" + Pattern.quote(label) + "$"); // Evaluate rankings. 
new OutlierRankingEvaluation(pat).processNewResult(hier, newResult); // Compute ROC curve new OutlierROCCurve(pat).processNewResult(hier, newResult); // Compute Precision at k new OutlierPrecisionAtKCurve(pat, min << 1).processNewResult(hier, newResult); // Compute ROC curve new OutlierPrecisionRecallCurve(pat).processNewResult(hier, newResult); // Compute outlier histogram new ComputeOutlierHistogram(pat, 50, new LinearScaling(), false) .processNewResult(hier, newResult); } }
/** * Evaluate a single clustering. * * @param db Database * @param rel Data relation * @param c Clustering * @return C-Index */ public double evaluateClustering( Database db, Relation<? extends O> rel, DistanceQuery<O> dq, Clustering<?> c) { List<? extends Cluster<?>> clusters = c.getAllClusters(); // theta is the sum, w the number of within group distances double theta = 0; int w = 0; int ignorednoise = 0; int isize = clusters.size() <= 1 ? rel.size() : rel.size() / (clusters.size() - 1); DoubleArray pairDists = new DoubleArray(isize); for (int i = 0; i < clusters.size(); i++) { Cluster<?> cluster = clusters.get(i); if (cluster.size() <= 1 || cluster.isNoise()) { switch (noiseOption) { case IGNORE_NOISE: ignorednoise += cluster.size(); continue; // Ignore case TREAT_NOISE_AS_SINGLETONS: continue; // No within-cluster distances! case MERGE_NOISE: break; // Treat like a cluster } } for (DBIDIter it1 = cluster.getIDs().iter(); it1.valid(); it1.advance()) { O obj = rel.get(it1); // Compare object to every cluster, but only once for (int j = i; j < clusters.size(); j++) { Cluster<?> ocluster = clusters.get(j); if (ocluster.size() <= 1 || ocluster.isNoise()) { switch (noiseOption) { case IGNORE_NOISE: continue; // Ignore this cluster. case TREAT_NOISE_AS_SINGLETONS: case MERGE_NOISE: break; // Treat like a cluster } } for (DBIDIter it2 = ocluster.getIDs().iter(); it2.valid(); it2.advance()) { if (DBIDUtil.compare(it1, it2) <= 0) { // Only once. continue; } double dist = dq.distance(obj, rel.get(it2)); pairDists.add(dist); if (ocluster == cluster) { // Within-cluster distances. theta += dist; w++; } } } } } // Simulate best and worst cases: pairDists.sort(); double min = 0, max = 0; for (int i = 0, j = pairDists.size() - 1; i < w; i++, j--) { min += pairDists.get(i); max += pairDists.get(j); } double cIndex = (max > min) ? 
(theta - min) / (max - min) : 0.; if (LOG.isStatistics()) { LOG.statistics(new StringStatistic(key + ".c-index.noise-handling", noiseOption.toString())); if (ignorednoise > 0) { LOG.statistics(new LongStatistic(key + ".c-index.ignored", ignorednoise)); } LOG.statistics(new DoubleStatistic(key + ".c-index", cIndex)); } EvaluationResult ev = EvaluationResult.findOrCreate( db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation"); MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation"); g.addMeasure("C-Index", cIndex, 0., 1., 0., true); db.getHierarchy().resultChanged(ev); return cIndex; }