protected void autoEvaluateOutliers(ResultHierarchy hier, Result newResult) { Collection<OutlierResult> outliers = ResultUtil.filterResults(hier, newResult, OutlierResult.class); if (LOG.isDebugging()) { LOG.debug("Number of new outlier results: " + outliers.size()); } if (outliers.size() > 0) { Database db = ResultUtil.findDatabase(hier); ResultUtil.ensureClusteringResult(db, db); Collection<Clustering<?>> clusterings = ResultUtil.filterResults(hier, db, Clustering.class); if (clusterings.size() == 0) { LOG.warning( "Could not find a clustering result, even after running 'ensureClusteringResult'?!?"); return; } Clustering<?> basec = clusterings.iterator().next(); // Find minority class label int min = Integer.MAX_VALUE; int total = 0; String label = null; if (basec.getAllClusters().size() > 1) { for (Cluster<?> c : basec.getAllClusters()) { final int csize = c.getIDs().size(); total += csize; if (csize < min) { min = csize; label = c.getName(); } } } if (label == null) { LOG.warning("Could not evaluate outlier results, as I could not find a minority label."); return; } if (min == 1) { LOG.warning( "The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column."); } if (min > 0.05 * total) { LOG.warning( "The minority class I discovered (labeled '" + label + "') has " + (min * 100. / total) + "% of objects. Outlier classes should be more rare!"); } LOG.verbose("Evaluating using minority class: " + label); Pattern pat = Pattern.compile("^" + Pattern.quote(label) + "$"); // Evaluate rankings. new OutlierRankingEvaluation(pat).processNewResult(hier, newResult); // Compute ROC curve new OutlierROCCurve(pat).processNewResult(hier, newResult); // Compute Precision at k new OutlierPrecisionAtKCurve(pat, min << 1).processNewResult(hier, newResult); // Compute ROC curve new OutlierPrecisionRecallCurve(pat).processNewResult(hier, newResult); // Compute outlier histogram new ComputeOutlierHistogram(pat, 50, new LinearScaling(), false) .processNewResult(hier, newResult); } }