@Override public void processNewResult(ResultHierarchy hier, Result newResult) { // We may just have added this result. if (newResult instanceof Clustering && isReferenceResult((Clustering<?>) newResult)) { return; } Database db = ResultUtil.findDatabase(hier); List<Clustering<?>> crs = ResultUtil.getClusteringResults(newResult); if (crs == null || crs.size() < 1) { return; } // Compute the reference clustering Clustering<?> refc = null; // Try to find an existing reference clustering (globally) { Collection<Clustering<?>> cs = ResultUtil.filterResults(hier, db, Clustering.class); for (Clustering<?> test : cs) { if (isReferenceResult(test)) { refc = test; break; } } } // Try to find an existing reference clustering (locally) if (refc == null) { Collection<Clustering<?>> cs = ResultUtil.filterResults(hier, newResult, Clustering.class); for (Clustering<?> test : cs) { if (isReferenceResult(test)) { refc = test; break; } } } if (refc == null) { LOG.debug("Generating a new reference clustering."); Result refres = referencealg.run(db); List<Clustering<?>> refcrs = ResultUtil.getClusteringResults(refres); if (refcrs.size() == 0) { LOG.warning("Reference algorithm did not return a clustering result!"); return; } if (refcrs.size() > 1) { LOG.warning("Reference algorithm returned more than one result!"); } refc = refcrs.get(0); } else { LOG.debug("Using existing clustering: " + refc.getLongName() + " " + refc.getShortName()); } for (Clustering<?> c : crs) { if (c == refc) { continue; } evaluteResult(db, c, refc); } }
/** * Generates {@code d+1}-dimensional subspace candidates from the specified {@code d}-dimensional * subspaces. * * @param subspaces the {@code d}-dimensional subspaces * @return the {@code d+1}-dimensional subspace candidates */ private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) { List<Subspace> candidates = new ArrayList<>(); if (subspaces.isEmpty()) { return candidates; } // Generate (d+1)-dimensional candidate subspaces int d = subspaces.get(0).dimensionality(); StringBuilder msgFine = new StringBuilder("\n"); if (LOG.isDebuggingFiner()) { msgFine.append("subspaces ").append(subspaces).append('\n'); } for (int i = 0; i < subspaces.size(); i++) { Subspace s1 = subspaces.get(i); for (int j = i + 1; j < subspaces.size(); j++) { Subspace s2 = subspaces.get(j); Subspace candidate = s1.join(s2); if (candidate != null) { if (LOG.isDebuggingFiner()) { msgFine.append("candidate: ").append(candidate.dimensonsToString()).append('\n'); } // prune irrelevant candidate subspaces List<Subspace> lowerSubspaces = lowerSubspaces(candidate); if (LOG.isDebuggingFiner()) { msgFine.append("lowerSubspaces: ").append(lowerSubspaces).append('\n'); } boolean irrelevantCandidate = false; for (Subspace s : lowerSubspaces) { if (!subspaces.contains(s)) { irrelevantCandidate = true; break; } } if (!irrelevantCandidate) { candidates.add(candidate); } } } } if (LOG.isDebuggingFiner()) { LOG.debugFiner(msgFine.toString()); } if (LOG.isDebugging()) { StringBuilder msg = new StringBuilder(); msg.append(d + 1).append("-dimensional candidate subspaces: "); for (Subspace candidate : candidates) { msg.append(candidate.dimensonsToString()).append(' '); } LOG.debug(msg.toString()); } return candidates; }
protected void autoEvaluateOutliers(ResultHierarchy hier, Result newResult) { Collection<OutlierResult> outliers = ResultUtil.filterResults(hier, newResult, OutlierResult.class); if (LOG.isDebugging()) { LOG.debug("Number of new outlier results: " + outliers.size()); } if (outliers.size() > 0) { Database db = ResultUtil.findDatabase(hier); ResultUtil.ensureClusteringResult(db, db); Collection<Clustering<?>> clusterings = ResultUtil.filterResults(hier, db, Clustering.class); if (clusterings.size() == 0) { LOG.warning( "Could not find a clustering result, even after running 'ensureClusteringResult'?!?"); return; } Clustering<?> basec = clusterings.iterator().next(); // Find minority class label int min = Integer.MAX_VALUE; int total = 0; String label = null; if (basec.getAllClusters().size() > 1) { for (Cluster<?> c : basec.getAllClusters()) { final int csize = c.getIDs().size(); total += csize; if (csize < min) { min = csize; label = c.getName(); } } } if (label == null) { LOG.warning("Could not evaluate outlier results, as I could not find a minority label."); return; } if (min == 1) { LOG.warning( "The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column."); } if (min > 0.05 * total) { LOG.warning( "The minority class I discovered (labeled '" + label + "') has " + (min * 100. / total) + "% of objects. Outlier classes should be more rare!"); } LOG.verbose("Evaluating using minority class: " + label); Pattern pat = Pattern.compile("^" + Pattern.quote(label) + "$"); // Evaluate rankings. new OutlierRankingEvaluation(pat).processNewResult(hier, newResult); // Compute ROC curve new OutlierROCCurve(pat).processNewResult(hier, newResult); // Compute Precision at k new OutlierPrecisionAtKCurve(pat, min << 1).processNewResult(hier, newResult); // Compute ROC curve new OutlierPrecisionRecallCurve(pat).processNewResult(hier, newResult); // Compute outlier histogram new ComputeOutlierHistogram(pat, 50, new LinearScaling(), false) .processNewResult(hier, newResult); } }