/**
 * Evaluates every clustering contained in {@code newResult} against a reference clustering.
 *
 * <p>An existing reference clustering is preferred: the clusterings reachable from the database
 * result are searched first (global), then those reachable from {@code newResult} (local). Only
 * when neither search finds one is the reference algorithm run on the database, and the first
 * clustering it returns is used. The reference clustering itself is never evaluated.
 *
 * @param hier the result hierarchy that is searched for clusterings
 * @param newResult the newly added result whose clusterings are evaluated
 */
 @Override
 public void processNewResult(ResultHierarchy hier, Result newResult) {
   // We may just have added this result.
   if (newResult instanceof Clustering && isReferenceResult((Clustering<?>) newResult)) {
     return;
   }
   Database db = ResultUtil.findDatabase(hier);
   List<Clustering<?>> crs = ResultUtil.getClusteringResults(newResult);
   if (crs == null || crs.isEmpty()) {
     return;
   }
   // Try to find an existing reference clustering (globally)
   Collection<Clustering<?>> global = ResultUtil.filterResults(hier, db, Clustering.class);
   Clustering<?> refc = firstReferenceClustering(global);
   // Try to find an existing reference clustering (locally); skipped when the global search hit
   if (refc == null) {
     Collection<Clustering<?>> local = ResultUtil.filterResults(hier, newResult, Clustering.class);
     refc = firstReferenceClustering(local);
   }
   if (refc == null) {
     // No reference available anywhere: compute one with the reference algorithm
     LOG.debug("Generating a new reference clustering.");
     Result refres = referencealg.run(db);
     List<Clustering<?>> refcrs = ResultUtil.getClusteringResults(refres);
     // Same null/empty guard as for the clusterings of newResult above
     if (refcrs == null || refcrs.isEmpty()) {
       LOG.warning("Reference algorithm did not return a clustering result!");
       return;
     }
     if (refcrs.size() > 1) {
       LOG.warning("Reference algorithm returned more than one result!");
     }
     refc = refcrs.get(0);
   } else if (LOG.isDebugging()) {
     // Only build the message when it will actually be logged
     LOG.debug("Using existing clustering: " + refc.getLongName() + " " + refc.getShortName());
   }
   for (Clustering<?> c : crs) {
     // Identity comparison: do not evaluate the reference against itself
     if (c != refc) {
       evaluteResult(db, c, refc);
     }
   }
 }

 /**
  * Returns the first clustering in {@code candidates} that {@code isReferenceResult} accepts.
  *
  * @param candidates the clusterings to scan, in iteration order
  * @return the first reference clustering, or {@code null} if there is none
  */
 private Clustering<?> firstReferenceClustering(Collection<Clustering<?>> candidates) {
   for (Clustering<?> candidate : candidates) {
     if (isReferenceResult(candidate)) {
       return candidate;
     }
   }
   return null;
 }
Example #2
0
  /**
   * Builds the {@code d+1}-dimensional candidate subspaces by joining every pair of the given
   * {@code d}-dimensional subspaces.
   *
   * <p>A join result is kept only if every subspace returned by {@code lowerSubspaces} for it is
   * contained in {@code subspaces}; all other join results are pruned.
   *
   * @param subspaces the {@code d}-dimensional subspaces
   * @return the {@code d+1}-dimensional subspace candidates; empty if {@code subspaces} is empty
   */
  private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) {
    final List<Subspace> result = new ArrayList<>();
    if (subspaces.isEmpty()) {
      return result;
    }

    // Dimensionality is read from the first subspace; it is only used for the debug output below.
    final int dim = subspaces.get(0).dimensionality();

    // Collects the very detailed trace; it is emitted in one piece after the loops.
    final StringBuilder trace = new StringBuilder("\n");
    if (LOG.isDebuggingFiner()) {
      trace.append("subspaces ").append(subspaces).append('\n');
    }

    // Join each unordered pair (first < second) exactly once.
    for (int first = 0; first < subspaces.size(); first++) {
      final Subspace left = subspaces.get(first);
      for (int second = first + 1; second < subspaces.size(); second++) {
        final Subspace joined = left.join(subspaces.get(second));
        if (joined == null) {
          // This pair could not be joined.
          continue;
        }
        if (LOG.isDebuggingFiner()) {
          trace.append("candidate: ").append(joined.dimensonsToString()).append('\n');
        }

        // Prune irrelevant candidates: every lower subspace must be among the inputs.
        final List<Subspace> lower = lowerSubspaces(joined);
        if (LOG.isDebuggingFiner()) {
          trace.append("lowerSubspaces: ").append(lower).append('\n');
        }
        if (subspaces.containsAll(lower)) {
          result.add(joined);
        }
      }
    }

    if (LOG.isDebuggingFiner()) {
      LOG.debugFiner(trace.toString());
    }
    if (LOG.isDebugging()) {
      final StringBuilder summary =
          new StringBuilder().append(dim + 1).append("-dimensional candidate subspaces: ");
      for (Subspace kept : result) {
        summary.append(kept.dimensonsToString()).append(' ');
      }
      LOG.debug(summary.toString());
    }

    return result;
  }
 /**
  * Runs the outlier evaluations on the outlier results found in {@code newResult}, using the
  * smallest cluster of an existing clustering as the outlier class.
  *
  * <p>Nothing happens when {@code newResult} holds no outlier results. Otherwise a clustering is
  * requested via {@code ensureClusteringResult}, the first clustering found for the database is
  * taken, and the name of its smallest cluster becomes the label pattern. The clustering must
  * have more than one cluster, otherwise no label is chosen and the evaluation is skipped with a
  * warning. Further warnings are logged when that cluster holds a single object or more than 5%
  * of all objects.
  *
  * @param hier the result hierarchy to search and to hand to the evaluators
  * @param newResult the newly added result to scan for outlier results
  */
 protected void autoEvaluateOutliers(ResultHierarchy hier, Result newResult) {
   final Collection<OutlierResult> found =
       ResultUtil.filterResults(hier, newResult, OutlierResult.class);
   if (LOG.isDebugging()) {
     LOG.debug("Number of new outlier results: " + found.size());
   }
   if (found.isEmpty()) {
     return;
   }

   final Database database = ResultUtil.findDatabase(hier);
   ResultUtil.ensureClusteringResult(database, database);
   final Collection<Clustering<?>> available =
       ResultUtil.filterResults(hier, database, Clustering.class);
   if (available.isEmpty()) {
     LOG.warning(
         "Could not find a clustering result, even after running 'ensureClusteringResult'?!?");
     return;
   }
   final Clustering<?> base = available.iterator().next();

   // Locate the smallest cluster; a clustering with at most one cluster yields no label.
   int smallest = Integer.MAX_VALUE;
   int objects = 0;
   String minority = null;
   if (base.getAllClusters().size() > 1) {
     for (Cluster<?> cluster : base.getAllClusters()) {
       final int size = cluster.getIDs().size();
       objects += size;
       // Strict comparison: on ties the first cluster encountered wins.
       if (size < smallest) {
         smallest = size;
         minority = cluster.getName();
       }
     }
   }
   if (minority == null) {
     LOG.warning("Could not evaluate outlier results, as I could not find a minority label.");
     return;
   }

   // Sanity warnings only; the evaluation proceeds in either case.
   if (smallest == 1) {
     LOG.warning(
         "The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column.");
   }
   if (smallest > 0.05 * objects) {
     final double percent = smallest * 100. / objects;
     LOG.warning(
         "The minority class I discovered (labeled '"
             + minority
             + "') has "
             + percent
             + "% of objects. Outlier classes should be more rare!");
   }
   LOG.verbose("Evaluating using minority class: " + minority);

   // Anchored and quoted, so the pattern matches exactly the minority label.
   final Pattern exactLabel = Pattern.compile("^" + Pattern.quote(minority) + "$");
   // Evaluate rankings.
   new OutlierRankingEvaluation(exactLabel).processNewResult(hier, newResult);
   // Compute ROC curve
   new OutlierROCCurve(exactLabel).processNewResult(hier, newResult);
   // Compute Precision at k, with k set to twice the minority class size
   new OutlierPrecisionAtKCurve(exactLabel, smallest << 1).processNewResult(hier, newResult);
   // Compute precision-recall curve
   new OutlierPrecisionRecallCurve(exactLabel).processNewResult(hier, newResult);
   // Compute outlier histogram
   new ComputeOutlierHistogram(exactLabel, 50, new LinearScaling(), false)
       .processNewResult(hier, newResult);
 }