Ejemplo n.º 1
0
  /**
   * Scores every object of the relation by its density under one multivariate
   * Gaussian whose mean and covariance are estimated from the relation itself.
   *
   * <p>If the {@code invert} flag is set, each density is rescaled to
   * {@code (max - density) / max} and the result is reported with a score range of
   * {@code [0, 1]}. Otherwise the raw densities are reported together with an
   * {@code InvertedOutlierScoreMeta} built from the observed minimum and maximum.
   *
   * @param relation Data relation
   * @return Outlier result wrapping the per-object scores and their score metadata
   */
  public OutlierResult run(Relation<V> relation) {
    // Tracks the smallest and largest score written below.
    final DoubleMinMax minmax = new DoubleMinMax();
    // Per-object score storage.
    final int storageHints = DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT;
    final WritableDoubleDataStore scores =
        DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), storageHints);

    // Estimate mean vector and covariance matrix from the data.
    final CovarianceMatrix builder = CovarianceMatrix.make(relation);
    final double[] mean = builder.getMeanVector(relation).toArray();
    final Matrix covariance = builder.destroyToNaiveMatrix();
    // Despite what older naming suggested, this is the *inverse* covariance matrix.
    // NOTE(review): cheatToAvoidSingularity may adjust `covariance` in place; the
    // determinant below is deliberately taken only after this call - keep the order.
    final Matrix inverseCovariance =
        covariance.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse();

    // Normalization factor of the Gaussian PDF: 1 / sqrt((2*pi)^d * det).
    final int dim = RelationUtil.dimensionality(relation);
    final double twoPiPow = MathUtil.powi(MathUtil.TWOPI, dim);
    final double determinant = covariance.det();
    final double norm = (1.0 / (Math.sqrt(twoPiPow * determinant)));

    // Evaluate the Gaussian PDF at every object.
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
      // Center a copy of the object's coordinates on the mean.
      final double[] centered = minusEquals(relation.get(it).toArray(), mean);
      // Presumably the quadratic form centered^T * inverseCovariance * centered.
      final double quadForm = transposeTimesTimes(centered, inverseCovariance, centered);
      final double density = norm * Math.exp(-quadForm * .5);

      minmax.put(density);
      scores.putDouble(it, density);
    }

    final OutlierScoreMeta meta;
    if (!invert) {
      meta =
          new InvertedOutlierScoreMeta(
              minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
    } else {
      // Fall back to 1 as divisor when the largest density is exactly zero.
      double max = minmax.getMax();
      if (max == 0) {
        max = 1.;
      }
      for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final double raw = scores.doubleValue(it);
        scores.putDouble(it, (max - raw) / max);
      }
      meta = new BasicOutlierScoreMeta(0.0, 1.0);
    }

    final DoubleRelation scoreRelation =
        new MaterializedDoubleRelation(
            "Gaussian Model Outlier Score", "gaussian-model-outlier", scores, relation.getDBIDs());
    return new OutlierResult(meta, scoreRelation);
  }
Ejemplo n.º 2
0
 /**
  * Computes the kernel bandwidth used for a subspace of the given dimensionality.
  *
  * <p>The value is {@code 8 * gamma(dim / 2 + 1) * (dim + 4) * 2^dim}, multiplied by
  * {@code n^(-1 / (dim + 4))}, where {@code n} is the size of the relation.
  *
  * @param dim Dimensionality of subspace
  * @return optimal bandwidth
  */
 protected double optimalBandwidth(int dim) {
   final int dimPlusFour = dim + 4;
   final double gammaTerm = GammaDistribution.gamma(dim / 2.0 + 1);
   // The Pi factors of the published formula cancel each other and are left out.
   final double constant = 8 * gammaTerm * dimPlusFour * MathUtil.powi(2, dim);
   final double exponent = -1. / dimPlusFour;
   return constant * Math.pow(relation.size(), exponent);
 }