/**
 * Main loop for OUTRES: computes one score per object of the relation and
 * wraps the scores, together with their observed range, in an outlier result.
 *
 * @param relation Relation to process
 * @return Outlier detection result
 */
public OutlierResult run(Relation<V> relation) {
  WritableDoubleDataStore scores =
      DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax range = new DoubleMinMax();
  KernelDensityEstimator kde = new KernelDensityEstimator(relation);
  // One shared subspace bit set; it is zeroed again before each object is scored.
  long[] dims = BitsUtil.zero(kde.dim);

  FiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new FiniteProgress("OUTRES scores", relation.size(), LOG);
  }

  DBIDIter it = relation.iterDBIDs();
  while (it.valid()) {
    BitsUtil.zeroI(dims);
    final double value = outresScore(0, dims, it, kde);
    scores.putDouble(it, value);
    range.put(value);
    LOG.incrementProcessed(prog);
    it.advance();
  }
  LOG.ensureCompleted(prog);

  OutlierScoreMeta meta =
      new InvertedOutlierScoreMeta(range.getMin(), range.getMax(), 0., 1., 1.);
  return new OutlierResult(
      meta,
      new MaterializedDoubleRelation("OUTRES", "outres-score", scores, relation.getDBIDs()));
}
/**
 * The main run method. In a first pass, a density value is stored for every
 * object: the reciprocal of the mean non-spatial distance to its neighbors. In
 * a second pass, each object is scored by the mean density of its neighbors
 * divided by its own density.
 *
 * @param database Database, handed to the neighbor set predicate factory
 * @param spatial Relation for neighborhood
 * @param relation Attributes to evaluate
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> relation) {
  final NeighborSetPredicate neighborhood =
      getNeighborSetPredicateFactory().instantiate(database, spatial);
  DistanceQuery<O> dq = getNonSpatialDistanceFunction().instantiate(relation);

  WritableDoubleDataStore densities =
      DataStoreUtil.makeDoubleStorage(
          relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
  WritableDoubleDataStore quotients =
      DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax range = new DoubleMinMax();

  // Pass 1: density = 1 / (mean distance to the neighbors).
  for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
    DBIDs neighbors = neighborhood.getNeighborDBIDs(obj);
    double sum = 0;
    for (DBIDIter nb = neighbors.iter(); nb.valid(); nb.advance()) {
      sum += dq.distance(obj, nb);
    }
    final double density = 1 / (sum / neighbors.size());
    // A NaN density (e.g. 0/0 for an empty neighborhood) is stored as 0.
    densities.putDouble(obj, Double.isNaN(density) ? 0 : density);
  }

  // Pass 2: quotient = (mean density of the neighbors) / (own density).
  for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
    DBIDs neighbors = neighborhood.getNeighborDBIDs(obj);
    double sum = 0;
    for (DBIDIter nb = neighbors.iter(); nb.valid(); nb.advance()) {
      sum += densities.doubleValue(nb);
    }
    final double quotient = (sum / neighbors.size()) / densities.doubleValue(obj);
    if (Double.isNaN(quotient)) {
      // NaN quotients are stored as 0 and do not contribute to the range.
      quotients.putDouble(obj, 0.0);
      continue;
    }
    quotients.putDouble(obj, quotient);
    range.put(quotient);
  }

  // Build result representation.
  DoubleRelation scoreResult =
      new MaterializedDoubleRelation(
          "Spatial Outlier Factor", "sof-outlier", quotients, relation.getDBIDs());
  OutlierScoreMeta scoreMeta =
      new QuotientOutlierScoreMeta(
          range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  result.addChildResult(neighborhood);
  return result;
}
/** * Run the algorithm on the given relation. * * @param database Database * @param relation Relation to process * @return Outlier result */ public OutlierResult run(Database database, Relation<? extends NumberVector> relation) { @SuppressWarnings("unchecked") PrimitiveDistanceQuery<? super NumberVector> distq = (PrimitiveDistanceQuery<? super NumberVector>) database.getDistanceQuery(relation, distanceFunction); Collection<? extends NumberVector> refPoints = refp.getReferencePoints(relation); if (refPoints.size() < 1) { throw new AbortException("Cannot compute ROS without reference points!"); } DBIDs ids = relation.getDBIDs(); if (k >= ids.size()) { throw new AbortException("k must not be chosen larger than the database size!"); } // storage of distance/score values. WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage( ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN); // Compute density estimation: for (NumberVector refPoint : refPoints) { DoubleDBIDList referenceDists = computeDistanceVector(refPoint, relation, distq); updateDensities(rbod_score, referenceDists); } // compute maximum density DoubleMinMax mm = new DoubleMinMax(); for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { mm.put(rbod_score.doubleValue(iditer)); } // compute ROS double scale = mm.getMax() > 0. ? 1. / mm.getMax() : 1.; mm.reset(); // Reuse for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { double score = 1 - (rbod_score.doubleValue(iditer) * scale); mm.put(score); rbod_score.putDouble(iditer, score); } DoubleRelation scoreResult = new MaterializedDoubleRelation( "Reference-points Outlier Scores", "reference-outlier", rbod_score, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., 1., 0.); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); // adds reference points to the result. 
header information for the // visualizer to find the reference points in the result result.addChildResult( new ReferencePointsResult<>("Reference points", "reference-points", refPoints)); return result; }
/** * Run the algorithm * * @param relation Data relation * @return Outlier result */ public OutlierResult run(Relation<V> relation) { DoubleMinMax mm = new DoubleMinMax(); // resulting scores WritableDoubleDataStore oscores = DataStoreUtil.makeDoubleStorage( relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT); // Compute mean and covariance Matrix CovarianceMatrix temp = CovarianceMatrix.make(relation); double[] mean = temp.getMeanVector(relation).toArray(); // debugFine(mean.toString()); Matrix covarianceMatrix = temp.destroyToNaiveMatrix(); // debugFine(covarianceMatrix.toString()); Matrix covarianceTransposed = covarianceMatrix.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse(); // Normalization factors for Gaussian PDF final double fakt = (1.0 / (Math.sqrt( MathUtil.powi(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * covarianceMatrix.det()))); // for each object compute Mahalanobis distance for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { double[] x = minusEquals(relation.get(iditer).toArray(), mean); // Gaussian PDF final double mDist = transposeTimesTimes(x, covarianceTransposed, x); final double prob = fakt * Math.exp(-mDist * .5); mm.put(prob); oscores.putDouble(iditer, prob); } final OutlierScoreMeta meta; if (invert) { double max = mm.getMax() != 0 ? mm.getMax() : 1.; for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { oscores.putDouble(iditer, (max - oscores.doubleValue(iditer)) / max); } meta = new BasicOutlierScoreMeta(0.0, 1.0); } else { meta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), 0.0, Double.POSITIVE_INFINITY); } DoubleRelation res = new MaterializedDoubleRelation( "Gaussian Model Outlier Score", "gaussian-model-outlier", oscores, relation.getDBIDs()); return new OutlierResult(meta, res); }
@Override public CanvasSize estimateViewport() { if (viewport == null) { final int dim = proj.getDimensionality(); DoubleMinMax minmaxx = new DoubleMinMax(); DoubleMinMax minmaxy = new DoubleMinMax(); // Origin final double[] vec = new double[dim]; double[] orig = projectScaledToRender(vec); minmaxx.put(orig[0]); minmaxy.put(orig[1]); // Diagonal point Arrays.fill(vec, 1.); double[] diag = projectScaledToRender(vec); minmaxx.put(diag[0]); minmaxy.put(diag[1]); // Axis end points for (int d = 0; d < dim; d++) { Arrays.fill(vec, 0.); vec[d] = 1.; double[] ax = projectScaledToRender(vec); minmaxx.put(ax[0]); minmaxy.put(ax[1]); } viewport = new CanvasSize(minmaxx.getMin(), minmaxx.getMax(), minmaxy.getMin(), minmaxy.getMax()); } return viewport; }
/** * Run the algorithm * * @param database Database to process * @param relation Relation to process * @return Outlier result */ public OutlierResult run(Database database, Relation<O> relation) { DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction()); RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc); DBIDs ids = relation.getDBIDs(); // LOCI preprocessing step WritableDataStore<DoubleIntArrayList> interestingDistances = DataStoreUtil.makeStorage( relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, DoubleIntArrayList.class); precomputeInterestingRadii(ids, rangeQuery, interestingDistances); // LOCI main step FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null; WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax minmax = new DoubleMinMax(); // Shared instance, to save allocations. MeanVariance mv_n_r_alpha = new MeanVariance(); for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) { final DoubleIntArrayList cdist = interestingDistances.get(iditer); final double maxdist = cdist.getDouble(cdist.size() - 1); final int maxneig = cdist.getInt(cdist.size() - 1); double maxmdefnorm = 0.0; double maxnormr = 0; if (maxneig >= nmin) { // Compute the largest neighborhood we will need. DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(iditer, maxdist); // TODO: Ensure the result is sorted. This is currently implied. // For any critical distance, compute the normalized MDEF score. 
for (int i = 0, size = cdist.size(); i < size; i++) { // Only start when minimum size is fulfilled if (cdist.getInt(i) < nmin) { continue; } final double r = cdist.getDouble(i); final double alpha_r = alpha * r; // compute n(p_i, \alpha * r) from list (note: alpha_r is not cdist!) final int n_alphar = cdist.getInt(cdist.find(alpha_r)); // compute \hat{n}(p_i, r, \alpha) and the corresponding \simga_{MDEF} mv_n_r_alpha.reset(); for (DoubleDBIDListIter neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) { // Stop at radius r if (neighbor.doubleValue() > r) { break; } DoubleIntArrayList cdist2 = interestingDistances.get(neighbor); int rn_alphar = cdist2.getInt(cdist2.find(alpha_r)); mv_n_r_alpha.put(rn_alphar); } // We only use the average and standard deviation final double nhat_r_alpha = mv_n_r_alpha.getMean(); final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev(); // Redundant divisions by nhat_r_alpha removed. final double mdef = nhat_r_alpha - n_alphar; final double sigmamdef = sigma_nhat_r_alpha; final double mdefnorm = mdef / sigmamdef; if (mdefnorm > maxmdefnorm) { maxmdefnorm = mdefnorm; maxnormr = r; } } } else { // FIXME: when nmin was not fulfilled - what is the proper value then? maxmdefnorm = Double.POSITIVE_INFINITY; maxnormr = maxdist; } mdef_norm.putDouble(iditer, maxmdefnorm); mdef_radius.putDouble(iditer, maxnormr); minmax.put(maxmdefnorm); LOG.incrementProcessed(progressLOCI); } LOG.ensureCompleted(progressLOCI); DoubleRelation scoreResult = new MaterializedDoubleRelation( "LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta( minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); result.addChildResult( new MaterializedDoubleRelation( "LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs())); return result; }
/** * Run the algorithm. * * @param database Database to use * @param relation Relation to use * @return Result */ public OutlierResult run(Database database, Relation<?> relation) { WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax minmax = new DoubleMinMax(); try (InputStream in = FileUtil.tryGzipInput(new FileInputStream(file)); // TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) { Tokenizer tokenizer = reader.getTokenizer(); CharSequence buf = reader.getBuffer(); Matcher mi = idpattern.matcher(buf), ms = scorepattern.matcher(buf); reader.reset(in); while (reader.nextLineExceptComments()) { Integer id = null; double score = Double.NaN; for ( /* initialized by nextLineExceptComments */ ; tokenizer.valid(); tokenizer.advance()) { mi.region(tokenizer.getStart(), tokenizer.getEnd()); ms.region(tokenizer.getStart(), tokenizer.getEnd()); final boolean mif = mi.find(); final boolean msf = ms.find(); if (mif && msf) { throw new AbortException( "ID pattern and score pattern both match value: " + tokenizer.getSubstring()); } if (mif) { if (id != null) { throw new AbortException( "ID pattern matched twice: previous value " + id + " second value: " + tokenizer.getSubstring()); } id = Integer.parseInt(buf.subSequence(mi.end(), tokenizer.getEnd()).toString()); } if (msf) { if (!Double.isNaN(score)) { throw new AbortException( "Score pattern matched twice: previous value " + score + " second value: " + tokenizer.getSubstring()); } score = ParseUtil.parseDouble(buf, ms.end(), tokenizer.getEnd()); } } if (id != null && !Double.isNaN(score)) { scores.putDouble(DBIDUtil.importInteger(id), score); minmax.put(score); } else if (id == null && Double.isNaN(score)) { LOG.warning( "Line did not match either ID nor score nor comment: " + reader.getLineNumber()); } else { throw new AbortException( "Line matched only ID or only SCORE patterns: " + reader.getLineNumber()); } } } catch 
(IOException e) { throw new AbortException( "Could not load outlier scores: " + e.getMessage() + " when loading " + file, e); } OutlierScoreMeta meta; if (inverted) { meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax()); } else { meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); } DoubleRelation scoresult = new MaterializedDoubleRelation( "External Outlier", "external-outlier", scores, relation.getDBIDs()); OutlierResult or = new OutlierResult(meta, scoresult); // Apply scaling if (scaling instanceof OutlierScalingFunction) { ((OutlierScalingFunction) scaling).prepare(or); } DoubleMinMax mm = new DoubleMinMax(); for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { double val = scoresult.doubleValue(iditer); val = scaling.getScaled(val); scores.putDouble(iditer, val); mm.put(val); } meta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax()); or = new OutlierResult(meta, scoresult); return or; }