/** * Main loop for OUTRES * * @param relation Relation to process * @return Outlier detection result */ public OutlierResult run(Relation<V> relation) { WritableDoubleDataStore ranks = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax minmax = new DoubleMinMax(); KernelDensityEstimator kernel = new KernelDensityEstimator(relation); long[] subspace = BitsUtil.zero(kernel.dim); FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("OUTRES scores", relation.size(), LOG) : null; for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { BitsUtil.zeroI(subspace); double score = outresScore(0, subspace, iditer, kernel); ranks.putDouble(iditer, score); minmax.put(score); LOG.incrementProcessed(progress); } LOG.ensureCompleted(progress); OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.); OutlierResult outresResult = new OutlierResult( meta, new MaterializedDoubleRelation("OUTRES", "outres-score", ranks, relation.getDBIDs())); return outresResult; }
/** * Subspace relevance test. * * @param subspace Subspace to test * @param neigh Neighbor list * @param kernel Kernel density estimator * @return relevance test result */ protected boolean relevantSubspace( long[] subspace, DoubleDBIDList neigh, KernelDensityEstimator kernel) { Relation<V> relation = kernel.relation; final double crit = K_S_CRITICAL001 / Math.sqrt(neigh.size()); for (int dim = BitsUtil.nextSetBit(subspace, 0); dim > 0; dim = BitsUtil.nextSetBit(subspace, dim + 1)) { // TODO: can we save this copy somehow? double[] data = new double[neigh.size()]; { int count = 0; for (DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { V vector = relation.get(neighbor); data[count] = vector.doubleValue(dim); count++; } assert (count == neigh.size()); } Arrays.sort(data); final double norm = data[data.length - 1] - data[0]; final double min = data[0]; // Kolmogorov-Smirnov test against uniform distribution: for (int j = 1; j < data.length - 2; j++) { double delta = (j / (data.length - 1.)) - ((data[j] - min) / norm); if (Math.abs(delta) > crit) { return false; } } } return true; }
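// Illustrative sketch (not part of the library code above): the relevance test is a
// one-sample Kolmogorov-Smirnov check of the neighbors' attribute values against a
// uniform distribution on their observed range. Plain Java, no ELKI dependencies;
// the critical value below is an assumed stand-in for K_S_CRITICAL001 / sqrt(n).
public final class KsUniformSketch {
  /** Largest deviation between the empirical CDF and the uniform CDF on the data range. */
  static double ksStatistic(double[] sorted) {
    final double min = sorted[0], norm = sorted[sorted.length - 1] - sorted[0];
    double maxDelta = 0.;
    for (int j = 1; j < sorted.length - 1; j++) {
      double empirical = j / (sorted.length - 1.); // empirical CDF position
      double expected = (sorted[j] - min) / norm; // uniform CDF on the observed range
      maxDelta = Math.max(maxDelta, Math.abs(empirical - expected));
    }
    return maxDelta;
  }

  public static void main(String[] args) {
    double[] data = { 0.05, 0.1, 0.2, 0.21, 0.22, 0.23, 0.9 };
    java.util.Arrays.sort(data);
    double crit = 1.63 / Math.sqrt(data.length); // assumed critical coefficient for alpha = 0.01
    System.out.println("reject uniformity: " + (ksStatistic(data) > crit));
  }
}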
@Override public ProjectedIndex<O, O> instantiate(Relation<O> relation) { if (!proj.getInputDataTypeInformation() .isAssignableFromType(relation.getDataTypeInformation())) { return null; } proj.initialize(relation.getDataTypeInformation()); final Relation<O> view; if (materialize) { DBIDs ids = relation.getDBIDs(); WritableDataStore<O> content = DataStoreUtil.makeStorage( ids, DataStoreFactory.HINT_DB, proj.getOutputDataTypeInformation().getRestrictionClass()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { content.put(iter, proj.project(relation.get(iter))); } view = new MaterializedRelation<>( "ECEF Projection", "ecef-projection", proj.getOutputDataTypeInformation(), content, ids); } else { view = new ProjectedView<>(relation, proj); } Index inneri = inner.instantiate(view); if (inneri == null) { return null; } return new LngLatAsECEFIndex<>(relation, proj, view, inneri, norefine); }
@Override public void redraw() { setupCSS(svgp); final StyleLibrary style = context.getStyleResult().getStyleLibrary(); double dotsize = style.getLineWidth(StyleLibrary.PLOT); for (DBIDIter id = sample.getSample().iter(); id.valid(); id.advance()) { double[] v = proj.fastProjectDataToRenderSpace(rel.get(id)); if (v[0] != v[0] || v[1] != v[1]) { continue; // NaN! } Element tooltip = makeTooltip(id, v[0], v[1], dotsize); SVGUtil.addCSSClass(tooltip, TOOLTIP_HIDDEN); // sensitive area. Element area = svgp.svgRect(v[0] - dotsize, v[1] - dotsize, 2 * dotsize, 2 * dotsize); SVGUtil.addCSSClass(area, TOOLTIP_AREA); EventTarget targ = (EventTarget) area; targ.addEventListener(SVGConstants.SVG_MOUSEOVER_EVENT_TYPE, hoverer, false); targ.addEventListener(SVGConstants.SVG_MOUSEOUT_EVENT_TYPE, hoverer, false); targ.addEventListener(SVGConstants.SVG_CLICK_EVENT_TYPE, hoverer, false); // NOTE: do not change the sequence in which these are inserted! layer.appendChild(area); layer.appendChild(tooltip); } }
@Override public boolean contains(DBIDRef o) { for (DBIDIter iter = iter(); iter.valid(); iter.advance()) { if (DBIDUtil.equal(iter, o)) { return true; } } return false; }
@Override public void run() { Database database = input.getDatabase(); Relation<O> relation = database.getRelation(distance.getInputTypeRestriction()); DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance); KNNQuery<O> knnQ = database.getKNNQuery(distanceQuery, DatabaseQuery.HINT_HEAVY_USE); // open file. try (RandomAccessFile file = new RandomAccessFile(out, "rw"); FileChannel channel = file.getChannel(); // and acquire a file write lock FileLock lock = channel.lock()) { // write magic header file.writeInt(KNN_CACHE_MAGIC); int bufsize = k * 12 * 2 + 10; // Initial size, enough for 2 kNN. ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize); FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing kNN", relation.size(), LOG) : null; for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { final KNNList nn = knnQ.getKNNForDBID(it, k); final int nnsize = nn.size(); // Grow the buffer when needed: if (nnsize * 12 + 10 > bufsize) { while (nnsize * 12 + 10 > bufsize) { bufsize <<= 1; } buffer = ByteBuffer.allocateDirect(bufsize); } buffer.clear(); ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex()); ByteArrayUtil.writeUnsignedVarint(buffer, nnsize); int c = 0; for (DoubleDBIDListIter ni = nn.iter(); ni.valid(); ni.advance(), c++) { ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex()); buffer.putDouble(ni.doubleValue()); } if (c != nn.size()) { throw new AbortException("Sizes did not agree. Cache is invalid."); } buffer.flip(); channel.write(buffer); LOG.incrementProcessed(prog); } LOG.ensureCompleted(prog); lock.release(); } catch (IOException e) { LOG.exception(e); } // FIXME: close! }
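// Illustrative sketch (not part of the library code above): each cache record written by the
// method above is a varint object id, a varint neighbor count, and (varint id, 8-byte double
// distance) pairs. The varint helpers below are plain-Java stand-ins for the ELKI
// ByteArrayUtil routines, shown only to make the record layout concrete.
import java.nio.ByteBuffer;

public final class KnnCacheRecordSketch {
  static void writeVarint(ByteBuffer buf, int value) {
    while ((value & ~0x7F) != 0) { // emit 7 bits at a time, high bit = "more follows"
      buf.put((byte) ((value & 0x7F) | 0x80));
      value >>>= 7;
    }
    buf.put((byte) value);
  }

  static int readVarint(ByteBuffer buf) {
    int value = 0, shift = 0;
    byte b;
    do {
      b = buf.get();
      value |= (b & 0x7F) << shift;
      shift += 7;
    } while ((b & 0x80) != 0);
    return value;
  }

  public static void main(String[] args) {
    ByteBuffer buf = ByteBuffer.allocate(64);
    writeVarint(buf, 42); // query object id
    writeVarint(buf, 2); // number of neighbors
    writeVarint(buf, 7);
    buf.putDouble(0.25); // first neighbor: id, distance
    writeVarint(buf, 13);
    buf.putDouble(0.75); // second neighbor: id, distance
    buf.flip();
    System.out.printf("id=%d k=%d%n", readVarint(buf), readVarint(buf));
    System.out.printf("  %d @ %.2f%n", readVarint(buf), buf.getDouble());
    System.out.printf("  %d @ %.2f%n", readVarint(buf), buf.getDouble());
  }
}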
/** * Computes for each object the distance to one reference point. (one dimensional representation * of the data set) * * @param refPoint Reference Point Feature Vector * @param database database to work on * @param distFunc Distance function to use * @return array containing the distance to one reference point for each database object and the * object id */ protected DoubleDBIDList computeDistanceVector( NumberVector refPoint, Relation<? extends NumberVector> database, PrimitiveDistanceQuery<? super NumberVector> distFunc) { ModifiableDoubleDBIDList referenceDists = DBIDUtil.newDistanceDBIDList(database.size()); for (DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) { referenceDists.add(distFunc.distance(iditer, refPoint), iditer); } referenceDists.sort(); return referenceDists; }
/** * Process the neighbors of a single core point. * * @param neighbor Iterator over neighbors * @param currentCluster Current cluster * @param seeds Seed set */ private void processNeighbors( DBIDIter neighbor, ModifiableDBIDs currentCluster, HashSetModifiableDBIDs seeds) { for (; neighbor.valid(); neighbor.advance()) { if (processedIDs.add(neighbor)) { seeds.add(neighbor); } else if (!noise.remove(neighbor)) { continue; } currentCluster.add(neighbor); } }
@Override public void initialize() { super.initialize(); List<MkAppEntry> objs = new ArrayList<>(relation.size()); for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { DBID id = DBIDUtil.deref(iter); final O object = relation.get(id); objs.add(createNewLeafEntry(id, object, Double.NaN)); } insertAll(objs); }
/** * Utility method to test if a given dimension is relevant as determined via a set of reference * points (i.e. if the variance along the attribute is lower than the threshold). * * @param dimension the dimension to test. * @param relation used to get actual values for DBIDs. * @param points the points to test. * @return <code>true</code> if the dimension is relevant. */ private boolean dimensionIsRelevant(int dimension, Relation<V> relation, DBIDs points) { double min = Double.POSITIVE_INFINITY, max = Double.NEGATIVE_INFINITY; for (DBIDIter iter = points.iter(); iter.valid(); iter.advance()) { double xV = relation.get(iter).doubleValue(dimension); min = (xV < min) ? xV : min; max = (xV > max) ? xV : max; if (max - min > w) { return false; } } return true; }
/** * Preprocessing step: determine the radii of interest for each point. * * @param ids IDs to process * @param rangeQuery Range query * @param interestingDistances Distances of interest */ protected void precomputeInterestingRadii( DBIDs ids, RangeQuery<O> rangeQuery, WritableDataStore<DoubleIntArrayList> interestingDistances) { FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", ids.size(), LOG) : null; for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) { DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, rmax); // build list of critical distances DoubleIntArrayList cdist = new DoubleIntArrayList(neighbors.size() << 1); { int i = 0; DoubleDBIDListIter ni = neighbors.iter(); while (ni.valid()) { final double curdist = ni.doubleValue(); ++i; ni.advance(); // Skip, if tied to the next object: if (ni.valid() && curdist == ni.doubleValue()) { continue; } cdist.append(curdist, i); // Scale radius, and reinsert if (alpha != 1.) { final double ri = curdist / alpha; if (ri <= rmax) { cdist.append(ri, Integer.MIN_VALUE); } } } } cdist.sort(); // fill the gaps to have fast lookups of number of neighbors at a given // distance. int lastk = 0; for (int i = 0, size = cdist.size(); i < size; i++) { final int k = cdist.getInt(i); if (k == Integer.MIN_VALUE) { cdist.setValue(i, lastk); } else { lastk = k; } } // TODO: shrink the list, removing duplicate radii? interestingDistances.put(iditer, cdist); LOG.incrementProcessed(progressPreproc); } LOG.ensureCompleted(progressPreproc); }
/** * Process a database * * @param database Database to process * @param relation Relation to process * @return Histogram of ranking qualities */ public HistogramResult<DoubleVector> run(Database database, Relation<O> relation) { final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction()); final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, relation.size()); if (LOG.isVerbose()) { LOG.verbose("Preprocessing clusters..."); } // Cluster by labels Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters(); DoubleStaticHistogram hist = new DoubleStaticHistogram(numbins, 0.0, 1.0); if (LOG.isVerbose()) { LOG.verbose("Processing points..."); } FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", relation.size(), LOG) : null; MeanVariance mv = new MeanVariance(); // sort neighbors for (Cluster<?> clus : split) { for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) { KNNList knn = knnQuery.getKNNForDBID(iter, relation.size()); double result = new ROCEvaluation().evaluate(clus, knn); mv.put(result); hist.increment(result, 1. / relation.size()); LOG.incrementProcessed(progress); } } LOG.ensureCompleted(progress); // Transform Histogram into a Double Vector array. Collection<DoubleVector> res = new ArrayList<>(relation.size()); for (DoubleStaticHistogram.Iter iter = hist.iter(); iter.valid(); iter.advance()) { DoubleVector row = new DoubleVector(new double[] {iter.getCenter(), iter.getValue()}); res.add(row); } HistogramResult<DoubleVector> result = new HistogramResult<>("Ranking Quality Histogram", "ranking-histogram", res); result.addHeader("Mean: " + mv.getMean() + " Variance: " + mv.getSampleVariance()); return result; }
@Override public void fullRedraw() { setupCanvas(); final StyleLibrary style = context.getStyleLibrary(); CSSClass css = new CSSClass(svgp, POLYS); // TODO: separate fill and line colors? css.setStatement( SVGConstants.CSS_STROKE_WIDTH_PROPERTY, style.getLineWidth(StyleLibrary.POLYGONS)); css.setStatement(SVGConstants.CSS_STROKE_PROPERTY, style.getColor(StyleLibrary.POLYGONS)); css.setStatement(SVGConstants.CSS_FILL_PROPERTY, SVGConstants.CSS_NONE_VALUE); svgp.addCSSClassOrLogError(css); svgp.updateStyleElement(); // draw data for (DBIDIter iditer = rep.iterDBIDs(); iditer.valid(); iditer.advance()) { try { PolygonsObject poly = rep.get(iditer); if (poly == null) { continue; } SVGPath path = new SVGPath(); for (Polygon ppoly : poly.getPolygons()) { Vector first = ppoly.get(0); double[] f = proj.fastProjectDataToRenderSpace(first.getArrayRef()); path.moveTo(f[0], f[1]); for (ArrayListIter<Vector> it = ppoly.iter(); it.valid(); it.advance()) { if (it.getOffset() == 0) { continue; } double[] p = proj.fastProjectDataToRenderSpace(it.get().getArrayRef()); path.drawTo(p[0], p[1]); } // close path. path.drawTo(f[0], f[1]); } Element e = path.makeElement(svgp); SVGUtil.addCSSClass(e, POLYS); layer.appendChild(e); } catch (ObjectNotFoundException e) { // ignore. } } }
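/** * Collect the within-cluster pairwise distances, respecting the noise handling option. * * @param rel Data relation * @param clusters Clusters to process * @param withinPairs Number of within-cluster pairs (size of the result array) * @return Sorted array of within-cluster distances */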
protected double[] computeWithinDistances( Relation<? extends NumberVector> rel, List<? extends Cluster<?>> clusters, int withinPairs) { double[] concordant = new double[withinPairs]; int i = 0; for (Cluster<?> cluster : clusters) { if (cluster.size() <= 1 || cluster.isNoise()) { switch (noiseHandling) { case IGNORE_NOISE: continue; case TREAT_NOISE_AS_SINGLETONS: continue; // No concordant distances. case MERGE_NOISE: break; // Treat like a cluster below. } } for (DBIDIter it1 = cluster.getIDs().iter(); it1.valid(); it1.advance()) { NumberVector obj = rel.get(it1); for (DBIDIter it2 = cluster.getIDs().iter(); it2.valid(); it2.advance()) { if (DBIDUtil.compare(it1, it2) <= 0) { continue; } concordant[i++] = distanceFunction.distance(obj, rel.get(it2)); } } } assert (concordant.length == i); Arrays.sort(concordant); return concordant; }
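/** * Build the per-dimension index: for each dimension, the ids of the vectors that have this bit set; dimensions with fewer than minsupp entries are discarded (set to null), the remaining lists are sorted. * * @param relation Relation of bit vectors * @param dim Number of dimensions * @param minsupp Minimum support * @return Per-dimension id lists */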
private DBIDs[] buildIndex(Relation<BitVector> relation, int dim, int minsupp) { ArrayModifiableDBIDs[] idx = new ArrayModifiableDBIDs[dim]; for (int i = 0; i < dim; i++) { idx[i] = DBIDUtil.newArray(); } for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { SparseFeatureVector<?> bv = relation.get(iter); // TODO: only count those which satisfy minlength? for (int it = bv.iter(); bv.iterValid(it); it = bv.iterAdvance(it)) { idx[bv.iterDim(it)].add(iter); } } // Forget non-frequent 1-itemsets. for (int i = 0; i < dim; i++) { if (idx[i].size() < minsupp) { idx[i] = null; } else { idx[i].sort(); } } return idx; }
/** * Run the DBSCAN algorithm * * @param relation Data relation * @param rangeQuery Range query class */ protected void runDBSCAN(Relation<O> relation, RangeQuery<O> rangeQuery) { final int size = relation.size(); FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("Processing objects", size, LOG) : null; IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null; processedIDs = DBIDUtil.newHashSet(size); for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { if (!processedIDs.contains(iditer)) { expandCluster(relation, rangeQuery, iditer, objprog, clusprog); } if (objprog != null && clusprog != null) { objprog.setProcessed(processedIDs.size(), LOG); clusprog.setProcessed(resultList.size(), LOG); } if (processedIDs.size() == size) { break; } } // Finish progress logging LOG.ensureCompleted(objprog); LOG.setCompleted(clusprog); }
/** * Compute the intersection size. * * @param neighbors1 SORTED neighbor ids of first * @param neighbors2 SORTED neighbor ids of second * @return Intersection size */ protected static int countSharedNeighbors(DBIDs neighbors1, DBIDs neighbors2) { int intersection = 0; DBIDIter iter1 = neighbors1.iter(); DBIDIter iter2 = neighbors2.iter(); while (iter1.valid() && iter2.valid()) { final int comp = DBIDUtil.compare(iter1, iter2); if (comp == 0) { intersection++; iter1.advance(); iter2.advance(); } else if (comp < 0) { iter1.advance(); } else // iter2 < iter1 { iter2.advance(); } } return intersection; }
/** * Run the algorithm on the given relation. * * @param database Database * @param relation Relation to process * @return Outlier result */ public OutlierResult run(Database database, Relation<? extends NumberVector> relation) { @SuppressWarnings("unchecked") PrimitiveDistanceQuery<? super NumberVector> distq = (PrimitiveDistanceQuery<? super NumberVector>) database.getDistanceQuery(relation, distanceFunction); Collection<? extends NumberVector> refPoints = refp.getReferencePoints(relation); if (refPoints.size() < 1) { throw new AbortException("Cannot compute ROS without reference points!"); } DBIDs ids = relation.getDBIDs(); if (k >= ids.size()) { throw new AbortException("k must not be chosen larger than the database size!"); } // storage of distance/score values. WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage( ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN); // Compute density estimation: for (NumberVector refPoint : refPoints) { DoubleDBIDList referenceDists = computeDistanceVector(refPoint, relation, distq); updateDensities(rbod_score, referenceDists); } // compute maximum density DoubleMinMax mm = new DoubleMinMax(); for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { mm.put(rbod_score.doubleValue(iditer)); } // compute ROS double scale = mm.getMax() > 0. ? 1. / mm.getMax() : 1.; mm.reset(); // Reuse for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { double score = 1 - (rbod_score.doubleValue(iditer) * scale); mm.put(score); rbod_score.putDouble(iditer, score); } DoubleRelation scoreResult = new MaterializedDoubleRelation( "Reference-points Outlier Scores", "reference-outlier", rbod_score, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., 1., 0.); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); // adds reference points to the result. header information for the // visualizer to find the reference points in the result result.addChildResult( new ReferencePointsResult<>("Reference points", "reference-points", refPoints)); return result; }
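/** * Intersect two sorted id lists via a merge join. * * @param first First sorted id list (must not be a hash set) * @param second Second sorted id list (must not be a hash set) * @return Ids contained in both lists */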
private DBIDs mergeJoin(DBIDs first, DBIDs second) { assert (!(first instanceof HashSetDBIDs)); assert (!(second instanceof HashSetDBIDs)); ArrayModifiableDBIDs ids = DBIDUtil.newArray(); DBIDIter i1 = first.iter(), i2 = second.iter(); while (i1.valid() && i2.valid()) { int c = DBIDUtil.compare(i1, i2); if (c < 0) { i1.advance(); } else if (c > 0) { i2.advance(); } else { ids.add(i1); i1.advance(); i2.advance(); } } return ids; }
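// Illustrative sketch (not part of the library code above): countSharedNeighbors and mergeJoin
// both use the same two-pointer scan over sorted id lists. The same pattern on plain sorted
// int arrays:
import java.util.Arrays;

public final class SortedIntersectionSketch {
  static int[] intersect(int[] a, int[] b) {
    int[] out = new int[Math.min(a.length, b.length)];
    int i = 0, j = 0, n = 0;
    while (i < a.length && j < b.length) {
      if (a[i] < b[j]) {
        i++; // a is behind, catch up
      } else if (a[i] > b[j]) {
        j++; // b is behind, catch up
      } else {
        out[n++] = a[i]; // match: keep it, advance both sides
        i++;
        j++;
      }
    }
    return Arrays.copyOf(out, n);
  }

  public static void main(String[] args) {
    int[] a = { 1, 3, 5, 7, 9 }, b = { 3, 4, 5, 9, 11 };
    System.out.println(Arrays.toString(intersect(a, b))); // [3, 5, 9]
  }
}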
/** * Run the algorithm * * @param relation Data relation * @return Outlier result */ public OutlierResult run(Relation<V> relation) { DoubleMinMax mm = new DoubleMinMax(); // resulting scores WritableDoubleDataStore oscores = DataStoreUtil.makeDoubleStorage( relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT); // Compute mean and covariance Matrix CovarianceMatrix temp = CovarianceMatrix.make(relation); double[] mean = temp.getMeanVector(relation).toArray(); // debugFine(mean.toString()); Matrix covarianceMatrix = temp.destroyToNaiveMatrix(); // debugFine(covarianceMatrix.toString()); Matrix covarianceTransposed = covarianceMatrix.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse(); // Normalization factors for Gaussian PDF final double fakt = (1.0 / (Math.sqrt( MathUtil.powi(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * covarianceMatrix.det()))); // for each object compute Mahalanobis distance for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { double[] x = minusEquals(relation.get(iditer).toArray(), mean); // Gaussian PDF final double mDist = transposeTimesTimes(x, covarianceTransposed, x); final double prob = fakt * Math.exp(-mDist * .5); mm.put(prob); oscores.putDouble(iditer, prob); } final OutlierScoreMeta meta; if (invert) { double max = mm.getMax() != 0 ? mm.getMax() : 1.; for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { oscores.putDouble(iditer, (max - oscores.doubleValue(iditer)) / max); } meta = new BasicOutlierScoreMeta(0.0, 1.0); } else { meta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), 0.0, Double.POSITIVE_INFINITY); } DoubleRelation res = new MaterializedDoubleRelation( "Gaussian Model Outlier Score", "gaussian-model-outlier", oscores, relation.getDBIDs()); return new OutlierResult(meta, res); }
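// Illustrative sketch (not part of the library code above): the score computed above is the
// multivariate Gaussian density (2*pi)^(-d/2) * |Sigma|^(-1/2) * exp(-0.5 * (x-mu)^T Sigma^-1 (x-mu)),
// evaluated with a regularized inverse of the covariance matrix. A minimal two-dimensional
// version with a diagonal covariance, plain Java:
public final class GaussianScoreSketch {
  public static void main(String[] args) {
    double[] mean = { 0., 0. };
    double[] var = { 1., 4. }; // diagonal covariance entries
    double[] x = { 1., 2. };

    double det = var[0] * var[1];
    double norm = 1. / Math.sqrt(Math.pow(2 * Math.PI, x.length) * det);
    double mdist = 0.; // squared Mahalanobis distance
    for (int i = 0; i < x.length; i++) {
      double d = x[i] - mean[i];
      mdist += d * d / var[i]; // inverse of a diagonal covariance is 1/var per component
    }
    double prob = norm * Math.exp(-0.5 * mdist);
    System.out.println("density = " + prob); // low density = more outlying
  }
}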
/** * Inserts the specified objects into this index. If a bulk load mode is implemented, the objects * are inserted in one bulk. * * @param ids the objects to be inserted */ @Override public void insertAll(DBIDs ids) { if (ids.isEmpty() || (ids.size() == 1)) { return; } // Make an example leaf if (canBulkLoad()) { List<SpatialEntry> leafs = new ArrayList<>(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { leafs.add(createNewLeafEntry(iter)); } bulkLoad(leafs); } else { for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { insert(DBIDUtil.deref(iter)); } } doExtraIntegrityChecks(); }
/** * Run the ODIN algorithm * * @param database Database to run on. * @param relation Relation to process. * @return ODIN outlier result. */ public OutlierResult run(Database database, Relation<O> relation) { // Get the query functions: DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction()); KNNQuery<O> knnq = database.getKNNQuery(dq, k); // Get the objects to process, and a data storage for counting and output: DBIDs ids = relation.getDBIDs(); WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB, 0.); double inc = 1. / (k - 1); double min = Double.POSITIVE_INFINITY, max = 0.0; // Process all objects for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { // Find the nearest neighbors (using an index, if available!) DBIDs neighbors = knnq.getKNNForDBID(iter, k); // For each neighbor, except ourselves, increase the in-degree: for (DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) { if (DBIDUtil.equal(iter, nei)) { continue; } final double value = scores.doubleValue(nei) + inc; if (value < min) { min = value; } if (value > max) { max = value; } scores.put(nei, value); } } // Wrap the result and add metadata. OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., inc * (ids.size() - 1), 1); DoubleRelation rel = new MaterializedDoubleRelation("ODIN In-Degree", "odin", scores, ids); return new OutlierResult(meta, rel); }
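// Illustrative sketch (not part of the library code above): ODIN scores each object by its
// in-degree in the kNN graph; rarely-chosen objects get low scores, and the inverted score
// metadata marks low values as outlying. The method above divides by k-1 because its kNN
// lists still contain the query point; the toy graph below excludes self instead.
public final class OdinSketch {
  public static void main(String[] args) {
    // knn[i] lists the nearest neighbors of object i (self excluded).
    int[][] knn = { { 1, 2 }, { 0, 2 }, { 0, 1 }, { 0, 2 } }; // object 3 is never chosen
    double inc = 1. / knn[0].length; // weight per incoming edge
    double[] score = new double[knn.length];
    for (int[] neighbors : knn) {
      for (int nei : neighbors) {
        score[nei] += inc; // count incoming kNN edges
      }
    }
    for (int i = 0; i < score.length; i++) {
      System.out.printf("object %d: in-degree score %.2f%n", i, score[i]); // object 3 -> 0.00
    }
  }
}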
/** * The main run method * * @param database Database to use (actually unused) * @param spatial Relation for neighborhood * @param relation Attributes to evaluate * @return Outlier result */ public OutlierResult run(Database database, Relation<N> spatial, Relation<O> relation) { final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, spatial); DistanceQuery<O> distFunc = getNonSpatialDistanceFunction().instantiate(relation); WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage( relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT); WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax lofminmax = new DoubleMinMax(); // Compute densities for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { DBIDs neighbors = npred.getNeighborDBIDs(iditer); double avg = 0; for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) { avg += distFunc.distance(iditer, iter); } double lrd = 1 / (avg / neighbors.size()); if (Double.isNaN(lrd)) { lrd = 0; } lrds.putDouble(iditer, lrd); } // Compute density quotients for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { DBIDs neighbors = npred.getNeighborDBIDs(iditer); double avg = 0; for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) { avg += lrds.doubleValue(iter); } final double lrd = (avg / neighbors.size()) / lrds.doubleValue(iditer); if (!Double.isNaN(lrd)) { lofs.putDouble(iditer, lrd); lofminmax.put(lrd); } else { lofs.putDouble(iditer, 0.0); } } // Build result representation. DoubleRelation scoreResult = new MaterializedDoubleRelation( "Spatial Outlier Factor", "sof-outlier", lofs, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta( lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); return or; }
/** * Run the algorithm * * @param database Database to process * @param relation Relation to process * @return Outlier result */ public OutlierResult run(Database database, Relation<O> relation) { DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction()); RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc); DBIDs ids = relation.getDBIDs(); // LOCI preprocessing step WritableDataStore<DoubleIntArrayList> interestingDistances = DataStoreUtil.makeStorage( relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, DoubleIntArrayList.class); precomputeInterestingRadii(ids, rangeQuery, interestingDistances); // LOCI main step FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null; WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax minmax = new DoubleMinMax(); // Shared instance, to save allocations. MeanVariance mv_n_r_alpha = new MeanVariance(); for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) { final DoubleIntArrayList cdist = interestingDistances.get(iditer); final double maxdist = cdist.getDouble(cdist.size() - 1); final int maxneig = cdist.getInt(cdist.size() - 1); double maxmdefnorm = 0.0; double maxnormr = 0; if (maxneig >= nmin) { // Compute the largest neighborhood we will need. DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(iditer, maxdist); // TODO: Ensure the result is sorted. This is currently implied. // For any critical distance, compute the normalized MDEF score. for (int i = 0, size = cdist.size(); i < size; i++) { // Only start when minimum size is fulfilled if (cdist.getInt(i) < nmin) { continue; } final double r = cdist.getDouble(i); final double alpha_r = alpha * r; // compute n(p_i, \alpha * r) from list (note: alpha_r is not cdist!) final int n_alphar = cdist.getInt(cdist.find(alpha_r)); // compute \hat{n}(p_i, r, \alpha) and the corresponding \sigma_{MDEF} mv_n_r_alpha.reset(); for (DoubleDBIDListIter neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) { // Stop at radius r if (neighbor.doubleValue() > r) { break; } DoubleIntArrayList cdist2 = interestingDistances.get(neighbor); int rn_alphar = cdist2.getInt(cdist2.find(alpha_r)); mv_n_r_alpha.put(rn_alphar); } // We only use the average and standard deviation final double nhat_r_alpha = mv_n_r_alpha.getMean(); final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev(); // Redundant divisions by nhat_r_alpha removed. final double mdef = nhat_r_alpha - n_alphar; final double sigmamdef = sigma_nhat_r_alpha; final double mdefnorm = mdef / sigmamdef; if (mdefnorm > maxmdefnorm) { maxmdefnorm = mdefnorm; maxnormr = r; } } } else { // FIXME: when nmin was not fulfilled - what is the proper value then?
maxmdefnorm = Double.POSITIVE_INFINITY; maxnormr = maxdist; } mdef_norm.putDouble(iditer, maxmdefnorm); mdef_radius.putDouble(iditer, maxnormr); minmax.put(maxmdefnorm); LOG.incrementProcessed(progressLOCI); } LOG.ensureCompleted(progressLOCI); DoubleRelation scoreResult = new MaterializedDoubleRelation( "LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta( minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); result.addChildResult( new MaterializedDoubleRelation( "LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs())); return result; }
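// Illustrative sketch (not part of the library code above): for a radius r, LOCI compares the
// point's own neighbor count n(p, alpha*r) to the mean count nhat over its r-neighborhood and
// normalizes the deviation by the standard deviation of those counts; the divisions by nhat in
// the textbook MDEF and sigma_MDEF cancel, as noted in the code above. Plain Java:
public final class MdefSketch {
  public static void main(String[] args) {
    int[] counts = { 9, 10, 11, 10, 10 }; // n(q, alpha*r) for the sampling neighbors q of p
    int nAlphaR = 3; // n(p, alpha*r) for the point p itself

    double mean = 0., sq = 0.;
    for (int c : counts) {
      mean += c;
    }
    mean /= counts.length;
    for (int c : counts) {
      sq += (c - mean) * (c - mean);
    }
    double stddev = Math.sqrt(sq / counts.length); // population ("naive") standard deviation

    double mdefnorm = (mean - nAlphaR) / stddev; // normalized MDEF
    System.out.println("normalized MDEF = " + mdefnorm); // large values indicate outliers
  }
}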
/** * Evaluate a single clustering. * * @param db Database * @param rel Data relation * @param c Clustering * @return Mean simplified silhouette */ public double evaluateClustering( Database db, Relation<? extends NumberVector> rel, Clustering<?> c) { List<? extends Cluster<?>> clusters = c.getAllClusters(); NumberVector[] centroids = new NumberVector[clusters.size()]; int ignorednoise = centroids(rel, clusters, centroids, noiseOption); MeanVariance mssil = new MeanVariance(); Iterator<? extends Cluster<?>> ci = clusters.iterator(); for (int i = 0; ci.hasNext(); i++) { Cluster<?> cluster = ci.next(); if (cluster.size() <= 1) { // As suggested in Rousseeuw, we use 0 for singletons. mssil.put(0., cluster.size()); continue; } if (cluster.isNoise()) { switch (noiseOption) { case IGNORE_NOISE: continue; // Ignore elements case TREAT_NOISE_AS_SINGLETONS: // As suggested in Rousseeuw, we use 0 for singletons. mssil.put(0., cluster.size()); continue; case MERGE_NOISE: break; // Treat as cluster below } } // Cluster center: final NumberVector center = centroids[i]; assert (center != null); for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) { NumberVector obj = rel.get(it); // a: Distance to own centroid double a = distance.distance(center, obj); // b: Distance to other clusters centroids: double min = Double.POSITIVE_INFINITY; Iterator<? extends Cluster<?>> cj = clusters.iterator(); for (int j = 0; cj.hasNext(); j++) { Cluster<?> ocluster = cj.next(); if (i == j) { continue; } NumberVector other = centroids[j]; if (other == null) { // Noise! switch (noiseOption) { case IGNORE_NOISE: continue; case TREAT_NOISE_AS_SINGLETONS: // Treat each object like a centroid! for (DBIDIter it2 = ocluster.getIDs().iter(); it2.valid(); it2.advance()) { double dist = distance.distance(rel.get(it2), obj); min = dist < min ? dist : min; } continue; case MERGE_NOISE: break; // Treat as cluster below, but should not be reachable. } } // Clusters: use centroid. double dist = distance.distance(other, obj); min = dist < min ? dist : min; } // One 'real' cluster only? min = min < Double.POSITIVE_INFINITY ? min : a; mssil.put((min - a) / (min > a ? min : a)); } } double penalty = 1.; // Only if {@link NoiseHandling#IGNORE_NOISE}: if (penalize && ignorednoise > 0) { penalty = (rel.size() - ignorednoise) / (double) rel.size(); } final double meanssil = penalty * mssil.getMean(); final double stdssil = penalty * mssil.getSampleStddev(); if (LOG.isStatistics()) { LOG.statistics( new StringStatistic( key + ".simplified-silhouette.noise-handling", noiseOption.toString())); if (ignorednoise > 0) { LOG.statistics(new LongStatistic(key + ".simplified-silhouette.ignored", ignorednoise)); } LOG.statistics(new DoubleStatistic(key + ".simplified-silhouette.mean", meanssil)); LOG.statistics(new DoubleStatistic(key + ".simplified-silhouette.stddev", stdssil)); } EvaluationResult ev = EvaluationResult.findOrCreate( db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation"); MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation"); g.addMeasure( "Simp. Silhouette +-" + FormatUtil.NF2.format(stdssil), meanssil, -1., 1., 0., false); db.getHierarchy().resultChanged(ev); return meanssil; }
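// Illustrative sketch (not part of the library code above): per object, the simplified
// silhouette uses a = distance to the own cluster centroid and b = smallest distance to any
// other centroid, combined as (b - a) / max(a, b). A one-object example, plain Java:
public final class SimplifiedSilhouetteSketch {
  static double euclidean(double[] p, double[] q) {
    double sum = 0.;
    for (int i = 0; i < p.length; i++) {
      double d = p[i] - q[i];
      sum += d * d;
    }
    return Math.sqrt(sum);
  }

  public static void main(String[] args) {
    double[] obj = { 1., 1. };
    double[] own = { 0., 0. }; // centroid of the object's own cluster
    double[][] others = { { 5., 5. }, { -4., 2. } }; // centroids of the other clusters

    double a = euclidean(obj, own);
    double b = Double.POSITIVE_INFINITY;
    for (double[] c : others) {
      b = Math.min(b, euclidean(obj, c));
    }
    double silhouette = (b - a) / Math.max(a, b); // in [-1, 1], higher is better
    System.out.println("simplified silhouette = " + silhouette);
  }
}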
/** * Run the algorithm. * * @param database Database to use * @param relation Relation to use * @return Result */ public OutlierResult run(Database database, Relation<?> relation) { WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax minmax = new DoubleMinMax(); try (InputStream in = FileUtil.tryGzipInput(new FileInputStream(file)); // TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) { Tokenizer tokenizer = reader.getTokenizer(); CharSequence buf = reader.getBuffer(); Matcher mi = idpattern.matcher(buf), ms = scorepattern.matcher(buf); reader.reset(in); while (reader.nextLineExceptComments()) { Integer id = null; double score = Double.NaN; for ( /* initialized by nextLineExceptComments */ ; tokenizer.valid(); tokenizer.advance()) { mi.region(tokenizer.getStart(), tokenizer.getEnd()); ms.region(tokenizer.getStart(), tokenizer.getEnd()); final boolean mif = mi.find(); final boolean msf = ms.find(); if (mif && msf) { throw new AbortException( "ID pattern and score pattern both match value: " + tokenizer.getSubstring()); } if (mif) { if (id != null) { throw new AbortException( "ID pattern matched twice: previous value " + id + " second value: " + tokenizer.getSubstring()); } id = Integer.parseInt(buf.subSequence(mi.end(), tokenizer.getEnd()).toString()); } if (msf) { if (!Double.isNaN(score)) { throw new AbortException( "Score pattern matched twice: previous value " + score + " second value: " + tokenizer.getSubstring()); } score = ParseUtil.parseDouble(buf, ms.end(), tokenizer.getEnd()); } } if (id != null && !Double.isNaN(score)) { scores.putDouble(DBIDUtil.importInteger(id), score); minmax.put(score); } else if (id == null && Double.isNaN(score)) { LOG.warning( "Line did not match either ID nor score nor comment: " + reader.getLineNumber()); } else { throw new AbortException( "Line matched only ID or only SCORE patterns: " + reader.getLineNumber()); } } } catch (IOException e) { throw new AbortException( "Could not load outlier scores: " + e.getMessage() + " when loading " + file, e); } OutlierScoreMeta meta; if (inverted) { meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax()); } else { meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); } DoubleRelation scoresult = new MaterializedDoubleRelation( "External Outlier", "external-outlier", scores, relation.getDBIDs()); OutlierResult or = new OutlierResult(meta, scoresult); // Apply scaling if (scaling instanceof OutlierScalingFunction) { ((OutlierScalingFunction) scaling).prepare(or); } DoubleMinMax mm = new DoubleMinMax(); for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { double val = scoresult.doubleValue(iditer); val = scaling.getScaled(val); scores.putDouble(iditer, val); mm.put(val); } meta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax()); or = new OutlierResult(meta, scoresult); return or; }
/** * Run the algorithm * * @param db Database * @param relation Relation * @return Clustering hierarchy */ public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) { DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction()); ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs()); final int size = ids.size(); if (size > 0x10000) { throw new AbortException( "This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow."); } if (Linkage.SINGLE.equals(linkage)) { LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!"); } // Compute the initial (lower triangular) distance matrix. double[] scratch = new double[triangleSize(size)]; DBIDArrayIter ix = ids.iter(), iy = ids.iter(), ij = ids.iter(); // Position counter - must agree with computeOffset! int pos = 0; boolean square = Linkage.WARD.equals(linkage) && !(SquaredEuclideanDistanceFunction.class.isInstance(getDistanceFunction())); for (int x = 0; ix.valid(); x++, ix.advance()) { iy.seek(0); for (int y = 0; y < x; y++, iy.advance()) { scratch[pos] = dq.distance(ix, iy); // Ward uses variances -- i.e. squared values if (square) { scratch[pos] *= scratch[pos]; } pos++; } } // Initialize space for result: WritableDBIDDataStore parent = DataStoreUtil.makeDBIDStorage( ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC); WritableDoubleDataStore height = DataStoreUtil.makeDoubleStorage( ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC); WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage( ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { parent.put(it, it); height.put(it, Double.POSITIVE_INFINITY); csize.put(it, 1); } // Repeat until everything merged, except the desired number of clusters: FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null; for (int i = 1; i < size; i++) { double min = Double.POSITIVE_INFINITY; int minx = -1, miny = -1; for (ix.seek(0); ix.valid(); ix.advance()) { if (height.doubleValue(ix) < Double.POSITIVE_INFINITY) { continue; } final int xbase = triangleSize(ix.getOffset()); for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) { if (height.doubleValue(iy) < Double.POSITIVE_INFINITY) { continue; } final int idx = xbase + iy.getOffset(); if (scratch[idx] <= min) { min = scratch[idx]; minx = ix.getOffset(); miny = iy.getOffset(); } } } assert (minx >= 0 && miny >= 0); // Avoid allocating memory, by reusing existing iterators: ix.seek(minx); iy.seek(miny); // Perform merge in data structure: x -> y // Since y < x, prefer keeping y, dropping x. int sizex = csize.intValue(ix), sizey = csize.intValue(iy); height.put(ix, min); parent.put(ix, iy); csize.put(iy, sizex + sizey); // Update distance matrix. 
// Note: miny < minx final int xbase = triangleSize(minx), ybase = triangleSize(miny); // Write to (y, j), with j < y for (ij.seek(0); ij.getOffset() < miny; ij.advance()) { if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) { continue; } final int sizej = csize.intValue(ij); scratch[ybase + ij.getOffset()] = linkage.combine( sizex, scratch[xbase + ij.getOffset()], sizey, scratch[ybase + ij.getOffset()], sizej, min); } // Write to (j, y), with y < j < x for (ij.seek(miny + 1); ij.getOffset() < minx; ij.advance()) { if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) { continue; } final int jbase = triangleSize(ij.getOffset()); final int sizej = csize.intValue(ij); scratch[jbase + miny] = linkage.combine( sizex, scratch[xbase + ij.getOffset()], sizey, scratch[jbase + miny], sizej, min); } // Write to (j, y), with y < x < j for (ij.seek(minx + 1); ij.valid(); ij.advance()) { if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) { continue; } final int jbase = triangleSize(ij.getOffset()); final int sizej = csize.intValue(ij); scratch[jbase + miny] = linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min); } LOG.incrementProcessed(prog); } LOG.ensureCompleted(prog); return new PointerHierarchyRepresentationResult(ids, parent, height); }
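// Illustrative sketch (not part of the library code above): the scratch array above stores the
// lower-triangular distance matrix row by row, so the distance of objects x and y with y < x
// sits at triangleSize(x) + y with triangleSize(x) = x*(x-1)/2. The triangleSize below is a
// re-implementation for illustration, assuming the same convention as the method above.
public final class TriangularIndexSketch {
  /** Number of matrix entries before row x. */
  static int triangleSize(int x) {
    return (x * (x - 1)) >>> 1;
  }

  public static void main(String[] args) {
    int size = 4;
    double[] scratch = new double[triangleSize(size)]; // 6 pairwise distances for 4 objects
    int pos = 0;
    for (int x = 0; x < size; x++) {
      for (int y = 0; y < x; y++) {
        scratch[pos++] = 10 * x + y; // fake "distance", easy to recognize
      }
    }
    System.out.println(scratch[triangleSize(3) + 1]); // distance of objects 3 and 1: prints 31.0
  }
}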
/** * Evaluate a single clustering. * * @param db Database * @param rel Data relation * @param dq Distance query * @param c Clustering * @return C-Index */ public double evaluateClustering( Database db, Relation<? extends O> rel, DistanceQuery<O> dq, Clustering<?> c) { List<? extends Cluster<?>> clusters = c.getAllClusters(); // theta is the sum, w the number of within group distances double theta = 0; int w = 0; int ignorednoise = 0; int isize = clusters.size() <= 1 ? rel.size() : rel.size() / (clusters.size() - 1); DoubleArray pairDists = new DoubleArray(isize); for (int i = 0; i < clusters.size(); i++) { Cluster<?> cluster = clusters.get(i); if (cluster.size() <= 1 || cluster.isNoise()) { switch (noiseOption) { case IGNORE_NOISE: ignorednoise += cluster.size(); continue; // Ignore case TREAT_NOISE_AS_SINGLETONS: continue; // No within-cluster distances! case MERGE_NOISE: break; // Treat like a cluster } } for (DBIDIter it1 = cluster.getIDs().iter(); it1.valid(); it1.advance()) { O obj = rel.get(it1); // Compare object to every cluster, but only once for (int j = i; j < clusters.size(); j++) { Cluster<?> ocluster = clusters.get(j); if (ocluster.size() <= 1 || ocluster.isNoise()) { switch (noiseOption) { case IGNORE_NOISE: continue; // Ignore this cluster. case TREAT_NOISE_AS_SINGLETONS: case MERGE_NOISE: break; // Treat like a cluster } } for (DBIDIter it2 = ocluster.getIDs().iter(); it2.valid(); it2.advance()) { if (DBIDUtil.compare(it1, it2) <= 0) { // Only once. continue; } double dist = dq.distance(obj, rel.get(it2)); pairDists.add(dist); if (ocluster == cluster) { // Within-cluster distances. theta += dist; w++; } } } } } // Simulate best and worst cases: pairDists.sort(); double min = 0, max = 0; for (int i = 0, j = pairDists.size() - 1; i < w; i++, j--) { min += pairDists.get(i); max += pairDists.get(j); } double cIndex = (max > min) ? (theta - min) / (max - min) : 0.; if (LOG.isStatistics()) { LOG.statistics(new StringStatistic(key + ".c-index.noise-handling", noiseOption.toString())); if (ignorednoise > 0) { LOG.statistics(new LongStatistic(key + ".c-index.ignored", ignorednoise)); } LOG.statistics(new DoubleStatistic(key + ".c-index", cIndex)); } EvaluationResult ev = EvaluationResult.findOrCreate( db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation"); MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation"); g.addMeasure("C-Index", cIndex, 0., 1., 0., true); db.getHierarchy().resultChanged(ev); return cIndex; }
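// Illustrative sketch (not part of the library code above): the C-index compares the sum theta
// of the w within-cluster distances against the smallest and largest possible sums of w
// pairwise distances: C = (theta - min) / (max - min), 0 is best. Plain Java on fixed numbers:
import java.util.Arrays;

public final class CIndexSketch {
  public static void main(String[] args) {
    double[] pairDists = { 0.2, 0.1, 0.9, 0.4, 0.3, 0.8 }; // all pairwise distances
    double theta = 0.1 + 0.2 + 0.4; // sum of the w within-cluster distances
    int w = 3;

    Arrays.sort(pairDists);
    double min = 0., max = 0.;
    for (int i = 0, j = pairDists.length - 1; i < w; i++, j--) {
      min += pairDists[i]; // w smallest distances
      max += pairDists[j]; // w largest distances
    }
    double cIndex = (max > min) ? (theta - min) / (max - min) : 0.;
    System.out.println("C-Index = " + cIndex); // here: 0.1 / 1.5
  }
}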
/** * Evaluate a single clustering. * * @param db Database * @param rel Data relation * @param c Clustering * @return Gamma index */ public double evaluateClustering( Database db, Relation<? extends NumberVector> rel, Clustering<?> c) { List<? extends Cluster<?>> clusters = c.getAllClusters(); int ignorednoise = 0, withinPairs = 0; for (Cluster<?> cluster : clusters) { if ((cluster.size() <= 1 || cluster.isNoise())) { switch (noiseHandling) { case IGNORE_NOISE: ignorednoise += cluster.size(); continue; case TREAT_NOISE_AS_SINGLETONS: continue; // No concordant distances. case MERGE_NOISE: break; // Treat like a cluster below. } } withinPairs += (cluster.size() * (cluster.size() - 1)) >>> 1; if (withinPairs < 0) { throw new AbortException( "Integer overflow - clusters too large to compute pairwise distances."); } } // Materialize within-cluster distances (sorted): double[] withinDistances = computeWithinDistances(rel, clusters, withinPairs); int[] withinTies = new int[withinDistances.length]; // Count ties within countTies(withinDistances, withinTies); long concordantPairs = 0, discordantPairs = 0, betweenPairs = 0; // Step two, compute discordant distances: for (int i = 0; i < clusters.size(); i++) { Cluster<?> ocluster1 = clusters.get(i); if ((ocluster1.size() <= 1 || ocluster1.isNoise()) // && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) { continue; } for (int j = i + 1; j < clusters.size(); j++) { Cluster<?> ocluster2 = clusters.get(j); if ((ocluster2.size() <= 1 || ocluster2.isNoise()) // && noiseHandling.equals(NoiseHandling.IGNORE_NOISE)) { continue; } betweenPairs += ocluster1.size() * ocluster2.size(); for (DBIDIter oit1 = ocluster1.getIDs().iter(); oit1.valid(); oit1.advance()) { NumberVector obj = rel.get(oit1); for (DBIDIter oit2 = ocluster2.getIDs().iter(); oit2.valid(); oit2.advance()) { double dist = distanceFunction.distance(obj, rel.get(oit2)); int p = Arrays.binarySearch(withinDistances, dist); if (p >= 0) { // Tied distances: while (p > 0 && withinDistances[p - 1] >= dist) { --p; } concordantPairs += p; discordantPairs += withinDistances.length - p - withinTies[p]; continue; } p = -p - 1; concordantPairs += p; discordantPairs += withinDistances.length - p; } } } } // Total number of pairs possible: final long t = ((rel.size() - ignorednoise) * (long) (rel.size() - ignorednoise - 1)) >>> 1; final long tt = (t * (t - 1)) >>> 1; final double gamma = (concordantPairs - discordantPairs) / (double) (concordantPairs + discordantPairs); final double tau = computeTau(concordantPairs, discordantPairs, tt, withinDistances.length, betweenPairs); if (LOG.isStatistics()) { LOG.statistics(new StringStatistic(key + ".gamma.noise-handling", noiseHandling.toString())); if (ignorednoise > 0) { LOG.statistics(new LongStatistic(key + ".gamma.ignored", ignorednoise)); } LOG.statistics(new DoubleStatistic(key + ".gamma", gamma)); LOG.statistics(new DoubleStatistic(key + ".tau", tau)); } EvaluationResult ev = EvaluationResult.findOrCreate( db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation"); MeasurementGroup g = ev.findOrCreateGroup("Concordance-based Evaluation"); g.addMeasure("Gamma", gamma, -1., 1., 0., false); g.addMeasure("Tau", tau, -1., +1., 0., false); db.getHierarchy().resultChanged(ev); return gamma; }
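// Illustrative sketch (not part of the library code above): the Gamma index counts pairs of a
// within-cluster and a between-cluster distance that are in the expected order (concordant)
// versus the wrong order (discordant) and reports (Nc - Nd) / (Nc + Nd). Plain Java:
public final class GammaSketch {
  public static void main(String[] args) {
    double[] within = { 0.1, 0.2, 0.3 };
    double[] between = { 0.25, 0.5, 0.7 };

    long concordant = 0, discordant = 0;
    for (double w : within) {
      for (double b : between) {
        if (w < b) {
          concordant++; // within smaller than between: as expected
        } else if (w > b) {
          discordant++; // between smaller than within: against expectation
        } // ties count for neither
      }
    }
    double gamma = (concordant - discordant) / (double) (concordant + discordant);
    System.out.println("Gamma = " + gamma); // 1 is perfect separation, -1 the worst
  }
}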
@Override public void deleteAll(DBIDs ids) { for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { delete(iter); } }