/** * DBSCAN-function expandCluster. * * <p>Border-Objects become members of the first possible cluster. * * @param relation Database relation to run on * @param rangeQuery Range query to use * @param startObjectID potential seed of a new potential cluster * @param objprog the progress object for logging the current status */ protected void expandCluster( Relation<O> relation, RangeQuery<O> rangeQuery, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) { DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon); ncounter += neighbors.size(); // startObject is no core-object if (neighbors.size() < minpts) { noise.add(startObjectID); processedIDs.add(startObjectID); if (objprog != null) { objprog.incrementProcessed(LOG); } return; } ModifiableDBIDs currentCluster = DBIDUtil.newArray(); currentCluster.add(startObjectID); processedIDs.add(startObjectID); // try to expand the cluster HashSetModifiableDBIDs seeds = DBIDUtil.newHashSet(); processNeighbors(neighbors.iter(), currentCluster, seeds); DBIDVar o = DBIDUtil.newVar(); while (!seeds.isEmpty()) { seeds.pop(o); neighbors = rangeQuery.getRangeForDBID(o, epsilon); ncounter += neighbors.size(); if (neighbors.size() >= minpts) { processNeighbors(neighbors.iter(), currentCluster, seeds); } if (objprog != null) { objprog.incrementProcessed(LOG); } } resultList.add(currentCluster); if (clusprog != null) { clusprog.setProcessed(resultList.size(), LOG); } }
/** * Performs a single run of FastDOC, finding a single cluster. * * @param database Database context * @param relation used to get actual values for DBIDs. * @param S The set of points we're working on. * @param d Dimensionality of the data set we're currently working on. * @param r Size of random samples. * @param m Number of inner iterations (per seed point). * @param n Number of outer iterations (seed points). * @return a cluster, if one is found, else <code>null</code>. */ private Cluster<SubspaceModel> runFastDOC( Database database, Relation<V> relation, ArrayModifiableDBIDs S, int d, int n, int m, int r) { // Relevant attributes of highest cardinality. long[] D = null; // The seed point for the best dimensions. DBIDVar dV = DBIDUtil.newVar(); // Inform the user about the progress in the current iteration. FiniteProgress iprogress = LOG.isVerbose() ? new FiniteProgress("Iteration progress for current cluster", m * n, LOG) : null; Random random = rnd.getSingleThreadedRandom(); DBIDArrayIter iter = S.iter(); outer: for (int i = 0; i < n; ++i) { // Pick a random seed point. iter.seek(random.nextInt(S.size())); for (int j = 0; j < m; ++j) { // Choose a set of random points. DBIDs randomSet = DBIDUtil.randomSample(S, r, random); // Initialize cluster info. long[] nD = BitsUtil.zero(d); // Test each dimension. for (int k = 0; k < d; ++k) { if (dimensionIsRelevant(k, relation, randomSet)) { BitsUtil.setI(nD, k); } } if (D == null || BitsUtil.cardinality(nD) > BitsUtil.cardinality(D)) { D = nD; dV.set(iter); if (BitsUtil.cardinality(D) >= d_zero) { if (iprogress != null) { iprogress.setProcessed(iprogress.getTotal(), LOG); } break outer; } } LOG.incrementProcessed(iprogress); } } LOG.ensureCompleted(iprogress); // If no relevant dimensions were found, skip it. if (D == null || BitsUtil.cardinality(D) == 0) { return null; } // Get all points in the box. SubspaceMaximumDistanceFunction df = new SubspaceMaximumDistanceFunction(D); DistanceQuery<V> dq = database.getDistanceQuery(relation, df); RangeQuery<V> rq = database.getRangeQuery(dq, DatabaseQuery.HINT_SINGLE); // TODO: add filtering capabilities into query API! DBIDs C = DBIDUtil.intersection(S, rq.getRangeForDBID(dV, w)); // If we have a non-empty cluster, return it. return (C.size() > 0) ? makeCluster(relation, C, D) : null; }