/** * Constructor. * * @param size Size * @param idmap ID map */ public ArrayDBIDStore(int size, DataStoreIDMap idmap) { super(); this.data = DBIDUtil.newArray(size); // Initialize DBIDRef inv = DBIDUtil.invalid(); for (int i = 0; i < size; i++) { data.add(inv); } this.idmap = idmap; }
/**
 * DBSCAN-function expandCluster.
 *
 * <p>Border-Objects become members of the first possible cluster.
 *
 * @param relation Database relation to run on
 * @param rangeQuery Range query to use
 * @param startObjectID potential seed of a new potential cluster
 * @param objprog the progress object for logging the current status
 * @param clusprog the progress object for logging the number of clusters found so far
 */
protected void expandCluster(
    Relation<O> relation,
    RangeQuery<O> rangeQuery,
    DBIDRef startObjectID,
    FiniteProgress objprog,
    IndefiniteProgress clusprog) {
  DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
  // Statistics: count every neighbor retrieved by a range query.
  ncounter += neighbors.size();

  // startObject is no core-object: tentatively file it as noise.
  // NOTE(review): presumably a later expansion can still absorb it as a
  // border object (see class comment above) — that happens outside this view.
  if (neighbors.size() < minpts) {
    noise.add(startObjectID);
    processedIDs.add(startObjectID);
    if (objprog != null) {
      objprog.incrementProcessed(LOG);
    }
    return;
  }

  // startObject is a core object: open a new cluster seeded with it.
  ModifiableDBIDs currentCluster = DBIDUtil.newArray();
  currentCluster.add(startObjectID);
  processedIDs.add(startObjectID);

  // try to expand the cluster: seeds holds unexplored candidate members.
  HashSetModifiableDBIDs seeds = DBIDUtil.newHashSet();
  processNeighbors(neighbors.iter(), currentCluster, seeds);

  DBIDVar o = DBIDUtil.newVar();
  while (!seeds.isEmpty()) {
    // Remove an arbitrary seed and query its neighborhood.
    seeds.pop(o);
    neighbors = rangeQuery.getRangeForDBID(o, epsilon);
    ncounter += neighbors.size();

    // Only core objects (>= minpts neighbors) contribute further seeds;
    // non-core seeds remain plain (border) members of the cluster.
    if (neighbors.size() >= minpts) {
      processNeighbors(neighbors.iter(), currentCluster, seeds);
    }

    if (objprog != null) {
      objprog.incrementProcessed(LOG);
    }
  }
  resultList.add(currentCluster);
  if (clusprog != null) {
    clusprog.setProcessed(resultList.size(), LOG);
  }
}
/**
 * Merge-join two sorted DBID collections into their intersection.
 *
 * <p>Both inputs must be in sorted order for the merge-join to be correct;
 * hash-based sets are rejected via assertions because their iteration order
 * is undefined.
 *
 * @param first First sorted DBID collection
 * @param second Second sorted DBID collection
 * @return IDs contained in both collections, in sorted order
 */
private DBIDs mergeJoin(DBIDs first, DBIDs second) {
  assert (!(first instanceof HashSetDBIDs));
  assert (!(second instanceof HashSetDBIDs));
  // Presize the result: the intersection can never be larger than the
  // smaller of the two inputs.
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(Math.min(first.size(), second.size()));
  DBIDIter i1 = first.iter(), i2 = second.iter();
  while (i1.valid() && i2.valid()) {
    final int c = DBIDUtil.compare(i1, i2);
    if (c < 0) {
      // i1 is behind: skip ahead.
      i1.advance();
    } else if (c > 0) {
      // i2 is behind: skip ahead.
      i2.advance();
    } else {
      // Match: keep the ID, advance both cursors.
      ids.add(i1);
      i1.advance();
      i2.advance();
    }
  }
  return ids;
}
private DBIDs[] buildIndex(Relation<BitVector> relation, int dim, int minsupp) { ArrayModifiableDBIDs[] idx = new ArrayModifiableDBIDs[dim]; for (int i = 0; i < dim; i++) { idx[i] = DBIDUtil.newArray(); } for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { SparseFeatureVector<?> bv = relation.get(iter); // TODO: only count those which satisfy minlength? for (int it = bv.iter(); bv.iterValid(it); it = bv.iterAdvance(it)) { idx[bv.iterDim(it)].add(iter); } } // Forget non-frequent 1-itemsets. for (int i = 0; i < dim; i++) { if (idx[i].size() < minsupp) { idx[i] = null; } else { idx[i].sort(); } } return idx; }
/**
 * Performs the DOC or FastDOC (as configured) algorithm on the given Database.
 *
 * <p>This will run exhaustively, i.e. run DOC until no clusters are found anymore / the database
 * size has shrunk below the threshold for minimum cluster size.
 *
 * @param database Database
 * @param relation Data relation
 * @return Clustering result, with remaining points collected in a noise cluster
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
  // Dimensionality of our set.
  final int d = RelationUtil.dimensionality(relation);

  // Get available DBIDs as a set we can remove items from.
  ArrayModifiableDBIDs S = DBIDUtil.newArray(relation.getDBIDs());

  // Precompute values as described in Figure 2.
  // NOTE(review): r/n/m are the iteration bounds from the DOC publication,
  // derived from the alpha/beta configuration parameters — verify against paper.
  double r = Math.abs(Math.log(d + d) / Math.log(beta * .5));
  // Outer loop count.
  int n = (int) (2. / alpha);
  // Inner loop count.
  int m = (int) (Math.pow(2. / alpha, r) * Math.log(4));
  if (heuristics) {
    // Cap the inner loop count for the FastDOC heuristic.
    m = Math.min(m, Math.min(1000000, d * d));
  }

  // Minimum size for a cluster for it to be accepted.
  int minClusterSize = (int) (alpha * S.size());

  // List of all clusters we found.
  Clustering<SubspaceModel> result = new Clustering<>("DOC Clusters", "DOC");

  // Inform the user about the number of actual clusters found so far.
  IndefiniteProgress cprogress =
      LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;

  // To not only find a single cluster, we continue running until our set
  // of points is empty.
  while (S.size() > minClusterSize) {
    Cluster<SubspaceModel> C;
    if (heuristics) {
      C = runFastDOC(database, relation, S, d, n, m, (int) r);
    } else {
      C = runDOC(database, relation, S, d, n, m, (int) r, minClusterSize);
    }

    if (C == null) {
      // Stop trying if we couldn't find a cluster.
      break;
    }
    // Found a cluster, remember it, remove its points from the set.
    result.addToplevelCluster(C);

    // Remove all points of the cluster from the set and continue.
    S.removeDBIDs(C.getIDs());

    if (cprogress != null) {
      cprogress.setProcessed(result.getAllClusters().size(), LOG);
    }
  }

  // Add the remainder as noise (a single full-dimensional cluster).
  if (S.size() > 0) {
    long[] alldims = BitsUtil.ones(d);
    result.addToplevelCluster(
        new Cluster<>(
            S,
            true,
            new SubspaceModel(new Subspace(alldims), Centroid.make(relation, S).getArrayRef())));
  }
  LOG.setCompleted(cprogress);
  return result;
}