public List<Polygon> compute() {
  // Compute delaunay triangulation:
  delaunay = (new SweepHullDelaunay2D(points)).getDelaunay();

  List<Polygon> polys = new ArrayList<>();

  // Working data
  long[] used = BitsUtil.zero(delaunay.size());
  List<double[]> cur = new ArrayList<>();

  for (int i = 0 /* = used.nextClearBit(0) */; i < delaunay.size() && i >= 0; i = BitsUtil.nextClearBit(used, i + 1)) {
    if (!BitsUtil.get(used, i)) {
      BitsUtil.setI(used, i);
      SweepHullDelaunay2D.Triangle tri = delaunay.get(i);
      if (tri.r2 <= alpha2) {
        // Check neighbors
        processNeighbor(cur, used, i, tri.ab, tri.b);
        processNeighbor(cur, used, i, tri.bc, tri.c);
        processNeighbor(cur, used, i, tri.ca, tri.a);
      }
      if (cur.size() > 0) {
        polys.add(new Polygon(cur));
        cur = new ArrayList<>();
      }
    }
  }
  return polys;
}
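// A minimal, hypothetical sketch of the visited-mask traversal pattern used in
// compute() above, written against java.util.BitSet instead of ELKI's long[]
// BitsUtil masks. The class, method names and the adjacency-list input are
// made up for illustration; only "scan with nextClearBit, mark as used,
// expand recursively" mirrors the code above.
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

public class VisitedMaskSketch {
  /** Group nodes of an adjacency graph into connected components. */
  static List<List<Integer>> components(List<int[]> neighbors) {
    BitSet used = new BitSet(neighbors.size());
    List<List<Integer>> groups = new ArrayList<>();
    // nextClearBit finds the next node not processed yet, just like
    // BitsUtil.nextClearBit(used, i + 1) in compute().
    for (int i = used.nextClearBit(0); i < neighbors.size(); i = used.nextClearBit(i + 1)) {
      used.set(i);
      List<Integer> group = new ArrayList<>();
      expand(i, neighbors, used, group);
      groups.add(group);
    }
    return groups;
  }

  /** Depth-first expansion, analogous in spirit to processNeighbor(). */
  static void expand(int node, List<int[]> neighbors, BitSet used, List<Integer> group) {
    group.add(node);
    for (int next : neighbors.get(node)) {
      if (next >= 0 && !used.get(next)) {
        used.set(next);
        expand(next, neighbors, used, group);
      }
    }
  }
}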
/**
 * Select or deselect a row.
 *
 * @param rnum Row to select
 * @param set Value to set
 */
protected void selectRow(int rnum, boolean set) {
  if (set) {
    BitsUtil.setI(rows, rnum);
    rowcard++;
  } else {
    BitsUtil.clearI(rows, rnum);
    rowcard--;
  }
}
/**
 * Select or deselect a column.
 *
 * @param cnum Column to select
 * @param set Value to set
 */
protected void selectColumn(int cnum, boolean set) {
  if (set) {
    BitsUtil.setI(cols, cnum);
    colcard++;
  } else {
    BitsUtil.clearI(cols, cnum);
    colcard--;
  }
}
@Override
public long[] getVisibleDimensions2D() {
  final int dim = proj.getDimensionality();
  long[] actDim = BitsUtil.zero(dim);
  double[] vScale = new double[dim];
  for (int d = 0; d < dim; d++) {
    Arrays.fill(vScale, 0);
    vScale[d] = 1;
    double[] vRender = fastProjectScaledToRenderSpace(vScale);
    // TODO: Can't we do this by inspecting the projection matrix directly?
    // Note: "vRender[0] > 0.0 || vRender[0] < 0.0" is false for NaN, unlike "!= 0".
    if (vRender[0] > 0.0 || vRender[0] < 0.0 || vRender[1] != 0) {
      BitsUtil.setI(actDim, d);
    }
  }
  return actDim;
}
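// A minimal, hypothetical sketch of the idea behind getVisibleDimensions2D():
// the image of the d-th unit vector under a purely linear 2 x dim projection
// is just the d-th matrix column, so a dimension is visible iff that column is
// nonzero. ELKI's projection object is replaced by a plain double[][] matrix
// here, and the assumption of a linear (translation-free) projection is mine.
import java.util.BitSet;

public class VisibleDimensionsSketch {
  static BitSet visibleDimensions(double[][] proj2xD) {
    int dim = proj2xD[0].length;
    BitSet visible = new BitSet(dim);
    for (int d = 0; d < dim; d++) {
      // image of the d-th unit vector = d-th column of the projection matrix
      double x = proj2xD[0][d], y = proj2xD[1][d];
      if (x != 0 || y != 0) {
        visible.set(d);
      }
    }
    return visible;
  }
}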
/**
 * Main loop of OUTRES. Run for each object.
 *
 * @param s start dimension
 * @param subspace Current subspace
 * @param id Current object ID
 * @param kernel Kernel
 * @return Score
 */
public double outresScore(final int s, long[] subspace, DBIDRef id,
    KernelDensityEstimator kernel) {
  double score = 1.0; // Initial score is 1.0
  final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
  MeanVariance meanv = new MeanVariance();

  for (int i = s; i < kernel.dim; i++) {
    if (BitsUtil.get(subspace, i)) { // TODO: needed? Or should we always start with i=0?
      continue;
    }
    BitsUtil.setI(subspace, i);
    df.setSelectedDimensions(subspace);
    final double adjustedEps = kernel.adjustedEps(kernel.dim);
    // Query with a larger window, to also get neighbors of neighbors.
    // Subspace Euclidean is metric!
    final double range = adjustedEps * 2.;
    RangeQuery<V> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);

    DoubleDBIDList neighc = rq.getRangeForDBID(id, range);
    DoubleDBIDList neigh = refineRange(neighc, adjustedEps);
    if (neigh.size() > 2) {
      // Relevance test
      if (relevantSubspace(subspace, neigh, kernel)) {
        final double density = kernel.subspaceDensity(subspace, neigh);
        // Compute mean and standard deviation for densities of neighbors.
        meanv.reset();
        for (DoubleDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
          DoubleDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel);
          meanv.put(kernel.subspaceDensity(subspace, n2));
        }
        final double deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev());
        // High deviation:
        if (deviation >= 1) {
          score *= (density / deviation);
        }
        // Recursion
        score *= outresScore(i + 1, subspace, id, kernel);
      }
    }
    BitsUtil.clearI(subspace, i);
  }
  return score;
}
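// A minimal, hypothetical sketch of the subspace recursion pattern used in
// outresScore(): temporarily set bit i, evaluate/recurse on the enlarged
// subspace, then clear bit i so the caller's mask is left unchanged. The
// class and method names are made up, the evaluation step is stubbed out,
// and java.util.BitSet stands in for ELKI's long[] BitsUtil masks.
import java.util.BitSet;

public class SubspaceRecursionSketch {
  static double subspaceScore(int start, BitSet subspace, int dim) {
    double score = 1.0;
    for (int i = start; i < dim; i++) {
      if (subspace.get(i)) {
        continue; // dimension already part of the subspace: skip it
      }
      subspace.set(i); // grow the subspace by dimension i, in place
      // ... evaluate the enlarged subspace here (neighbors, density, relevance) ...
      score *= subspaceScore(i + 1, subspace, dim); // recurse into higher dimensions only
      subspace.clear(i); // restore the mask before trying the next dimension
    }
    return score;
  }
}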
/**
 * Process the neighbor triangle across an edge: if it belongs to the alpha
 * shape, continue walking its boundary; otherwise emit the boundary vertex.
 *
 * @param cur Current polygon (list of boundary points)
 * @param used Bitmask of visited triangles
 * @param i Index of the current triangle
 * @param ab Index of the neighbor triangle (negative if there is none)
 * @param b Index of the vertex to add if the edge lies on the boundary
 */
private void processNeighbor(List<double[]> cur, long[] used, int i, int ab, int b) {
  if (ab >= 0) {
    if (BitsUtil.get(used, ab)) {
      return;
    }
    BitsUtil.setI(used, ab);
    final SweepHullDelaunay2D.Triangle next = delaunay.get(ab);
    if (next.r2 < alpha2) {
      // Continue where we left off...
      if (next.ab == i) {
        processNeighbor(cur, used, ab, next.bc, next.c);
        processNeighbor(cur, used, ab, next.ca, next.a);
      } else if (next.bc == i) {
        processNeighbor(cur, used, ab, next.ca, next.a);
        processNeighbor(cur, used, ab, next.ab, next.b);
      } else if (next.ca == i) {
        processNeighbor(cur, used, ab, next.ab, next.b);
        processNeighbor(cur, used, ab, next.bc, next.c);
      }
      return;
    }
  }
  cur.add(points.get(b));
}
/**
 * Mark a row as inverted.
 *
 * @param rnum Row to mark
 * @param b Value to set (currently ignored; the bit is always set)
 */
protected void invertRow(int rnum, boolean b) {
  BitsUtil.setI(irow, rnum);
}
/**
 * Performs a single run of FastDOC, finding a single cluster.
 *
 * @param database Database context
 * @param relation used to get actual values for DBIDs.
 * @param S The set of points we're working on.
 * @param d Dimensionality of the data set we're currently working on.
 * @param n Number of outer iterations (seed points).
 * @param m Number of inner iterations (per seed point).
 * @param r Size of random samples.
 * @return a cluster, if one is found, else <code>null</code>.
 */
private Cluster<SubspaceModel> runFastDOC(Database database, Relation<V> relation,
    ArrayModifiableDBIDs S, int d, int n, int m, int r) {
  // Relevant attributes of highest cardinality.
  long[] D = null;
  // The seed point for the best dimensions.
  DBIDVar dV = DBIDUtil.newVar();

  // Inform the user about the progress in the current iteration.
  FiniteProgress iprogress = LOG.isVerbose() ? new FiniteProgress("Iteration progress for current cluster", m * n, LOG) : null;

  Random random = rnd.getSingleThreadedRandom();
  DBIDArrayIter iter = S.iter();

  outer: for (int i = 0; i < n; ++i) {
    // Pick a random seed point.
    iter.seek(random.nextInt(S.size()));

    for (int j = 0; j < m; ++j) {
      // Choose a set of random points.
      DBIDs randomSet = DBIDUtil.randomSample(S, r, random);

      // Initialize cluster info.
      long[] nD = BitsUtil.zero(d);

      // Test each dimension.
      for (int k = 0; k < d; ++k) {
        if (dimensionIsRelevant(k, relation, randomSet)) {
          BitsUtil.setI(nD, k);
        }
      }

      if (D == null || BitsUtil.cardinality(nD) > BitsUtil.cardinality(D)) {
        D = nD;
        dV.set(iter);

        if (BitsUtil.cardinality(D) >= d_zero) {
          if (iprogress != null) {
            iprogress.setProcessed(iprogress.getTotal(), LOG);
          }
          break outer;
        }
      }
      LOG.incrementProcessed(iprogress);
    }
  }
  LOG.ensureCompleted(iprogress);

  // If no relevant dimensions were found, skip it.
  if (D == null || BitsUtil.cardinality(D) == 0) {
    return null;
  }

  // Get all points in the box.
  SubspaceMaximumDistanceFunction df = new SubspaceMaximumDistanceFunction(D);
  DistanceQuery<V> dq = database.getDistanceQuery(relation, df);
  RangeQuery<V> rq = database.getRangeQuery(dq, DatabaseQuery.HINT_SINGLE);

  // TODO: add filtering capabilities into query API!
  DBIDs C = DBIDUtil.intersection(S, rq.getRangeForDBID(dV, w));

  // If we have a non-empty cluster, return it.
  return (C.size() > 0) ? makeCluster(relation, C, D) : null;
}
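// A minimal, hypothetical sketch of the candidate bookkeeping in runFastDOC():
// build a relevance mask per random sample, keep the mask of highest
// cardinality, and stop early once a target cardinality (d_zero above) is
// reached. The relevance test and the sampling are stubbed with a coin flip,
// and java.util.BitSet stands in for the long[] BitsUtil masks; all names here
// are made up for illustration.
import java.util.BitSet;
import java.util.Random;

public class BestMaskSketch {
  static BitSet bestMask(int d, int trials, int dZero, Random random) {
    BitSet best = null;
    for (int t = 0; t < trials; t++) {
      BitSet cand = new BitSet(d);
      for (int k = 0; k < d; k++) {
        // stand-in for dimensionIsRelevant(k, relation, randomSet)
        if (random.nextBoolean()) {
          cand.set(k);
        }
      }
      if (best == null || cand.cardinality() > best.cardinality()) {
        best = cand;
        if (best.cardinality() >= dZero) {
          break; // good enough, same role as "break outer" above
        }
      }
    }
    return best;
  }
}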
/**
 * Performs a single run of DOC, finding a single cluster.
 *
 * @param database Database context
 * @param relation used to get actual values for DBIDs.
 * @param S The set of points we're working on.
 * @param d Dimensionality of the data set we're currently working on.
 * @param n Number of outer iterations (seed points).
 * @param m Number of inner iterations (per seed point).
 * @param r Size of random samples.
 * @param minClusterSize Minimum size a cluster must have to be accepted.
 * @return a cluster, if one is found, else <code>null</code>.
 */
private Cluster<SubspaceModel> runDOC(Database database, Relation<V> relation,
    ArrayModifiableDBIDs S, final int d, int n, int m, int r, int minClusterSize) {
  // Best cluster for the current run.
  DBIDs C = null;
  // Relevant attributes for the best cluster.
  long[] D = null;
  // Quality of the best cluster.
  double quality = Double.NEGATIVE_INFINITY;

  // Bounds for our cluster.
  // ModifiableHyperBoundingBox bounds = new ModifiableHyperBoundingBox(new double[d], new double[d]);

  // Weights for distance (= rectangle query)
  SubspaceMaximumDistanceFunction df = new SubspaceMaximumDistanceFunction(BitsUtil.zero(d));
  DistanceQuery<V> dq = database.getDistanceQuery(relation, df);
  RangeQuery<V> rq = database.getRangeQuery(dq);

  // Inform the user about the progress in the current iteration.
  FiniteProgress iprogress = LOG.isVerbose() ? new FiniteProgress("Iteration progress for current cluster", m * n, LOG) : null;

  Random random = rnd.getSingleThreadedRandom();
  DBIDArrayIter iter = S.iter();

  for (int i = 0; i < n; ++i) {
    // Pick a random seed point.
    iter.seek(random.nextInt(S.size()));

    for (int j = 0; j < m; ++j) {
      // Choose a set of random points.
      DBIDs randomSet = DBIDUtil.randomSample(S, r, random);

      // Initialize cluster info.
      long[] nD = BitsUtil.zero(d);

      // Test each dimension and build bounding box.
      for (int k = 0; k < d; ++k) {
        if (dimensionIsRelevant(k, relation, randomSet)) {
          BitsUtil.setI(nD, k);
        }
      }
      if (BitsUtil.cardinality(nD) > 0) {
        // Get all points in the box.
        df.setSelectedDimensions(nD);
        // TODO: add filtering capabilities into query API!
        DBIDs nC = DBIDUtil.intersection(S, rq.getRangeForDBID(iter, w));

        if (LOG.isDebuggingFiner()) {
          LOG.finer("Testing a cluster candidate, |C| = " + nC.size() + ", |D| = " + BitsUtil.cardinality(nD));
        }

        // Is the cluster large enough?
        if (nC.size() < minClusterSize) {
          // Too small.
          if (LOG.isDebuggingFiner()) {
            LOG.finer("... but it's too small.");
          }
        } else {
          // Better cluster than before?
          double nQuality = computeClusterQuality(nC.size(), BitsUtil.cardinality(nD));
          if (nQuality > quality) {
            if (LOG.isDebuggingFiner()) {
              LOG.finer("... and it's the best so far: " + nQuality + " vs. " + quality);
            }
            C = nC;
            D = nD;
            quality = nQuality;
          } else {
            if (LOG.isDebuggingFiner()) {
              LOG.finer("... but we already have a better one.");
            }
          }
        }
      }
      LOG.incrementProcessed(iprogress);
    }
  }
  LOG.ensureCompleted(iprogress);

  return (C != null) ? makeCluster(relation, C, D) : null;
}