/**
 * Resets the values for the next cluster search.
 */
protected void reset() {
  rows = BitsUtil.ones(rowM.length);
  rowcard = rowM.length;
  cols = BitsUtil.ones(colM.length);
  colcard = colM.length;
  BitsUtil.zeroI(irow);
}
/**
 * Constructor.
 *
 * @param rows Row dimensionality.
 * @param cols Column dimensionality.
 */
protected BiclusterCandidate(int rows, int cols) {
  super();
  this.rows = BitsUtil.ones(rows);
  this.irow = BitsUtil.zero(rows);
  this.rowcard = rows;
  this.rowM = new double[rows];
  this.cols = BitsUtil.ones(cols);
  this.colcard = cols;
  this.colM = new double[cols];
}
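/*
 * For orientation: the candidate keeps its selected rows and columns as
 * long[] bitmasks via ELKI's BitsUtil, together with explicit cardinality
 * counters (rowcard, colcard). The snippet below is a minimal, illustrative
 * sketch of that convention only, not part of the class; it assumes the
 * BitsUtil package path of ELKI 0.7.x, which may differ in other releases.
 */
import de.lmu.ifi.dbs.elki.utilities.BitsUtil;

class BitmaskSketch {
  public static void main(String[] args) {
    int rows = 100;
    long[] selected = BitsUtil.ones(rows); // all rows selected initially
    long[] inverted = BitsUtil.zero(rows); // no rows flagged as inverted yet

    BitsUtil.clearI(selected, 42); // drop row 42 from the candidate
    // The class tracks the count incrementally; recounting gives the same value.
    int rowcard = (int) BitsUtil.cardinality(selected);

    System.out.println("selected rows: " + rowcard); // 99
    System.out.println("row 42 selected: " + BitsUtil.get(selected, 42)); // false
    System.out.println("inverted rows: " + BitsUtil.cardinality(inverted)); // 0
  }
}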
@Override
public Clustering<BiclusterWithInversionsModel> biclustering() {
  double[][] mat = RelationUtil.relationAsMatrix(relation, rowIDs);

  BiclusterCandidate cand = new BiclusterCandidate(getRowDim(), getColDim());

  Clustering<BiclusterWithInversionsModel> result = new Clustering<>("Cheng-and-Church", "Cheng and Church Biclustering");
  ModifiableDBIDs noise = DBIDUtil.newHashSet(relation.getDBIDs());

  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Extracting Cluster", n, LOG) : null;
  for (int i = 0; i < n; i++) {
    cand.reset();
    multipleNodeDeletion(mat, cand);
    if (LOG.isVeryVerbose()) {
      LOG.veryverbose("Residue after Alg 2: " + cand.residue + " " + cand.rowcard + "x" + cand.colcard);
    }
    singleNodeDeletion(mat, cand);
    if (LOG.isVeryVerbose()) {
      LOG.veryverbose("Residue after Alg 1: " + cand.residue + " " + cand.rowcard + "x" + cand.colcard);
    }
    nodeAddition(mat, cand);
    if (LOG.isVeryVerbose()) {
      LOG.veryverbose("Residue after Alg 3: " + cand.residue + " " + cand.rowcard + "x" + cand.colcard);
    }
    cand.maskMatrix(mat, dist);
    BiclusterWithInversionsModel model = new BiclusterWithInversionsModel(colsBitsetToIDs(cand.cols), rowsBitsetToIDs(cand.irow));
    final ArrayDBIDs cids = rowsBitsetToIDs(cand.rows);
    noise.removeDBIDs(cids);
    result.addToplevelCluster(new Cluster<>(cids, model));

    if (LOG.isVerbose()) {
      LOG.verbose("Score of bicluster " + (i + 1) + ": " + cand.residue + "\n");
      LOG.verbose("Number of rows: " + cand.rowcard + "\n");
      LOG.verbose("Number of columns: " + cand.colcard + "\n");
      // LOG.verbose("Total number of masked values: " + maskedVals.size() + "\n");
    }
    LOG.incrementProcessed(prog);
  }
  // Add a noise cluster, full-dimensional.
  if (!noise.isEmpty()) {
    long[] allcols = BitsUtil.ones(getColDim());
    BiclusterWithInversionsModel model = new BiclusterWithInversionsModel(colsBitsetToIDs(allcols), DBIDUtil.EMPTYDBIDS);
    result.addToplevelCluster(new Cluster<>(noise, true, model));
  }
  LOG.ensureCompleted(prog);
  return result;
}
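/*
 * After each extraction, maskMatrix overwrites the cells of the found
 * bicluster so that later iterations do not rediscover the same pattern.
 * The sketch below only illustrates that idea; it is not the actual
 * maskMatrix implementation, and `nextValue` is a stand-in for the
 * configured replacement distribution rather than ELKI's Distribution API.
 */
static void maskSketch(double[][] mat, long[] rows, long[] cols, java.util.function.DoubleSupplier nextValue) {
  // Iterate only over the selected rows and columns of the bicluster.
  for (int i = BitsUtil.nextSetBit(rows, 0); i >= 0; i = BitsUtil.nextSetBit(rows, i + 1)) {
    for (int j = BitsUtil.nextSetBit(cols, 0); j >= 0; j = BitsUtil.nextSetBit(cols, j + 1)) {
      mat[i][j] = nextValue.getAsDouble(); // replace with a random value
    }
  }
}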
/**
 * Performs the DOC or FastDOC (as configured) algorithm on the given Database.
 *
 * <p>This will run exhaustively, i.e. run DOC until no more clusters are found, or the database
 * size has shrunk below the minimum cluster size threshold.
 *
 * @param database Database
 * @param relation Data relation
 */
public Clustering<SubspaceModel> run(Database database, Relation<V> relation) {
  // Dimensionality of our data set.
  final int d = RelationUtil.dimensionality(relation);

  // Get available DBIDs as a set we can remove items from.
  ArrayModifiableDBIDs S = DBIDUtil.newArray(relation.getDBIDs());

  // Precompute values as described in Figure 2.
  double r = Math.abs(Math.log(d + d) / Math.log(beta * .5));
  // Outer loop count.
  int n = (int) (2. / alpha);
  // Inner loop count.
  int m = (int) (Math.pow(2. / alpha, r) * Math.log(4));
  if (heuristics) {
    m = Math.min(m, Math.min(1000000, d * d));
  }

  // Minimum size for a cluster for it to be accepted.
  int minClusterSize = (int) (alpha * S.size());

  // List of all clusters we found.
  Clustering<SubspaceModel> result = new Clustering<>("DOC Clusters", "DOC");

  // Inform the user about the number of actual clusters found so far.
  IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;

  // To not only find a single cluster, we continue running until our set
  // of points is empty.
  while (S.size() > minClusterSize) {
    Cluster<SubspaceModel> C;
    if (heuristics) {
      C = runFastDOC(database, relation, S, d, n, m, (int) r);
    }
    else {
      C = runDOC(database, relation, S, d, n, m, (int) r, minClusterSize);
    }
    if (C == null) {
      // Stop trying if we couldn't find a cluster.
      break;
    }
    // Found a cluster, remember it, remove its points from the set.
    result.addToplevelCluster(C);
    // Remove all points of the cluster from the set and continue.
    S.removeDBIDs(C.getIDs());
    if (cprogress != null) {
      cprogress.setProcessed(result.getAllClusters().size(), LOG);
    }
  }

  // Add the remainder as noise.
  if (S.size() > 0) {
    long[] alldims = BitsUtil.ones(d);
    result.addToplevelCluster(new Cluster<>(S, true, new SubspaceModel(new Subspace(alldims), Centroid.make(relation, S).getArrayRef())));
  }
  LOG.setCompleted(cprogress);
  return result;
}
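/*
 * For intuition about the precomputed loop bounds r, n and m above, a tiny
 * standalone calculation with illustrative (not default) parameter values
 * alpha = 0.2, beta = 0.8, d = 10.
 */
class DocParameterSketch {
  public static void main(String[] args) {
    double alpha = 0.2, beta = 0.8;
    int d = 10;

    // r, as precomputed in run() above; about 3.27 for these values.
    double r = Math.abs(Math.log(d + d) / Math.log(beta * .5));
    // Outer loop count: 10 here.
    int n = (int) (2. / alpha);
    // Inner loop count: a few thousand here, without the FastDOC heuristics.
    int m = (int) (Math.pow(2. / alpha, r) * Math.log(4));
    // The FastDOC heuristics cap m; for these values at d * d = 100.
    int mCapped = Math.min(m, Math.min(1000000, d * d));

    System.out.println("r=" + r + " n=" + n + " m=" + m + " capped=" + mCapped);
  }
}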