Exemple #1
0
  /**
   * Tries to grow a DBSCAN cluster from a single start object.
   *
   * <p>The {@code epsilon}-range of the start object is queried first. If it contains fewer than
   * {@code minpts} entries, the start object is recorded in {@code noise} and the method returns.
   * Otherwise a new cluster is opened with the start object, its neighbors are handed to {@code
   * processNeighbors} together with the cluster and a seed set, and the seed set is then drained:
   * each popped seed is range-queried, and only seeds whose own range reaches {@code minpts} have
   * their neighbors passed on to {@code processNeighbors}. The finished cluster is appended to
   * {@code resultList}.
   *
   * <p>Border-Objects become members of the first possible cluster.
   *
   * @param relation Database relation to run on (not accessed directly in this method)
   * @param rangeQuery Range query to use
   * @param startObjectID potential seed of a new potential cluster
   * @param objprog progress over processed objects; may be {@code null}
   * @param clusprog progress over the number of clusters found; may be {@code null}
   */
  protected void expandCluster(
      Relation<O> relation,
      RangeQuery<O> rangeQuery,
      DBIDRef startObjectID,
      FiniteProgress objprog,
      IndefiniteProgress clusprog) {
    DoubleDBIDList neighborhood = rangeQuery.getRangeForDBID(startObjectID, epsilon);
    ncounter += neighborhood.size();
    // Noise or core object: either way the start object is handled by this call.
    processedIDs.add(startObjectID);

    final boolean isCore = neighborhood.size() >= minpts;
    if (!isCore) {
      noise.add(startObjectID);
      if (objprog != null) {
        objprog.incrementProcessed(LOG);
      }
      return;
    }

    // The start object is a core object, so it opens a new cluster.
    ModifiableDBIDs cluster = DBIDUtil.newArray();
    cluster.add(startObjectID);

    // Objects still waiting to have their own neighborhood examined.
    HashSetModifiableDBIDs seeds = DBIDUtil.newHashSet();
    processNeighbors(neighborhood.iter(), cluster, seeds);

    for (DBIDVar current = DBIDUtil.newVar(); !seeds.isEmpty(); ) {
      seeds.pop(current);
      DoubleDBIDList reachable = rangeQuery.getRangeForDBID(current, epsilon);
      ncounter += reachable.size();

      // Only seeds that are core objects themselves pass their neighbors on.
      if (reachable.size() >= minpts) {
        processNeighbors(reachable.iter(), cluster, seeds);
      }

      if (objprog != null) {
        objprog.incrementProcessed(LOG);
      }
    }

    resultList.add(cluster);
    if (clusprog != null) {
      clusprog.setProcessed(resultList.size(), LOG);
    }
  }
Exemple #2
0
  /**
   * Runs the DBSCAN main loop over every object of the relation.
   *
   * <p>{@code processedIDs} is re-created, then the objects are visited in iteration order and
   * {@code expandCluster} is invoked for each one that is not yet contained in {@code
   * processedIDs}. The scan stops early as soon as the number of processed IDs equals the size of
   * the relation. Progress is only reported when the logger is verbose.
   *
   * @param relation Data relation
   * @param rangeQuery Range query class
   */
  protected void runDBSCAN(Relation<O> relation, RangeQuery<O> rangeQuery) {
    final int size = relation.size();
    final boolean verbose = LOG.isVerbose();
    FiniteProgress objprog = verbose ? new FiniteProgress("Processing objects", size, LOG) : null;
    IndefiniteProgress clusprog =
        verbose ? new IndefiniteProgress("Number of clusters", LOG) : null;

    processedIDs = DBIDUtil.newHashSet(size);

    DBIDIter cursor = relation.iterDBIDs();
    while (cursor.valid()) {
      // Objects already handled by an earlier expansion are skipped.
      if (!processedIDs.contains(cursor)) {
        expandCluster(relation, rangeQuery, cursor, objprog, clusprog);
      }
      if (verbose) {
        // Re-synchronize both progress objects with the actual state.
        objprog.setProcessed(processedIDs.size(), LOG);
        clusprog.setProcessed(resultList.size(), LOG);
      }
      // Everything has been processed; the remaining iterations would all be skips.
      if (processedIDs.size() == size) {
        break;
      }
      cursor.advance();
    }

    // Finish progress logging
    LOG.ensureCompleted(objprog);
    LOG.setCompleted(clusprog);
  }
Exemple #3
0
  /**
   * Performs a single run of FastDOC, finding a single cluster.
   *
   * <p>For each of {@code n} randomly chosen seed points, {@code m} random samples of size {@code
   * r} are drawn from {@code S}. For every sample, each of the {@code d} dimensions is tested with
   * {@code dimensionIsRelevant}; the set of relevant dimensions with the highest cardinality seen
   * so far is kept together with the seed point that was active at that time. The search stops
   * early once that cardinality reaches {@code d_zero}. Finally, a range query with radius {@code
   * w} around the kept seed point, using a {@code SubspaceMaximumDistanceFunction} restricted to
   * the kept dimensions, is intersected with {@code S} to form the cluster.
   *
   * @param database Database context
   * @param relation used to get actual values for DBIDs.
   * @param S The set of points we're working on.
   * @param d Dimensionality of the data set we're currently working on.
   * @param n Number of outer iterations (seed points).
   * @param m Number of inner iterations (per seed point).
   * @param r Size of random samples.
   * @return a cluster, if one is found, else <code>null</code>.
   */
  private Cluster<SubspaceModel> runFastDOC(
      Database database, Relation<V> relation, ArrayModifiableDBIDs S, int d, int n, int m, int r) {
    // Relevant dimensions of highest cardinality found so far.
    long[] bestDims = null;
    // Seed point that was active when bestDims was found.
    DBIDVar bestSeed = DBIDUtil.newVar();

    // Inform the user about the progress in the current iteration.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
      progress = new FiniteProgress("Iteration progress for current cluster", m * n, LOG);
    }

    Random rng = rnd.getSingleThreadedRandom();

    DBIDArrayIter seedIter = S.iter();
    search:
    for (int seedRound = 0; seedRound < n; ++seedRound) {
      // Move to a randomly chosen seed point.
      seedIter.seek(rng.nextInt(S.size()));

      for (int trial = 0; trial < m; ++trial) {
        // Draw a fresh random sample for this trial.
        DBIDs sample = DBIDUtil.randomSample(S, r, rng);

        // Collect the dimensions that are relevant for this sample.
        long[] candidateDims = BitsUtil.zero(d);
        for (int dim = 0; dim < d; ++dim) {
          if (dimensionIsRelevant(dim, relation, sample)) {
            BitsUtil.setI(candidateDims, dim);
          }
        }

        final int candidateCard = BitsUtil.cardinality(candidateDims);
        final boolean improves =
            bestDims == null || candidateCard > BitsUtil.cardinality(bestDims);
        if (improves) {
          bestDims = candidateDims;
          bestSeed.set(seedIter);

          // Enough relevant dimensions: stop searching altogether.
          if (candidateCard >= d_zero) {
            if (progress != null) {
              progress.setProcessed(progress.getTotal(), LOG);
            }
            break search;
          }
        }
        LOG.incrementProcessed(progress);
      }
    }
    LOG.ensureCompleted(progress);

    // If no relevant dimensions were found, skip it.
    if (bestDims == null || BitsUtil.cardinality(bestDims) == 0) {
      return null;
    }

    // Get all points in the box.
    SubspaceMaximumDistanceFunction boxDistance = new SubspaceMaximumDistanceFunction(bestDims);
    DistanceQuery<V> distanceQuery = database.getDistanceQuery(relation, boxDistance);
    RangeQuery<V> boxQuery = database.getRangeQuery(distanceQuery, DatabaseQuery.HINT_SINGLE);

    // TODO: add filtering capabilities into query API!
    DBIDs members = DBIDUtil.intersection(S, boxQuery.getRangeForDBID(bestSeed, w));

    // If we have a non-empty cluster, return it.
    if (members.size() > 0) {
      return makeCluster(relation, members, bestDims);
    }
    return null;
  }