示例#1
0
文件: SUBCLU.java 项目: fjfd/elki
  /**
   * Generates {@code d+1}-dimensional subspace candidates from the specified {@code d}-dimensional
   * subspaces.
   *
   * @param subspaces the {@code d}-dimensional subspaces
   * @return the {@code d+1}-dimensional subspace candidates
   */
  private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) {
    List<Subspace> candidates = new ArrayList<>();

    if (subspaces.isEmpty()) {
      return candidates;
    }

    // Generate (d+1)-dimensional candidate subspaces
    int d = subspaces.get(0).dimensionality();

    StringBuilder msgFine = new StringBuilder("\n");
    if (LOG.isDebuggingFiner()) {
      msgFine.append("subspaces ").append(subspaces).append('\n');
    }

    for (int i = 0; i < subspaces.size(); i++) {
      Subspace s1 = subspaces.get(i);
      for (int j = i + 1; j < subspaces.size(); j++) {
        Subspace s2 = subspaces.get(j);
        Subspace candidate = s1.join(s2);

        if (candidate != null) {
          if (LOG.isDebuggingFiner()) {
            msgFine.append("candidate: ").append(candidate.dimensonsToString()).append('\n');
          }
          // prune irrelevant candidate subspaces
          List<Subspace> lowerSubspaces = lowerSubspaces(candidate);
          if (LOG.isDebuggingFiner()) {
            msgFine.append("lowerSubspaces: ").append(lowerSubspaces).append('\n');
          }
          boolean irrelevantCandidate = false;
          for (Subspace s : lowerSubspaces) {
            if (!subspaces.contains(s)) {
              irrelevantCandidate = true;
              break;
            }
          }
          if (!irrelevantCandidate) {
            candidates.add(candidate);
          }
        }
      }
    }

    if (LOG.isDebuggingFiner()) {
      LOG.debugFiner(msgFine.toString());
    }
    if (LOG.isDebugging()) {
      StringBuilder msg = new StringBuilder();
      msg.append(d + 1).append("-dimensional candidate subspaces: ");
      for (Subspace candidate : candidates) {
        msg.append(candidate.dimensonsToString()).append(' ');
      }
      LOG.debug(msg.toString());
    }

    return candidates;
  }
示例#2
0
文件: SUBCLU.java 项目: fjfd/elki
  /**
   * Performs the SUBCLU algorithm on the given database.
   *
   * @param relation Relation to process
   * @return Clustering result
   */
  public Clustering<SubspaceModel> run(Relation<V> relation) {
    final int dimensionality = RelationUtil.dimensionality(relation);

    StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;

    // Generate all 1-dimensional clusters
    LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");

    // mapping of dimensionality to set of subspaces
    HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();

    // list of 1-dimensional subspaces containing clusters
    List<Subspace> s_1 = new ArrayList<>();
    subspaceMap.put(0, s_1);

    // mapping of subspaces to list of clusters
    TreeMap<Subspace, List<Cluster<Model>>> clusterMap =
        new TreeMap<>(new Subspace.DimensionComparator());

    for (int d = 0; d < dimensionality; d++) {
      Subspace currentSubspace = new Subspace(d);
      List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);

      if (LOG.isDebuggingFiner()) {
        StringBuilder msg = new StringBuilder();
        msg.append('\n')
            .append(clusters.size())
            .append(" clusters in subspace ")
            .append(currentSubspace.dimensonsToString())
            .append(": \n");
        for (Cluster<Model> cluster : clusters) {
          msg.append("      " + cluster.getIDs() + "\n");
        }
        LOG.debugFiner(msg.toString());
      }

      if (!clusters.isEmpty()) {
        s_1.add(currentSubspace);
        clusterMap.put(currentSubspace, clusters);
      }
    }

    // Generate (d+1)-dimensional clusters from d-dimensional clusters
    for (int d = 0; d < dimensionality - 1; d++) {
      if (stepprog != null) {
        stepprog.beginStep(
            d + 2,
            "Generate "
                + (d + 2)
                + "-dimensional clusters from "
                + (d + 1)
                + "-dimensional clusters.",
            LOG);
      }

      List<Subspace> subspaces = subspaceMap.get(d);
      if (subspaces == null || subspaces.isEmpty()) {
        if (stepprog != null) {
          for (int dim = d + 1; dim < dimensionality - 1; dim++) {
            stepprog.beginStep(
                dim + 2,
                "Generation of"
                    + (dim + 2)
                    + "-dimensional clusters not applicable, because no more "
                    + (d + 2)
                    + "-dimensional subspaces found.",
                LOG);
          }
        }
        break;
      }

      List<Subspace> candidates = generateSubspaceCandidates(subspaces);
      List<Subspace> s_d = new ArrayList<>();

      for (Subspace candidate : candidates) {
        Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
        if (LOG.isDebuggingFine()) {
          LOG.debugFine(
              "best subspace of "
                  + candidate.dimensonsToString()
                  + ": "
                  + bestSubspace.dimensonsToString());
        }

        List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
        List<Cluster<Model>> clusters = new ArrayList<>();
        for (Cluster<Model> cluster : bestSubspaceClusters) {
          List<Cluster<Model>> candidateClusters = runDBSCAN(relation, cluster.getIDs(), candidate);
          if (!candidateClusters.isEmpty()) {
            clusters.addAll(candidateClusters);
          }
        }

        if (LOG.isDebuggingFine()) {
          StringBuilder msg = new StringBuilder();
          msg.append(clusters.size() + " cluster(s) in subspace " + candidate + ": \n");
          for (Cluster<Model> c : clusters) {
            msg.append("      " + c.getIDs() + "\n");
          }
          LOG.debugFine(msg.toString());
        }

        if (!clusters.isEmpty()) {
          s_d.add(candidate);
          clusterMap.put(candidate, clusters);
        }
      }

      if (!s_d.isEmpty()) {
        subspaceMap.put(d + 1, s_d);
      }
    }

    // build result
    int numClusters = 1;
    result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
    for (Subspace subspace : clusterMap.descendingKeySet()) {
      List<Cluster<Model>> clusters = clusterMap.get(subspace);
      for (Cluster<Model> cluster : clusters) {
        Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs());
        newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs())));
        newCluster.setName("cluster_" + numClusters++);
        result.addToplevelCluster(newCluster);
      }
    }

    LOG.setCompleted(stepprog);
    return result;
  }