Beispiel #1
0
  /**
   * Builds an "Other Topics" cluster that groups those documents from <code>allDocument</code> that
   * were not referenced in any cluster in <code>clusters</code>.
   *
   * @param allDocuments all documents to check against
   * @param clusters list of clusters with assigned documents
   * @param label label for the "Other Topics" group
   * @return the "Other Topics" cluster
   */
  public static Cluster buildOtherTopics(
      List<Document> allDocuments, List<Cluster> clusters, String label) {
    final Set<Document> unclusteredDocuments = Sets.newLinkedHashSet(allDocuments);
    final Set<Document> assignedDocuments = Sets.newHashSet();

    for (Cluster cluster : clusters) {
      collectAllDocuments(cluster, assignedDocuments);
    }

    unclusteredDocuments.removeAll(assignedDocuments);

    final Cluster otherTopics = new Cluster(label);
    otherTopics.addDocuments(unclusteredDocuments);
    otherTopics.setOtherTopics(true);

    return otherTopics;
  }
Beispiel #2
0
  /** A recursive routine for collecting unique documents from this cluster and subclusters. */
  private static Set<Document> collectAllDocuments(Cluster cluster, Set<Document> docs) {
    if (cluster == null) {
      return docs;
    }

    docs.addAll(cluster.getDocuments());

    final List<Cluster> subclusters = cluster.getSubclusters();
    for (final Cluster subcluster : subclusters) {
      collectAllDocuments(subcluster, docs);
    }

    return docs;
  }