예제 #1
0
  /** Replace document refids with the actual references upon deserialization. */
  private void documentIdToReference(Cluster cluster, Map<String, Document> documents) {
    if (cluster.documentIds != null) {
      for (Cluster.DocumentRefid documentRefid : cluster.documentIds) {
        cluster.addDocuments(documents.get(documentRefid.refid));
      }
    }

    for (Cluster subcluster : cluster.getSubclusters()) {
      documentIdToReference(subcluster, documents);
    }
  }
예제 #2
0
  /**
   * Builds an "Other Topics" cluster that groups those documents from <code>allDocument</code> that
   * were not referenced in any cluster in <code>clusters</code>.
   *
   * @param allDocuments all documents to check against
   * @param clusters list of clusters with assigned documents
   * @param label label for the "Other Topics" group
   * @return the "Other Topics" cluster
   */
  public static Cluster buildOtherTopics(
      List<Document> allDocuments, List<Cluster> clusters, String label) {
    final Set<Document> unclusteredDocuments = Sets.newLinkedHashSet(allDocuments);
    final Set<Document> assignedDocuments = Sets.newHashSet();

    for (Cluster cluster : clusters) {
      collectAllDocuments(cluster, assignedDocuments);
    }

    unclusteredDocuments.removeAll(assignedDocuments);

    final Cluster otherTopics = new Cluster(label);
    otherTopics.addDocuments(unclusteredDocuments);
    otherTopics.setOtherTopics(true);

    return otherTopics;
  }