Пример #1
0
  /**
   * Computes the H value for a single cluster.
   *
   * <p>The counts produced by {@code getDocumentCountByPartition} for all of the
   * cluster's documents are copied into a list and handed to the list-based
   * {@code calculateH} overload, which does the actual computation.
   *
   * @param cluster the cluster whose documents are examined
   * @return the value returned by the list-based {@code calculateH} overload
   */
  int calculateH(Cluster cluster) {
    final Map<Object, Integer> countsByPartition =
        getDocumentCountByPartition(cluster.getAllDocuments());

    // Only the counts matter here; the partition keys are dropped.
    final ArrayList<Integer> partitionSizes =
        new ArrayList<Integer>(countsByPartition.values());

    return calculateH(partitionSizes);
  }
  /**
   * Adds a document to the cluster registered under the given field value,
   * creating and registering that cluster first if the map has none yet.
   *
   * <p>A {@code null} field value is ignored: the document is not added anywhere.
   *
   * @param clusters map from field value to the cluster collecting its documents;
   *     updated in place when a new cluster is created
   * @param fieldValue the value used as the cluster key, may be {@code null}
   * @param document the document to add to the matching cluster
   */
  private void addToCluster(Map<Object, Cluster> clusters, Object fieldValue, Document document) {
    if (fieldValue != null) {
      Cluster target = clusters.get(fieldValue);
      if (target == null) {
        // First document seen for this value: create a labelled cluster and register it.
        target = new Cluster();
        target.addPhrases(buildClusterLabel(fieldValue));
        clusters.put(fieldValue, target);
      }

      target.addDocuments(document);
    }
  }
  /**
   * Groups the input documents by the value of the field named {@code fieldName}.
   *
   * <p>Each distinct field value gets its own cluster. When the field holds a
   * {@link Collection}, every element is treated as a separate value, so one
   * document may end up in several clusters. {@code null} values are skipped by
   * {@code addToCluster}.
   *
   * <p>The resulting clusters are stored in {@code clusters}, sorted with
   * {@code Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR}, and finally passed to
   * {@code Cluster.appendOtherTopics} together with the input documents.
   * NOTE(review): presumably that call adds a group for documents not assigned
   * to any cluster; confirm against {@code Cluster}.
   *
   * @throws ProcessingException declared by the overridden method; nothing in
   *     this body throws it directly
   */
  @Override
  public void process() throws ProcessingException {
    final Map<Object, Cluster> clustersByValue = Maps.newHashMap();
    for (Document doc : documents) {
      final Object fieldContent = doc.getField(fieldName);
      if (!(fieldContent instanceof Collection<?>)) {
        // Single-valued (or missing) field: the value itself is the cluster key.
        addToCluster(clustersByValue, fieldContent, doc);
        continue;
      }

      // Multi-valued field: each element is a cluster key of its own.
      for (Object element : (Collection<?>) fieldContent) {
        addToCluster(clustersByValue, element, doc);
      }
    }

    clusters = Lists.newArrayList(clustersByValue.values());
    Collections.sort(clusters, Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR);
    Cluster.appendOtherTopics(documents, clusters);
  }