Example #1
  /** Replaces document refids with the actual document references upon deserialization. */
  private void documentIdToReference(Cluster cluster, Map<String, Document> documents) {
    if (cluster.documentIds != null) {
      for (Cluster.DocumentRefid documentRefid : cluster.documentIds) {
        cluster.addDocuments(documents.get(documentRefid.refid));
      }
    }

    for (Cluster subcluster : cluster.getSubclusters()) {
      documentIdToReference(subcluster, documents);
    }
  }
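
A hedged usage sketch for the method above: after deserialization, the refid lookup map would be built from the flat document list before re-linking the clusters. The deserializedDocuments/deserializedClusters names are assumptions, as is the Document.getStringId() accessor.

  // Hypothetical driver: build the refid lookup map once, then re-link
  // every top-level cluster recursively via documentIdToReference().
  Map<String, Document> documentsById = new LinkedHashMap<>();
  for (Document document : deserializedDocuments) {
    documentsById.put(document.getStringId(), document); // assumed accessor
  }
  for (Cluster cluster : deserializedClusters) {
    documentIdToReference(cluster, documentsById);
  }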
Example #2
  /** Adds <code>document</code> to the cluster keyed by <code>fieldValue</code>, creating the cluster on first use. */
  private void addToCluster(Map<Object, Cluster> clusters, Object fieldValue, Document document) {
    if (fieldValue == null) {
      return;
    }

    Cluster cluster = clusters.get(fieldValue);
    if (cluster == null) {
      cluster = new Cluster();
      cluster.addPhrases(buildClusterLabel(fieldValue));
      clusters.put(fieldValue, cluster);
    }

    cluster.addDocuments(document);
  }
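
A minimal sketch of how this helper might be driven, assuming Carrot2's Document.getField(String) accessor; documents and fieldName are placeholders.

  // Hypothetical grouping loop: bucket each document by one of its field
  // values; clusters are created lazily inside addToCluster().
  Map<Object, Cluster> clustersByFieldValue = new LinkedHashMap<>();
  for (Document document : documents) {
    addToCluster(clustersByFieldValue, document.getField(fieldName), document);
  }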
Example #3
  /**
   * Builds an "Other Topics" cluster that groups those documents from <code>allDocuments</code>
   * that were not referenced in any cluster in <code>clusters</code>.
   * were not referenced in any cluster in <code>clusters</code>.
   *
   * @param allDocuments all documents to check against
   * @param clusters list of clusters with assigned documents
   * @param label label for the "Other Topics" group
   * @return the "Other Topics" cluster
   */
  public static Cluster buildOtherTopics(
      List<Document> allDocuments, List<Cluster> clusters, String label) {
    final Set<Document> unclusteredDocuments = Sets.newLinkedHashSet(allDocuments);
    final Set<Document> assignedDocuments = Sets.newHashSet();

    for (Cluster cluster : clusters) {
      collectAllDocuments(cluster, assignedDocuments);
    }

    unclusteredDocuments.removeAll(assignedDocuments);

    final Cluster otherTopics = new Cluster(label);
    otherTopics.addDocuments(unclusteredDocuments);
    otherTopics.setOtherTopics(true);

    return otherTopics;
  }
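
A usage sketch, assuming the caller only wants the group appended when it is non-empty; the label text is illustrative.

  // Sketch: collect unassigned documents into a trailing group.
  final Cluster otherTopics = buildOtherTopics(allDocuments, clusters, "Other Topics");
  if (!otherTopics.getAllDocuments().isEmpty()) {
    clusters.add(otherTopics);
  }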
Example #4
  /**
   * Creates the junk (unassigned documents) cluster and builds the final set of clusters in
   * Carrot2 format.
   */
  private void postProcessing(ArrayList<ClusterCandidate> clusters) {
    // Adapt to Carrot2 classes, counting used documents on the way.
    final BitSet all = new BitSet(documents.size());
    final ArrayList<Document> docs = Lists.newArrayListWithCapacity(documents.size());
    final ArrayList<String> phrases = Lists.newArrayListWithCapacity(3);
    for (ClusterCandidate c : clusters) {
      final Cluster c2 = new Cluster();
      c2.addPhrases(collectPhrases(phrases, c));
      c2.addDocuments(collectDocuments(docs, c.documents));
      c2.setScore((double) c.score);
      this.clusters.add(c2);

      all.or(c.documents);
      docs.clear();
      phrases.clear();
    }

    Cluster.appendOtherTopics(this.documents, this.clusters);
  }
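
The collectDocuments helper referenced above is not shown; a minimal sketch of the bit-set-to-document mapping it presumably performs (the exact signature is an assumption).

  // Hypothetical sketch: each set bit in the candidate's BitSet is an
  // index into this.documents; matching documents are appended to target.
  private List<Document> collectDocuments(List<Document> target, BitSet bits) {
    for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
      target.add(documents.get(i));
    }
    return target;
  }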
Example #5
  /**
   * Creates a {@link Cluster} with the provided <code>phrase</code> to be used as the cluster's
   * label and <code>documents</code> contained in the cluster.
   *
   * @param phrase the phrase to form the cluster's label
   * @param documents documents contained in the cluster
   */
  public Cluster(String phrase, Document... documents) {
    addPhrases(phrase);
    addDocuments(documents);
  }
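
A short usage sketch for this convenience constructor; the document titles are made up for illustration.

  // Sketch: a labeled cluster built directly from two documents.
  final Document d1 = new Document("Installing the toolkit");
  final Document d2 = new Document("Configuring the toolkit");
  final Cluster cluster = new Cluster("Toolkit", d1, d2);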
  /**
   * Performs the actual clustering under the assumption that all documents are written in a
   * single <code>language</code>.
   */
  private void cluster(LanguageCode language) {
    // Preprocessing of documents
    final PreprocessingContext context =
        preprocessingPipeline.preprocess(documents, query, language);

    // Further processing only if there are words to process
    clusters = Lists.newArrayList();
    if (context.hasLabels()) {
      // Term-document matrix building and reduction
      final VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(context);
      final ReducedVectorSpaceModelContext reducedVsmContext =
          new ReducedVectorSpaceModelContext(vsmContext);
      LingoProcessingContext lingoContext = new LingoProcessingContext(reducedVsmContext);

      matrixBuilder.buildTermDocumentMatrix(vsmContext);
      matrixBuilder.buildTermPhraseMatrix(vsmContext);

      matrixReducer.reduce(
          reducedVsmContext, computeClusterCount(desiredClusterCountBase, documents.size()));

      // Cluster label building
      clusterBuilder.buildLabels(lingoContext, matrixBuilder.termWeighting);

      // Document assignment
      clusterBuilder.assignDocuments(lingoContext);

      // Cluster merging
      clusterBuilder.merge(lingoContext);

      // Format final clusters
      final int[] clusterLabelIndex = lingoContext.clusterLabelFeatureIndex;
      final BitSet[] clusterDocuments = lingoContext.clusterDocuments;
      final double[] clusterLabelScore = lingoContext.clusterLabelScore;
      for (int i = 0; i < clusterLabelIndex.length; i++) {
        final int labelFeature = clusterLabelIndex[i];
        if (labelFeature < 0) {
          // Cluster removed during merging
          continue;
        }

        final Cluster cluster = new Cluster();

        // Add label and score
        cluster.addPhrases(labelFormatter.format(context, labelFeature));
        cluster.setAttribute(Cluster.SCORE, clusterLabelScore[i]);

        // Add documents
        final BitSet bs = clusterDocuments[i];
        for (int bit = bs.nextSetBit(0); bit >= 0; bit = bs.nextSetBit(bit + 1)) {
          cluster.addDocuments(documents.get(bit));
        }

        // Add cluster
        clusters.add(cluster);
      }

      Collections.sort(clusters, Cluster.byReversedWeightedScoreAndSizeComparator(scoreWeight));
    }

    Cluster.appendOtherTopics(documents, clusters);
  }
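
For context, a sketch of driving this Lingo pipeline through Carrot2's public controller API instead of calling cluster(LanguageCode) directly; assumes Carrot2 3.x, and the documents list is a placeholder.

  // Sketch: cluster a prepared List<Document> with Lingo end to end.
  final Controller controller = ControllerFactory.createSimple();
  final ProcessingResult result =
      controller.process(documents, "query", LingoClusteringAlgorithm.class);
  for (Cluster cluster : result.getClusters()) {
    System.out.println(cluster.getLabel() + " (" + cluster.size() + " docs)");
  }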