Java Cluster.addPhrases Examples

Programming Language: Java

Namespace/Package Name: org.carrot2.core

Class/Type: Cluster

Method/Function: addPhrases

Examples at hotexamples.com: 4

Java Cluster.addPhrases - 4 examples found. These are the top-rated real-world Java examples of org.carrot2.core.Cluster.addPhrases, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

addDocuments(6)

getSubclusters(5)

appendOtherTopics(4)

addPhrases(4)

getAllDocuments(3)

getLabel(3)

getDocuments(3)

flatten(2)

collectAllDocuments(2)

byReversedWeightedScoreAndSizeComparator(2)

isOtherTopics(2)

setAttribute(2)

size(2)

getAttribute(1)

assignClusterIds(1)

id(1)

setOtherTopics(1)

setScore(1)

Example #1

Show file

File: ByFieldClusteringAlgorithm.java Project: changbai1980/carrot2

  /**
   * Adds {@code document} to the cluster registered under {@code fieldValue}. If no cluster is
   * registered for that value yet, a new one is created, labelled with the result of
   * {@code buildClusterLabel(fieldValue)} and stored in {@code clusters} first. Nothing happens
   * when {@code fieldValue} is {@code null}.
   *
   * @param clusters clusters created so far, keyed by field value; modified in place
   * @param fieldValue key the document is grouped by, may be {@code null}
   * @param document document to add to the matching cluster
   */
  private void addToCluster(Map<Object, Cluster> clusters, Object fieldValue, Document document) {
    if (fieldValue != null) {
      Cluster target = clusters.get(fieldValue);
      if (target == null) {
        // First document seen for this value: create, label and register its cluster.
        target = new Cluster();
        target.addPhrases(buildClusterLabel(fieldValue));
        clusters.put(fieldValue, target);
      }

      target.addDocuments(document);
    }
  }

Example #2

Show file

File: STCClusteringAlgorithm.java Project: nkabir/carrot2

  /**
   * Create the junk (unassigned documents) cluster and create the final set of clusters in Carrot2
   * format.
   *
   * <p>Each candidate is converted to a {@code Cluster} carrying its phrases, documents and score
   * and appended to {@code this.clusters}. The junk cluster is then delegated to {@code
   * Cluster.appendOtherTopics}, which receives this algorithm's documents and clusters.
   *
   * @param clusters cluster candidates to convert, in the order they should be appended
   */
  private void postProcessing(ArrayList<ClusterCandidate> clusters) {
    // Adapt to Carrot2 classes. The two lists below are scratch buffers that are handed to the
    // collect* helpers and cleared after every candidate so they can be reused.
    final ArrayList<Document> docs = Lists.newArrayListWithCapacity(documents.size());
    final ArrayList<String> phrases = Lists.newArrayListWithCapacity(3);
    for (ClusterCandidate c : clusters) {
      final Cluster c2 = new Cluster();
      c2.addPhrases(collectPhrases(phrases, c));
      c2.addDocuments(collectDocuments(docs, c.documents));
      c2.setScore((double) c.score);
      this.clusters.add(c2);

      docs.clear();
      phrases.clear();
    }

    Cluster.appendOtherTopics(this.documents, this.clusters);
  }

Example #3

Show file

File: Cluster.java Project: ashish0038/rahasia

 /**
  * Builds a {@link Cluster} whose label is formed from the given <code>phrase</code> and which
  * holds the given <code>documents</code>.
  *
  * @param phrase the phrase passed to {@code addPhrases} to label the new cluster
  * @param documents the documents passed to {@code addDocuments} for the new cluster
  */
 public Cluster(String phrase, Document... documents) {
   this.addPhrases(phrase);
   this.addDocuments(documents);
 }

Example #4

Show file

File: LingoClusteringAlgorithm.java Project: kimgungoo/carrot2

  /**
   * Performs the actual clustering with an assumption that all documents are written in one <code>
   * language</code>.
   *
   * <p>The documents are preprocessed first. If preprocessing produced any labels, the method
   * builds and reduces the term-document matrix, builds cluster labels, assigns documents, merges
   * clusters, converts the surviving ones to {@link Cluster} instances and sorts them. Whether or
   * not labels were found, {@code Cluster.appendOtherTopics} is called last with the documents and
   * the resulting cluster list.
   *
   * @param language language passed to the preprocessing pipeline for all documents
   */
  private void cluster(LanguageCode language) {
    // Preprocessing of documents
    final PreprocessingContext context =
        preprocessingPipeline.preprocess(documents, query, language);

    // Further processing only if there are words to process
    clusters = Lists.newArrayList();
    if (context.hasLabels()) {
      // Term-document matrix building and reduction
      final VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(context);
      final ReducedVectorSpaceModelContext reducedVsmContext =
          new ReducedVectorSpaceModelContext(vsmContext);
      final LingoProcessingContext lingoContext = new LingoProcessingContext(reducedVsmContext);

      matrixBuilder.buildTermDocumentMatrix(vsmContext);
      matrixBuilder.buildTermPhraseMatrix(vsmContext);

      matrixReducer.reduce(
          reducedVsmContext, computeClusterCount(desiredClusterCountBase, documents.size()));

      // Cluster label building
      clusterBuilder.buildLabels(lingoContext, matrixBuilder.termWeighting);

      // Document assignment
      clusterBuilder.assignDocuments(lingoContext);

      // Cluster merging
      clusterBuilder.merge(lingoContext);

      // Format final clusters
      final int[] clusterLabelIndex = lingoContext.clusterLabelFeatureIndex;
      final BitSet[] clusterDocuments = lingoContext.clusterDocuments;
      final double[] clusterLabelScore = lingoContext.clusterLabelScore;
      for (int i = 0; i < clusterLabelIndex.length; i++) {
        final int labelFeature = clusterLabelIndex[i];
        if (labelFeature < 0) {
          // Cluster removed during merging. Checked before allocating anything so that no
          // throwaway Cluster is created for skipped entries.
          continue;
        }

        // Add label and score
        final Cluster cluster = new Cluster();
        cluster.addPhrases(labelFormatter.format(context, labelFeature));
        cluster.setAttribute(Cluster.SCORE, clusterLabelScore[i]);

        // Add documents: every set bit in the cluster's BitSet is an index into documents
        final BitSet bs = clusterDocuments[i];
        for (int bit = bs.nextSetBit(0); bit >= 0; bit = bs.nextSetBit(bit + 1)) {
          cluster.addDocuments(documents.get(bit));
        }

        // Add cluster
        clusters.add(cluster);
      }

      Collections.sort(clusters, Cluster.byReversedWeightedScoreAndSizeComparator(scoreWeight));
    }

    Cluster.appendOtherTopics(documents, clusters);
  }