Ejemplos de Cluster.appendOtherTopics en Java

Lenguaje de programación: Java

Namespace/Package Name: org.carrot2.core

Clase / Tipo: Cluster

Método / Función: appendOtherTopics

Ejemplos en hotexamples.com: 4

Java Cluster.appendOtherTopics - 4 ejemplos encontrados. Estos son los ejemplos en Java del mundo real mejor valorados de org.carrot2.core.Cluster.appendOtherTopics extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

addDocuments(6)

getSubclusters(5)

appendOtherTopics(4)

addPhrases(4)

getAllDocuments(3)

getLabel(3)

getDocuments(3)

flatten(2)

collectAllDocuments(2)

byReversedWeightedScoreAndSizeComparator(2)

isOtherTopics(2)

setAttribute(2)

size(2)

getAttribute(1)

assignClusterIds(1)

id(1)

setOtherTopics(1)

setScore(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: ByFieldClusteringAlgorithm.java Proyecto: changbai1980/carrot2

  /**
   * Groups the input documents by the value of the field named by {@code fieldName}.
   *
   * <p>Each document's field value is passed to {@code addToCluster}; when the value is a
   * {@link Collection}, {@code addToCluster} is invoked once per element instead. The resulting
   * clusters are sorted with {@code Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR} and then
   * handed, together with the full document list, to {@code Cluster.appendOtherTopics}.
   */
  @Override
  public void process() throws ProcessingException {
    final Map<Object, Cluster> clustersByValue = Maps.newHashMap();
    for (Document doc : documents) {
      final Object fieldValue = doc.getField(fieldName);

      // Single-valued field: the value itself is the grouping key.
      if (!(fieldValue instanceof Collection<?>)) {
        addToCluster(clustersByValue, fieldValue, doc);
        continue;
      }

      // Multi-valued field: the document is registered under every element.
      for (Object element : (Collection<?>) fieldValue) {
        addToCluster(clustersByValue, element, doc);
      }
    }

    clusters = Lists.newArrayList(clustersByValue.values());
    Collections.sort(clusters, Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR);
    Cluster.appendOtherTopics(documents, clusters);
  }

Ejemplo n.º 2

Mostrar archivo

Archivo: STCClusteringAlgorithm.java Proyecto: nkabir/carrot2

  /**
   * Converts the given cluster candidates to Carrot2 {@link Cluster} instances, adds them to
   * {@code this.clusters} in iteration order, and then calls {@code Cluster.appendOtherTopics}
   * with the full document list and the resulting clusters (presumably this is what creates the
   * junk cluster of unassigned documents; the logic lives in {@code Cluster}, not here).
   *
   * @param clusters cluster candidates to convert
   */
  private void postProcessing(ArrayList<ClusterCandidate> clusters) {
    // Scratch buffers handed to the collect* helpers; cleared after every candidate so they can
    // be reused instead of reallocated.
    final ArrayList<Document> docs = Lists.newArrayListWithCapacity(documents.size());
    final ArrayList<String> phrases = Lists.newArrayListWithCapacity(3);
    for (ClusterCandidate c : clusters) {
      final Cluster c2 = new Cluster();
      c2.addPhrases(collectPhrases(phrases, c));
      c2.addDocuments(collectDocuments(docs, c.documents));
      c2.setScore((double) c.score);
      this.clusters.add(c2);

      docs.clear();
      phrases.clear();
    }

    // Unassigned documents are determined by appendOtherTopics itself from the two lists, so no
    // separate "used documents" bookkeeping is kept in this method.
    Cluster.appendOtherTopics(this.documents, this.clusters);
  }

Ejemplo n.º 3

Mostrar archivo

Archivo: Cluster.java Proyecto: ashish0038/rahasia

 /**
  * Convenience overload that forwards to the three-argument {@code appendOtherTopics}, passing
  * {@code OTHER_TOPICS_LABEL} as the third argument.
  *
  * <p>If there are unclustered documents, appends the "Other Topics" group to the
  * <code>clusters</code> (this behavior is implemented by the delegate).
  *
  * @param allDocuments forwarded unchanged as the delegate's first argument
  * @param clusters forwarded unchanged as the delegate's second argument
  * @see #buildOtherTopics(List, List)
  */
 public static void appendOtherTopics(List<Document> allDocuments, List<Cluster> clusters) {
   appendOtherTopics(allDocuments, clusters, OTHER_TOPICS_LABEL);
 }

Ejemplo n.º 4

Mostrar archivo

Archivo: LingoClusteringAlgorithm.java Proyecto: kimgungoo/carrot2

  /**
   * Performs the actual clustering with an assumption that all documents are written in one
   * <code>language</code>.
   *
   * <p>The documents are preprocessed first. If preprocessing yields labels, the term-document
   * and term-phrase matrices are built and reduced, cluster labels are built, documents are
   * assigned, clusters are merged, and the surviving clusters are converted to {@link Cluster}
   * instances and sorted. In every case {@code clusters} is reset to a fresh list and
   * {@code Cluster.appendOtherTopics} is called at the end.
   *
   * @param language language passed to the preprocessing pipeline
   */
  private void cluster(LanguageCode language) {
    // Preprocessing of documents
    final PreprocessingContext context =
        preprocessingPipeline.preprocess(documents, query, language);

    // Further processing only if there are words to process
    clusters = Lists.newArrayList();
    if (context.hasLabels()) {
      // Term-document matrix building and reduction
      final VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(context);
      final ReducedVectorSpaceModelContext reducedVsmContext =
          new ReducedVectorSpaceModelContext(vsmContext);
      final LingoProcessingContext lingoContext = new LingoProcessingContext(reducedVsmContext);

      matrixBuilder.buildTermDocumentMatrix(vsmContext);
      matrixBuilder.buildTermPhraseMatrix(vsmContext);

      matrixReducer.reduce(
          reducedVsmContext, computeClusterCount(desiredClusterCountBase, documents.size()));

      // Cluster label building
      clusterBuilder.buildLabels(lingoContext, matrixBuilder.termWeighting);

      // Document assignment
      clusterBuilder.assignDocuments(lingoContext);

      // Cluster merging
      clusterBuilder.merge(lingoContext);

      // Format final clusters
      final int[] clusterLabelIndex = lingoContext.clusterLabelFeatureIndex;
      final BitSet[] clusterDocuments = lingoContext.clusterDocuments;
      final double[] clusterLabelScore = lingoContext.clusterLabelScore;
      for (int i = 0; i < clusterLabelIndex.length; i++) {
        final int labelFeature = clusterLabelIndex[i];
        if (labelFeature < 0) {
          // Cluster removed during merging; skip it before allocating anything for it.
          continue;
        }

        final Cluster cluster = new Cluster();

        // Add label and score
        cluster.addPhrases(labelFormatter.format(context, labelFeature));
        cluster.setAttribute(Cluster.SCORE, clusterLabelScore[i]);

        // Add documents: every set bit is an index into the input document list
        final BitSet bs = clusterDocuments[i];
        for (int bit = bs.nextSetBit(0); bit >= 0; bit = bs.nextSetBit(bit + 1)) {
          cluster.addDocuments(documents.get(bit));
        }

        // Add cluster
        clusters.add(cluster);
      }

      Collections.sort(clusters, Cluster.byReversedWeightedScoreAndSizeComparator(scoreWeight));
    }

    Cluster.appendOtherTopics(documents, clusters);
  }