Java LabelFormatter примеры использования

Язык программирования: Java

Пространство имен/Пакет: org.carrot2.text.preprocessing

Класс/Тип: LabelFormatter

Примеров на hotexamples.com: 2

Java LabelFormatter — найдено 2 примера. Это лучшие примеры кода на Java для org.carrot2.text.preprocessing.LabelFormatter, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

format(2)

Пример #1

Показать файл

Файл: STCClusteringAlgorithm.java Проект: nkabir/carrot2

  /**
   * Builds the cluster's label from suffix tree edge indices.
   *
   * <p>{@code phraseIndices} is read as consecutive pairs of inclusive (first, last) positions.
   * Every position in a pair's range is resolved to a term index through {@code sb.input}, and the
   * term's image and "common word" flag are collected for formatting.
   *
   * @param phraseIndices consecutive pairs of inclusive (first, last) positions
   * @return the label returned by {@code LabelFormatter.format}
   */
  private String buildLabel(int[] phraseIndices) {
    // The join-with-space flag passed to the formatter is off only for Simplified Chinese.
    final boolean isSimplifiedChinese =
        context.language.getLanguageCode() == LanguageCode.CHINESE_SIMPLIFIED;

    // Pass 1: size the output arrays by summing the lengths of all inclusive ranges.
    int totalTerms = 0;
    for (int pair = 0; pair < phraseIndices.length; pair += 2) {
      final int last = phraseIndices[pair + 1];
      final int first = phraseIndices[pair];
      totalTerms += last - first + 1;
    }

    final boolean[] commonFlags = new boolean[totalTerms];
    final char[][] termImages = new char[totalTerms][];
    final short[] wordTypes = context.allWords.type;

    // Pass 2: walk the same ranges again, filling the arrays in phrase order.
    int out = 0;
    for (int pair = 0; pair < phraseIndices.length; pair += 2) {
      final int first = phraseIndices[pair];
      final int last = phraseIndices[pair + 1];
      for (int pos = first; pos <= last; pos++) {
        final int term = sb.input.get(pos);
        termImages[out] = context.allWords.image[term];
        commonFlags[out] = TokenTypeUtils.isCommon(wordTypes[term]);
        out++;
      }
    }

    return LabelFormatter.format(termImages, commonFlags, !isSimplifiedChinese);
  }

Пример #2

Показать файл

Файл: LingoClusteringAlgorithm.java Проект: kimgungoo/carrot2

  /**
   * Performs the actual clustering with an assumption that all documents are written in one <code>
   * language</code>.
   *
   * <p>The documents and query are preprocessed first. Only when the resulting context reports
   * labels are the matrices built and reduced, cluster labels built, documents assigned, clusters
   * merged, and the final {@code Cluster} objects created and sorted. In every case {@code
   * clusters} is replaced with a fresh list and {@code Cluster.appendOtherTopics} is called at the
   * end.
   *
   * @param language the language passed to the preprocessing pipeline
   */
  private void cluster(LanguageCode language) {
    // Preprocessing of documents
    final PreprocessingContext context =
        preprocessingPipeline.preprocess(documents, query, language);

    // Further processing only if there are words to process
    clusters = Lists.newArrayList();
    if (context.hasLabels()) {
      // Term-document matrix building and reduction
      final VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(context);
      final ReducedVectorSpaceModelContext reducedVsmContext =
          new ReducedVectorSpaceModelContext(vsmContext);
      LingoProcessingContext lingoContext = new LingoProcessingContext(reducedVsmContext);

      matrixBuilder.buildTermDocumentMatrix(vsmContext);
      matrixBuilder.buildTermPhraseMatrix(vsmContext);

      matrixReducer.reduce(
          reducedVsmContext, computeClusterCount(desiredClusterCountBase, documents.size()));

      // Cluster label building
      clusterBuilder.buildLabels(lingoContext, matrixBuilder.termWeighting);

      // Document assignment
      clusterBuilder.assignDocuments(lingoContext);

      // Cluster merging
      clusterBuilder.merge(lingoContext);

      // Format final clusters
      final int[] clusterLabelIndex = lingoContext.clusterLabelFeatureIndex;
      final BitSet[] clusterDocuments = lingoContext.clusterDocuments;
      final double[] clusterLabelScore = lingoContext.clusterLabelScore;
      for (int i = 0; i < clusterLabelIndex.length; i++) {
        final int labelFeature = clusterLabelIndex[i];
        if (labelFeature < 0) {
          // Cluster removed during merging
          continue;
        }

        // Allocate only after the guard so that removed clusters do not create
        // a throw-away Cluster instance.
        final Cluster cluster = new Cluster();

        // Add label and score
        cluster.addPhrases(labelFormatter.format(context, labelFeature));
        cluster.setAttribute(Cluster.SCORE, clusterLabelScore[i]);

        // Add documents: each set bit is an index into the documents list
        final BitSet bs = clusterDocuments[i];
        for (int bit = bs.nextSetBit(0); bit >= 0; bit = bs.nextSetBit(bit + 1)) {
          cluster.addDocuments(documents.get(bit));
        }

        // Add cluster
        clusters.add(cluster);
      }

      Collections.sort(clusters, Cluster.byReversedWeightedScoreAndSizeComparator(scoreWeight));
    }

    Cluster.appendOtherTopics(documents, clusters);
  }