Java IndirectSort Examples

Programming Language: Java

Namespace/Package Name: com.carrotsearch.hppc.sorting

Class/Type: IndirectSort

Examples at hotexamples.com: 3

Java IndirectSort - 3 examples found. These are the top rated real world Java examples of com.carrotsearch.hppc.sorting.IndirectSort extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

mergesort(3)

Example #1

Show file

File: IndirectSorterTest.java Project: carrot2/carrot2

 private void check(int start, int length, IndirectComparator comparator, int[] expectedOrder) {
   int[] order = IndirectSort.mergesort(start, length, comparator);
   assertThat(order).isEqualTo(expectedOrder);
 }

Example #2

Show file

File: IndirectSorterTest.java Project: carrot2/carrot2

 private <T> void check(
     T[] input, int start, int length, Comparator<T> comparator, int[] expectedOrder) {
   int[] order = IndirectSort.mergesort(input, start, length, comparator);
   assertThat(order).isEqualTo(expectedOrder);
 }

Example #3

Show file

File: TermDocumentMatrixBuilder.java Project: renodim/carrot2

  /**
   * Builds a term document matrix from data provided in the <code>context</code>, stores the result
   * in there.
   */
  public void buildTermDocumentMatrix(VectorSpaceModelContext vsmContext) {
    final PreprocessingContext preprocessingContext = vsmContext.preprocessingContext;

    final int documentCount = preprocessingContext.documents.size();
    final int[] stemsTf = preprocessingContext.allStems.tf;
    final int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
    final byte[] stemsFieldIndices = preprocessingContext.allStems.fieldIndices;

    if (documentCount == 0) {
      vsmContext.termDocumentMatrix = new DenseDoubleMatrix2D(0, 0);
      vsmContext.stemToRowIndex = new IntIntHashMap();
      return;
    }

    // Determine the index of the title field
    int titleFieldIndex = -1;
    final String[] fieldsName = preprocessingContext.allFields.name;
    for (int i = 0; i < fieldsName.length; i++) {
      if (Document.TITLE.equals(fieldsName[i])) {
        titleFieldIndex = i;
        break;
      }
    }

    // Determine the stems we, ideally, should include in the matrix
    int[] stemsToInclude = computeRequiredStemIndices(preprocessingContext);

    // Sort stems by weight, so that stems get included in the matrix in the order
    // of frequency
    final double[] stemsWeight = new double[stemsToInclude.length];
    for (int i = 0; i < stemsToInclude.length; i++) {
      final int stemIndex = stemsToInclude[i];
      stemsWeight[i] =
          termWeighting.calculateTermWeight(
                  stemsTf[stemIndex], stemsTfByDocument[stemIndex].length / 2, documentCount)
              * getWeightBoost(titleFieldIndex, stemsFieldIndices[stemIndex]);
    }
    final int[] stemWeightOrder =
        IndirectSort.mergesort(
            0, stemsWeight.length, new IndirectComparator.DescendingDoubleComparator(stemsWeight));

    // Calculate the number of terms we can include to fulfill the max matrix size
    final int maxRows = maximumMatrixSize / documentCount;
    final DoubleMatrix2D tdMatrix =
        new DenseDoubleMatrix2D(Math.min(maxRows, stemsToInclude.length), documentCount);

    for (int i = 0; i < stemWeightOrder.length && i < maxRows; i++) {
      final int stemIndex = stemsToInclude[stemWeightOrder[i]];
      final int[] tfByDocument = stemsTfByDocument[stemIndex];
      final int df = tfByDocument.length / 2;
      final byte fieldIndices = stemsFieldIndices[stemIndex];

      for (int j = 0; j < df; j++) {
        double weight =
            termWeighting.calculateTermWeight(tfByDocument[j * 2 + 1], df, documentCount);

        weight *= getWeightBoost(titleFieldIndex, fieldIndices);
        tdMatrix.set(i, tfByDocument[j * 2], weight);
      }
    }

    // Convert stemsToInclude into tdMatrixStemIndices
    final IntIntHashMap stemToRowIndex = new IntIntHashMap();
    for (int i = 0; i < stemWeightOrder.length && i < tdMatrix.rows(); i++) {
      stemToRowIndex.put(stemsToInclude[stemWeightOrder[i]], i);
    }

    // Store the results
    vsmContext.termDocumentMatrix = tdMatrix;
    vsmContext.stemToRowIndex = stemToRowIndex;
  }