public void compute() {
    int n = A.columns();

    // Similarities of documents to centroids
    DoubleMatrix2D D = new DenseDoubleMatrix2D(k, n);

    // Object-cluster assignments
    V = new DenseDoubleMatrix2D(n, k);

    // Initialize the centroids with the first k document vectors
    U = new DenseDoubleMatrix2D(A.rows(), k);
    U.assign(A.viewPart(0, 0, A.rows(), k));

    int[] maxIndices = new int[D.columns()];
    double[] maxValues = new double[D.columns()];

    for (iterationsCompleted = 0; iterationsCompleted < maxIterations; iterationsCompleted++) {
      // Calculate cosine similarities between centroids and documents: D = U^T * A
      U.zMult(A, D, 1, 0, true, false);

      V.assign(0);
      U.assign(0);

      // Assign each document to its most similar centroid
      MatrixUtils.maxInColumns(D, maxIndices, maxValues);
      for (int i = 0; i < maxIndices.length; i++) {
        V.setQuick(i, maxIndices[i], 1);
      }

      // Update centroids
      for (int c = 0; c < V.columns(); c++) {
        // Sum the vectors of documents assigned to cluster c
        int count = 0;
        for (int d = 0; d < V.rows(); d++) {
          if (V.getQuick(d, c) != 0) {
            count++;
            U.viewColumn(c).assign(A.viewColumn(d), Functions.PLUS);
          }
        }

        // Divide by the cluster size to get the mean, then renormalize to unit length
        U.viewColumn(c).assign(Mult.div(count));
        MatrixUtils.normalizeColumnL2(U, null);
      }
    }
  }
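
  /*
   * Illustration only (not part of the original class): with L2-normalized columns,
   * U.zMult(A, D, 1, 0, true, false) computes D = U^T * A, i.e. the cosine similarity
   * between every centroid (column of U) and every document (column of A). A minimal
   * sketch with made-up matrices:
   *
   *   DoubleMatrix2D docs = new DenseDoubleMatrix2D(new double[][] {
   *       {1, 0, 0.6},
   *       {0, 1, 0.8}});                     // 2 terms x 3 documents, unit-length columns
   *   DoubleMatrix2D centroids = new DenseDoubleMatrix2D(new double[][] {
   *       {1, 0},
   *       {0, 1}});                          // 2 terms x 2 centroids, unit-length columns
   *   DoubleMatrix2D sims = new DenseDoubleMatrix2D(2, 3);
   *   centroids.zMult(docs, sims, 1, 0, true, false);
   *   // sims.get(1, 2) == 0.8: document 2 is most similar to centroid 1.
   */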
  /**
   * Builds a sparse term-document-like matrix for the provided featureIndex, in the same term
   * space as the original term-document matrix.
   */
  static DoubleMatrix2D buildAlignedMatrix(
      VectorSpaceModelContext vsmContext, int[] featureIndex, ITermWeighting termWeighting) {
    final IntIntHashMap stemToRowIndex = vsmContext.stemToRowIndex;
    if (featureIndex.length == 0) {
      return new DenseDoubleMatrix2D(stemToRowIndex.size(), 0);
    }

    final DoubleMatrix2D phraseMatrix =
        new SparseDoubleMatrix2D(stemToRowIndex.size(), featureIndex.length);

    final PreprocessingContext preprocessingContext = vsmContext.preprocessingContext;
    final int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;
    final int[] stemsTf = preprocessingContext.allStems.tf;
    final int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
    final int[][] phrasesWordIndices = preprocessingContext.allPhrases.wordIndices;
    final int documentCount = preprocessingContext.documents.size();
    final int wordCount = wordsStemIndex.length;

    for (int i = 0; i < featureIndex.length; i++) {
      final int feature = featureIndex[i];
      final int[] wordIndices;
      if (feature < wordCount) {
        wordIndices = new int[] {feature};
      } else {
        wordIndices = phrasesWordIndices[feature - wordCount];
      }

      for (int wordIndex = 0; wordIndex < wordIndices.length; wordIndex++) {
        final int stemIndex = wordsStemIndex[wordIndices[wordIndex]];
        final int index = stemToRowIndex.indexOf(stemIndex);
        if (stemToRowIndex.indexExists(index)) {
          final int rowIndex = stemToRowIndex.indexGet(index);

          // tfByDocument stores flat [documentIndex, tf] pairs, so length / 2 is the document frequency
          double weight =
              termWeighting.calculateTermWeight(
                  stemsTf[stemIndex], stemsTfByDocument[stemIndex].length / 2, documentCount);

          phraseMatrix.setQuick(rowIndex, i, weight);
        }
      }
    }

    return phraseMatrix;
  }
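
  /*
   * Illustration only: stemToRowIndex maps a stem index to its row in the main
   * term-document matrix, which is what keeps the aligned matrix in the same term space.
   * A minimal HPPC lookup sketch mirroring the loop above (keys and values are made up):
   *
   *   IntIntHashMap stemToRowIndex = new IntIntHashMap();
   *   stemToRowIndex.put(17, 0);                        // stem 17 occupies row 0
   *   int slot = stemToRowIndex.indexOf(17);            // valid slot when the key is present
   *   if (stemToRowIndex.indexExists(slot)) {
   *     int rowIndex = stemToRowIndex.indexGet(slot);   // == 0
   *   }
   */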
  /**
   * Builds a term-phrase matrix in the same space as the main term-document matrix. If the
   * preprocessing context contains no phrases, {@link VectorSpaceModelContext#termPhraseMatrix} will
   * remain <code>null</code>.
   */
  public void buildTermPhraseMatrix(VectorSpaceModelContext context) {
    final PreprocessingContext preprocessingContext = context.preprocessingContext;
    final IntIntHashMap stemToRowIndex = context.stemToRowIndex;
    final int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
    final int firstPhraseIndex = preprocessingContext.allLabels.firstPhraseIndex;

    if (firstPhraseIndex >= 0 && stemToRowIndex.size() > 0) {
      // Build phrase matrix
      int[] phraseFeatureIndices = new int[labelsFeatureIndex.length - firstPhraseIndex];
      for (int featureIndex = 0; featureIndex < phraseFeatureIndices.length; featureIndex++) {
        phraseFeatureIndices[featureIndex] = labelsFeatureIndex[featureIndex + firstPhraseIndex];
      }

      final DoubleMatrix2D phraseMatrix =
          TermDocumentMatrixBuilder.buildAlignedMatrix(
              context, phraseFeatureIndices, termWeighting);
      MatrixUtils.normalizeColumnL2(phraseMatrix, null);
      context.termPhraseMatrix = phraseMatrix.viewDice();
    }
  }
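
  /*
   * Illustration only: the copy loop above takes the tail of labelsFeatureIndex starting at
   * firstPhraseIndex (phrase features follow single-word features there). An equivalent
   * sketch, not a suggested change:
   *
   *   int[] phraseFeatureIndices =
   *       java.util.Arrays.copyOfRange(labelsFeatureIndex, firstPhraseIndex, labelsFeatureIndex.length);
   */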
  /**
   * A native implementation of Colt's original matrix multiplication method.
   *
   * <p>NOTE: this method will use native calls only when:
   *
   * <ul>
   *   <li>all input matrices are {@link DenseDoubleMatrix2D} or its subclasses (e.g. {@link
   *       NNIDenseDoubleMatrix2D})
   *   <li>none of the input matrices is a view
   *   <li>the dynamic libraries required by the NNI are available
   * </ul>
   */
  public DoubleMatrix2D zMult(
      DoubleMatrix2D B,
      DoubleMatrix2D C,
      double alpha,
      double beta,
      boolean transposeA,
      boolean transposeB) {
    // A workaround for a bug in DenseDoubleMatrix2D.
    // If B is a SelectedDenseDoubleMatrix2D, the implementation of this method
    // throws a ClassCastException. The workaround is to swap and transpose
    // the arguments and then transpose the result. As SelectedDenseDoubleMatrix2D is
    // package-private, referring to it directly here could fail with a linkage error
    // if it was loaded by a different class loader than the one used for this class.
    // Hence the hacky string comparison below.
    //
    if (B.getClass().getName().endsWith("SelectedDenseDoubleMatrix2D")) {
      return B.zMult(this, C, alpha, beta, !transposeB, !transposeA).viewDice();
    }

    // Check the sizes
    int rowsB = (transposeB ? B.columns() : B.rows());
    int columnsB = (transposeB ? B.rows() : B.columns());
    int rowsA = (transposeA ? columns() : rows());
    int columnsA = (transposeA ? rows() : columns());

    if (C == null) {
      C = new NNIDenseDoubleMatrix2D(rowsA, columnsB);
    }

    if (this == C || B == C) {
      throw new IllegalArgumentException("Matrices must not be identical");
    }

    final int rowsC = C.rows();
    final int columnsC = C.columns();

    if (rowsB != columnsA) {
      throw new IllegalArgumentException(
          "Matrix2D inner dimensions must agree:" + toStringShort() + ", " + B.toStringShort());
    }

    if (rowsC != rowsA || columnsC != columnsB) {
      throw new IllegalArgumentException(
          "Incompatibile result matrix: "
              + toStringShort()
              + ", "
              + B.toStringShort()
              + ", "
              + C.toStringShort());
    }

    // Need native BLAS, dense matrices and no views to operate
    // Default to Colt's implementation otherwise
    if (!NNIInterface.isNativeBlasAvailable()
        || (!(B instanceof NNIDenseDoubleMatrix2D))
        || (!(C instanceof NNIDenseDoubleMatrix2D))
        || isView()
        || ((NNIDenseDoubleMatrix2D) B).isView()
        || ((NNIDenseDoubleMatrix2D) C).isView()) {
      return super.zMult(B, C, alpha, beta, transposeA, transposeB);
    }

    NNIInterface.getBlas()
        .gemm(
            this,
            (NNIDenseDoubleMatrix2D) B,
            (NNIDenseDoubleMatrix2D) C,
            transposeA,
            transposeB,
            columnsA,
            alpha,
            columns,
            beta);

    return C;
  }
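
  /*
   * Illustration only: like Colt's original implementation, this method follows the contract
   * C = alpha * op(A) * op(B) + beta * C, where op() optionally transposes its argument.
   * A minimal sketch with plain dense matrices (the native BLAS path additionally requires
   * the conditions listed in the Javadoc above):
   *
   *   DoubleMatrix2D a = new DenseDoubleMatrix2D(new double[][] {{1, 2}, {3, 4}});
   *   DoubleMatrix2D b = new DenseDoubleMatrix2D(new double[][] {{5, 6}, {7, 8}});
   *   DoubleMatrix2D c = a.zMult(b, null, 1, 0, false, false);
   *   // c is {{19, 22}, {43, 50}}
   */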
  /**
   * Builds a term-document matrix from the data provided in the <code>vsmContext</code> and stores
   * the result there.
   */
  public void buildTermDocumentMatrix(VectorSpaceModelContext vsmContext) {
    final PreprocessingContext preprocessingContext = vsmContext.preprocessingContext;

    final int documentCount = preprocessingContext.documents.size();
    final int[] stemsTf = preprocessingContext.allStems.tf;
    final int[][] stemsTfByDocument = preprocessingContext.allStems.tfByDocument;
    final byte[] stemsFieldIndices = preprocessingContext.allStems.fieldIndices;

    if (documentCount == 0) {
      vsmContext.termDocumentMatrix = new DenseDoubleMatrix2D(0, 0);
      vsmContext.stemToRowIndex = new IntIntHashMap();
      return;
    }

    // Determine the index of the title field
    int titleFieldIndex = -1;
    final String[] fieldsName = preprocessingContext.allFields.name;
    for (int i = 0; i < fieldsName.length; i++) {
      if (Document.TITLE.equals(fieldsName[i])) {
        titleFieldIndex = i;
        break;
      }
    }

    // Determine the stems we, ideally, should include in the matrix
    int[] stemsToInclude = computeRequiredStemIndices(preprocessingContext);

    // Sort stems by weight, so that stems get included in the matrix in decreasing
    // order of weight
    final double[] stemsWeight = new double[stemsToInclude.length];
    for (int i = 0; i < stemsToInclude.length; i++) {
      final int stemIndex = stemsToInclude[i];
      stemsWeight[i] =
          termWeighting.calculateTermWeight(
                  stemsTf[stemIndex], stemsTfByDocument[stemIndex].length / 2, documentCount)
              * getWeightBoost(titleFieldIndex, stemsFieldIndices[stemIndex]);
    }
    final int[] stemWeightOrder =
        IndirectSort.mergesort(
            0, stemsWeight.length, new IndirectComparator.DescendingDoubleComparator(stemsWeight));

    // Calculate the number of terms we can include without exceeding the maximum matrix size
    final int maxRows = maximumMatrixSize / documentCount;
    final DoubleMatrix2D tdMatrix =
        new DenseDoubleMatrix2D(Math.min(maxRows, stemsToInclude.length), documentCount);

    for (int i = 0; i < stemWeightOrder.length && i < maxRows; i++) {
      final int stemIndex = stemsToInclude[stemWeightOrder[i]];
      final int[] tfByDocument = stemsTfByDocument[stemIndex];
      final int df = tfByDocument.length / 2;
      final byte fieldIndices = stemsFieldIndices[stemIndex];

      for (int j = 0; j < df; j++) {
        double weight =
            termWeighting.calculateTermWeight(tfByDocument[j * 2 + 1], df, documentCount);

        weight *= getWeightBoost(titleFieldIndex, fieldIndices);
        tdMatrix.set(i, tfByDocument[j * 2], weight);
      }
    }

    // Convert stemsToInclude into tdMatrixStemIndices
    final IntIntHashMap stemToRowIndex = new IntIntHashMap();
    for (int i = 0; i < stemWeightOrder.length && i < tdMatrix.rows(); i++) {
      stemToRowIndex.put(stemsToInclude[stemWeightOrder[i]], i);
    }

    // Store the results
    vsmContext.termDocumentMatrix = tdMatrix;
    vsmContext.stemToRowIndex = stemToRowIndex;
  }
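
  /*
   * Illustration only: allStems.tfByDocument stores flat [documentIndex, tf] pairs, which is
   * why df == tfByDocument.length / 2 and the loop above reads tfByDocument[j * 2] (document
   * index) and tfByDocument[j * 2 + 1] (tf). The row count is capped at
   * maximumMatrixSize / documentCount. Made-up numbers:
   *
   *   int[] tfByDocument = {0, 3, 4, 1};  // tf 3 in document 0, tf 1 in document 4
   *   int df = tfByDocument.length / 2;   // == 2
   *   int maxRows = 37500 / 250;          // == 150 rows available for 250 documents
   */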