/**
 * A native implementation of Colt's original multiplication method.
 *
 * <p>NOTE: this method will use native calls only when:
 *
 * <ul>
 *   <li>all input matrices are {@link DenseDoubleMatrix2D} or its subclasses (e.g. {@link
 *       NNIDenseDoubleMatrix2D})
 *   <li>none of the input matrices is a view
 *   <li>the dynamic libraries required by the NNI are available
 * </ul>
 *
 * @param B the right-hand operand of the multiplication
 * @param C the result matrix; when {@code null} a new {@link NNIDenseDoubleMatrix2D} of the
 *     appropriate size is allocated
 * @param alpha scalar multiplier applied to {@code A * B}
 * @param beta scalar multiplier applied to the existing contents of {@code C}
 * @param transposeA whether to transpose this matrix before multiplying
 * @param transposeB whether to transpose {@code B} before multiplying
 * @return {@code C} (possibly newly allocated), holding {@code alpha*A*B + beta*C}
 * @throws IllegalArgumentException if {@code C} aliases an operand, if inner dimensions
 *     disagree, or if {@code C} has incompatible dimensions
 */
public DoubleMatrix2D zMult(
    DoubleMatrix2D B,
    DoubleMatrix2D C,
    double alpha,
    double beta,
    boolean transposeA,
    boolean transposeB) {
  // A workaround for a bug in DenseDoubleMatrix2D.
  // If B is a SelectedDenseDoubleMatrix the implementation of this method
  // throws a ClassCastException. The workaround is to swap and transpose
  // the arguments and then transpose the result. As SelectedDenseDoubleMatrix2D is
  // package-private, if it was loaded with a different class loader than
  // the one used for this class it would give a VerificationError if we referred
  // to it directly here. Hence the hacky string comparison here.
  //
  if (B.getClass().getName().endsWith("SelectedDenseDoubleMatrix2D")) {
    // (B^T * A^T)^T == A * B, with the transpose flags flipped accordingly.
    return B.zMult(this, C, alpha, beta, !transposeB, !transposeA).viewDice();
  }

  // Check the sizes: effective (logical) dimensions after applying the transpose flags.
  int rowsB = (transposeB ? B.columns() : B.rows());
  int columnsB = (transposeB ? B.rows() : B.columns());
  int rowsA = (transposeA ? columns() : rows());
  int columnsA = (transposeA ? rows() : columns());

  if (C == null) {
    C = new NNIDenseDoubleMatrix2D(rowsA, columnsB);
  }

  // gemm cannot operate in place on an operand that is also the result.
  if (this == C || B == C) {
    throw new IllegalArgumentException("Matrices must not be identical");
  }

  final int rowsC = C.rows();
  final int columnsC = C.columns();

  if (rowsB != columnsA) {
    throw new IllegalArgumentException(
        "Matrix2D inner dimensions must agree:" + toStringShort() + ", " + B.toStringShort());
  }

  // NOTE(review): "Incompatibile" typo below is preserved — callers/tests may match on it.
  if (rowsC != rowsA || columnsC != columnsB) {
    throw new IllegalArgumentException(
        "Incompatibile result matrix: "
            + toStringShort()
            + ", "
            + B.toStringShort()
            + ", "
            + C.toStringShort());
  }

  // Need native BLAS, dense matrices and no views to operate
  // Default to Colt's implementation otherwise
  if (!NNIInterface.isNativeBlasAvailable()
      || (!(B instanceof NNIDenseDoubleMatrix2D))
      || (!(C instanceof NNIDenseDoubleMatrix2D))
      || isView()
      || ((NNIDenseDoubleMatrix2D) B).isView()
      || ((NNIDenseDoubleMatrix2D) C).isView()) {
    return super.zMult(B, C, alpha, beta, transposeA, transposeB);
  }

  // Delegate to native gemm: C = alpha * op(A) * op(B) + beta * C.
  // `columnsA` is the shared inner dimension; `columns` is this matrix's physical
  // column count (leading dimension of the backing array).
  NNIInterface.getBlas()
      .gemm(
          this,
          (NNIDenseDoubleMatrix2D) B,
          (NNIDenseDoubleMatrix2D) C,
          transposeA,
          transposeB,
          columnsA,
          alpha,
          columns,
          beta);

  return C;
}
/**
 * Builds a term document matrix from data provided in the <code>context</code>, stores the result
 * in there.
 */
public void buildTermDocumentMatrix(VectorSpaceModelContext vsmContext) {
  final PreprocessingContext context = vsmContext.preprocessingContext;
  final int docCount = context.documents.size();

  // Degenerate case: no documents — publish an empty matrix and mapping and bail out
  // (also avoids the division by zero in the row-limit computation below).
  if (docCount == 0) {
    vsmContext.termDocumentMatrix = new DenseDoubleMatrix2D(0, 0);
    vsmContext.stemToRowIndex = new IntIntHashMap();
    return;
  }

  final int[] tf = context.allStems.tf;
  final int[][] tfByDoc = context.allStems.tfByDocument;
  final byte[] fieldIndexByStem = context.allStems.fieldIndices;

  // Locate the title field, if present, so that title terms can receive a weight boost.
  int titleField = -1;
  final String[] fieldNames = context.allFields.name;
  for (int f = 0; f < fieldNames.length; f++) {
    if (Document.TITLE.equals(fieldNames[f])) {
      titleField = f;
      break;
    }
  }

  // Candidate stems for the matrix, ranked by descending weight so the most
  // significant terms are the ones that fit under the size cap.
  final int[] candidates = computeRequiredStemIndices(context);
  final double[] weights = new double[candidates.length];
  for (int i = 0; i < candidates.length; i++) {
    final int stem = candidates[i];
    final double base =
        termWeighting.calculateTermWeight(tf[stem], tfByDoc[stem].length / 2, docCount);
    weights[i] = base * getWeightBoost(titleField, fieldIndexByStem[stem]);
  }
  final int[] order =
      IndirectSort.mergesort(
          0, weights.length, new IndirectComparator.DescendingDoubleComparator(weights));

  // Cap the number of rows so that rows * documents stays within the maximum matrix size.
  final int rowLimit = maximumMatrixSize / docCount;
  final DoubleMatrix2D matrix =
      new DenseDoubleMatrix2D(Math.min(rowLimit, candidates.length), docCount);

  // Fill one row per included stem, in weight order.
  for (int row = 0; row < order.length && row < rowLimit; row++) {
    final int stem = candidates[order[row]];
    // Flat pairs: [docIndex, tf, docIndex, tf, ...]
    final int[] pairs = tfByDoc[stem];
    final int df = pairs.length / 2;
    final byte stemFields = fieldIndexByStem[stem];
    for (int p = 0; p < df; p++) {
      double w = termWeighting.calculateTermWeight(pairs[p * 2 + 1], df, docCount);
      w *= getWeightBoost(titleField, stemFields);
      matrix.set(row, pairs[p * 2], w);
    }
  }

  // Map each included stem index to the matrix row it occupies.
  final IntIntHashMap stemToRow = new IntIntHashMap();
  for (int row = 0; row < order.length && row < matrix.rows(); row++) {
    stemToRow.put(candidates[order[row]], row);
  }

  // Publish the results into the context.
  vsmContext.termDocumentMatrix = matrix;
  vsmContext.stemToRowIndex = stemToRow;
}