/**
 * Computes {@code this.transpose().times(other)} and writes the product to {@code outPath}.
 *
 * <p>The job configuration is produced by
 * {@code MatrixMultiplicationJob.createMatrixMultiplyJobConf} from this matrix's configuration
 * (or a fresh {@link Configuration} when none has been set) and is run through
 * {@code JobClient.runJob}. The same configuration is then attached to the returned matrix.
 *
 * @param other a DistributedRowMatrix; it must have the same number of rows as this matrix
 * @param outPath path to write result to
 * @return a DistributedRowMatrix containing the product, created over {@code outPath} with
 *     {@code numCols} and {@code other.numCols()} as its dimensions
 * @throws CardinalityException if {@code numRows} differs from {@code other.numRows()}
 * @throws IOException as declared by the job submission performed here
 */
public DistributedRowMatrix times(DistributedRowMatrix other, Path outPath) throws IOException {
  if (other.numRows() != numRows) {
    throw new CardinalityException(numRows, other.numRows());
  }

  // Fall back to a default configuration when this matrix has not been given one.
  Configuration baseConf = getConf();
  if (baseConf == null) {
    baseConf = new Configuration();
  }

  Configuration jobConf =
      MatrixMultiplicationJob.createMatrixMultiplyJobConf(
          baseConf, rowPath, other.rowPath, outPath, other.numCols);
  JobClient.runJob(new JobConf(jobConf));

  DistributedRowMatrix product =
      new DistributedRowMatrix(outPath, outputTmpPath, numCols, other.numCols());
  product.setConf(jobConf);
  return product;
}
/**
 * Computes {@code this.transpose().times(other)}, writing the product to a generated
 * {@code productWith-<suffix>} path under the parent of {@code outputTmpBasePath}.
 *
 * <p>This is a convenience overload: it only chooses the output location and then delegates
 * to {@link #times(DistributedRowMatrix, Path)}, which performs the row-count validation,
 * runs the multiplication job and builds the result.
 *
 * @param other a DistributedRowMatrix; it must have the same number of rows as this matrix
 * @return a DistributedRowMatrix containing the product
 * @throws CardinalityException if {@code numRows} differs from {@code other.numRows()}
 * @throws IOException as declared by the job submission performed by the delegate
 */
public DistributedRowMatrix times(DistributedRowMatrix other) throws IOException {
  // Use the whole nanoTime value as the suffix. Masking it down to 8 bits left only 256
  // possible names, so repeated products could be pointed at the same output directory.
  Path outPath =
      new Path(outputTmpBasePath.getParent(), "productWith-" + System.nanoTime());
  return times(other, outPath);
}
/**
 * Runs the reconstruction-error job (unless its output already exists), loads the results
 * and returns the reconstruction error relative to the norm of the centralized input.
 * Refer to {@link ReconstructionErrJob} for an explanation of the job. In short:
 *
 * <p>X = Y * Y2X
 *
 * <p>Err = (X - Xm) * C' - (Y - Ym)
 *
 * <p>The results live under {@code tmpPath} in an entry named {@code "reconstructionErr" + id}.
 * When that entry already exists the job is skipped with a warning and the existing results
 * are loaded instead.
 *
 * @param matrixY the input matrix Y
 * @param matrixY2X the in-memory matrix to generate X
 * @param matrixC the in-memory matrix to reconstruct Y
 * @param C_central the central version of matrixC; it is cast to {@code DenseMatrix}
 * @param Ym the mean vector of Y
 * @param Xm = Ym * matrixY2X
 * @param ERR_SAMPLE_RATE passed through unchanged to the job; presumably the sampling rate
 *     used when estimating the error (confirm against {@code run})
 * @param conf the configuration
 * @param tmpPath the temporary path
 * @param id the unique id to name the files in HDFS
 * @return {@code reconstructionError / centralizedYNorm}, using the values made available by
 *     {@code loadResults}
 * @throws IOException as declared by the file-system access and job calls made here
 * @throws InterruptedException as declared by the job calls made here
 * @throws ClassNotFoundException as declared by the job calls made here
 */
public double reconstructionErr(
    DistributedRowMatrix matrixY,
    DistributedRowMatrix matrixY2X,
    DistributedRowMatrix matrixC,
    Matrix C_central,
    Vector Ym,
    DenseVector Xm,
    final float ERR_SAMPLE_RATE,
    Configuration conf,
    Path tmpPath,
    String id)
    throws IOException, InterruptedException, ClassNotFoundException {
  // zm is filled by vectorTimesMatrixTranspose(Xm, C_central, ...) and then has Ym subtracted.
  DenseVector zm = new DenseVector(C_central.numRows());
  PCACommon.vectorTimesMatrixTranspose(Xm, (DenseMatrix) C_central, zm);
  zm = (DenseVector) zm.minus(Ym);

  Path resultPath = new Path(tmpPath, "reconstructionErr" + id);
  FileSystem fileSystem = FileSystem.get(resultPath.toUri(), conf);

  if (fileSystem.exists(resultPath)) {
    // Output from an earlier run is reused rather than recomputed.
    log.warn("---------- Skip ReconstructionErrJob - already exists: " + resultPath);
  } else {
    Path zmPath = PCACommon.toDistributedVector(zm, tmpPath, "Zm" + id, conf);
    Path ymPath = PCACommon.toDistributedVector(Ym, tmpPath, "Ymforerr" + id, conf);
    run(
        conf,
        matrixY.getRowPath(),
        matrixY2X.getRowPath(),
        matrixY2X.numRows(),
        matrixY2X.numCols(),
        matrixC.getRowPath(),
        zmPath.toString(),
        ymPath.toString(),
        resultPath,
        ERR_SAMPLE_RATE);
  }

  loadResults(resultPath, conf);

  log.info("0 is reconstruction err, 1 is Y norm (err/norm), " + "2 is Y-Ym norm (err/norm)");
  log.info("The error of 0 is " + reconstructionError);
  log.info("The error of 1 is " + yNorm + " (" + reconstructionError / yNorm + ")");
  log.info(
      "The error of 2 is "
          + centralizedYNorm
          + " ("
          + reconstructionError / centralizedYNorm
          + ")");

  return reconstructionError / centralizedYNorm;
}