/**
 * Runs the reconstruction-error job (or reuses its previous output) and returns the resulting
 * relative error. See {@link ReconstructionErrJob} for the full explanation of the job. In short:
 *
 * <p>X = Y * Y2X
 *
 * <p>Err = (X - Xm) * C' - (Y - Ym)
 *
 * <p>The job output is written under {@code tmpPath} in a child named
 * {@code "reconstructionErr" + id}. When that path already exists the job is not launched again;
 * a warning is logged and the existing results are loaded instead.
 *
 * @param matrixY the input matrix Y
 * @param matrixY2X the in-memory matrix to generate X
 * @param matrixC the in-memory matrix to reconstruct Y
 * @param C_central the central version of matrixC; it is cast to {@code DenseMatrix} here
 * @param Ym the mean vector of Y
 * @param Xm = Ym * matrixY2X
 * @param ERR_SAMPLE_RATE the sample rate forwarded unchanged to the job
 * @param conf the configuration
 * @param tmpPath the temporary path
 * @param id the unique id to name the files in HDFS
 * @return {@code reconstructionError / centralizedYNorm}, i.e. result 0 divided by result 2 in
 *     the logged summary (the log labels result 2 as the "Y-Ym norm")
 * @throws IOException if resolving or probing the file system fails, or if it is propagated from
 *     the helper calls made here
 * @throws InterruptedException propagated from the calls made here (presumably the job run)
 * @throws ClassNotFoundException propagated from the calls made here (presumably the job run)
 */
public double reconstructionErr(
    DistributedRowMatrix matrixY,
    DistributedRowMatrix matrixY2X,
    DistributedRowMatrix matrixC,
    Matrix C_central,
    Vector Ym,
    DenseVector Xm,
    final float ERR_SAMPLE_RATE,
    Configuration conf,
    Path tmpPath,
    String id)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Build Zm: fill a buffer via vectorTimesMatrixTranspose(Xm, C_central, ...) and subtract Ym.
  DenseVector xmTimesCt = new DenseVector(C_central.numRows());
  PCACommon.vectorTimesMatrixTranspose(Xm, (DenseMatrix) C_central, xmTimesCt);
  DenseVector zmVector = (DenseVector) xmTimesCt.minus(Ym);

  Path outputPath = new Path(tmpPath, "reconstructionErr" + id);
  FileSystem fileSystem = FileSystem.get(outputPath.toUri(), conf);

  if (fileSystem.exists(outputPath)) {
    // Results from an earlier run are already on disk; reuse them.
    log.warn("---------- Skip ReconstructionErrJob - already exists: " + outputPath);
  } else {
    // The job takes the two vectors as paths, so store them as distributed vectors first.
    Path zmVectorPath = PCACommon.toDistributedVector(zmVector, tmpPath, "Zm" + id, conf);
    Path ymVectorPath = PCACommon.toDistributedVector(Ym, tmpPath, "Ymforerr" + id, conf);
    run(
        conf,
        matrixY.getRowPath(),
        matrixY2X.getRowPath(),
        matrixY2X.numRows(),
        matrixY2X.numCols(),
        matrixC.getRowPath(),
        zmVectorPath.toString(),
        ymVectorPath.toString(),
        outputPath,
        ERR_SAMPLE_RATE);
  }

  // The fields logged below are read after this call; presumably loadResults populates them.
  loadResults(outputPath, conf);

  log.info("0 is reconstruction err, 1 is Y norm (err/norm), " + "2 is Y-Ym norm (err/norm)");
  log.info("The error of 0 is " + reconstructionError);
  log.info("The error of 1 is " + yNorm + " (" + reconstructionError / yNorm + ")");
  log.info(
      "The error of 2 is "
          + centralizedYNorm
          + " ("
          + reconstructionError / centralizedYNorm
          + ")");

  return reconstructionError / centralizedYNorm;
}
/**
 * Processes one input row and folds it into the running per-column sums.
 *
 * <p>The row is skipped entirely when {@code PCACommon.pass(ERR_SAMPLE_RATE)} returns true.
 * Otherwise the row is pushed through {@code matrixY2X} and {@code matrixC} via the PCACommon
 * helpers, and three accumulators are updated: {@code sumOfErr}, {@code sumOfyi} and
 * {@code sumOfyc}. Nothing is written to {@code context} by this method.
 *
 * @param iw the row key (not used here)
 * @param vw the wrapped row vector
 * @param context the mapper context (not used here)
 * @throws IOException declared by the mapper contract
 */
@Override
public void map(IntWritable iw, VectorWritable vw, Context context) throws IOException {
  if (PCACommon.pass(ERR_SAMPLE_RATE)) {
    return;
  }

  final Vector row = vw.get();

  // xi is a reusable buffer, allocated on first use with one entry per column of matrixY2X.
  if (xi == null) {
    xi = new DenseVector(matrixY2X.numCols());
  }

  // The helpers below write into their last/first buffer argument, so their order matters.
  PCACommon.sparseVectorTimesMatrix(row, matrixY2X, xi);
  PCACommon.vectorTimesMatrixTranspose(xi, matrixC, xiCt);
  // Presumably leaves xiCt - row - zm in xiCt (inferred from the helper name; confirm).
  denseVectorSubtractSparseSubtractDense(xiCt, row, zm);

  // Combines each sumOfErr entry with the absolute value of the matching xiCt entry.
  final DoubleDoubleFunction addAbsolute =
      new DoubleDoubleFunction() {
        @Override
        public double apply(double runningSum, double value) {
          return runningSum + Math.abs(value);
        }
      };
  sumOfErr.assign(xiCt, addAbsolute);

  denseVectorPlusAbsSparseVector(sumOfyi, row);
  denseVectorPlusAbsDenseDiff(sumOfyc, row, ym);
}