Example #1
0
 /**
  * Loads the in-memory state this mapper works with before any record is processed.
  *
  * <p>Reads the matrix/vector paths, the matrix dimensions and the error sample rate from the job
  * configuration, materializes both matrices as dense in-memory matrices, loads the {@code zm}
  * and {@code ym} vectors, and allocates the scratch vector and the three accumulators, each
  * sized to the number of rows of {@code matrixC}.
  *
  * @param context the task context that supplies the job configuration
  * @throws IOException if the {@code zm} or {@code ym} vector cannot be loaded. Such a failure is
  *     propagated so the task fails at setup instead of continuing with unassigned vectors.
  */
 @Override
 public void setup(Context context) throws IOException {
   Configuration conf = context.getConfiguration();
   Path cMemMatrixPath = new Path(conf.get(RECONSTRUCTIONMATRIX));
   Path dMemMatrixPath = new Path(conf.get(MATRIXY2X));
   Path zmPath = new Path(conf.get(ZMPATH));
   Path meanPath = new Path(conf.get(YMPATH));
   int inMemMatrixNumRows = conf.getInt(YCOLS, 0);
   int inMemMatrixNumCols = conf.getInt(XCOLS, 0);
   ERR_SAMPLE_RATE = conf.getFloat(ERRSAMPLERATE, 1);

   // Both matrices are read with the same dimensions and share the parent directory of the
   // reconstruction matrix as their temporary path.
   Path tmpPath = cMemMatrixPath.getParent();
   DistributedRowMatrix distMatrix =
       new DistributedRowMatrix(cMemMatrixPath, tmpPath, inMemMatrixNumRows, inMemMatrixNumCols);
   distMatrix.setConf(conf);
   matrixC = PCACommon.toDenseMatrix(distMatrix);

   distMatrix =
       new DistributedRowMatrix(dMemMatrixPath, tmpPath, inMemMatrixNumRows, inMemMatrixNumCols);
   distMatrix.setConf(conf);
   matrixY2X = PCACommon.toDenseMatrix(distMatrix);

   // Do not catch IOException here: a load failure used to be printed and ignored, which left
   // zm/ym unassigned for the rest of the task. Let it propagate and fail the task instead.
   zm = PCACommon.toDenseVector(zmPath, conf);
   ym = PCACommon.toDenseVector(meanPath, conf);

   int numRows = matrixC.numRows();
   xiCt = new DenseVector(numRows);
   sumOfErr = new DenseVector(numRows);
   sumOfyi = new DenseVector(numRows);
   sumOfyc = new DenseVector(numRows);
 }
Example #2
0
  /**
   * Runs the reconstruction-error job, or reuses its existing output, and returns the relative
   * error. See {@link ReconstructionErrJob} for the full explanation of the job. In short:
   *
   * <p>X = Y * Y2X
   *
   * <p>Err = (X - Xm) * C' - (Y - Ym)
   *
   * <p>If the result path {@code tmpPath/reconstructionErr<id>} already exists, the job is skipped
   * and the existing results are loaded instead.
   *
   * @param matrixY the input matrix Y
   * @param matrixY2X the in-memory matrix to generate X
   * @param matrixC the in-memory matrix to reconstruct Y
   * @param C_central the central version of matrixC; it is cast to {@code DenseMatrix}
   * @param Ym the mean vector of Y
   * @param Xm = Ym * matrixY2X
   * @param ERR_SAMPLE_RATE the sample rate forwarded unchanged to the job
   * @param conf the configuration
   * @param tmpPath the temporary path
   * @param id the unique id to name the files in HDFS
   * @return the loaded reconstruction error divided by the loaded norm of the centralized Y
   *     (logged as "Y-Ym norm")
   * @throws IOException if thrown by the file-system access, the job, or loading the results
   * @throws InterruptedException if thrown while running the job
   * @throws ClassNotFoundException if thrown while running the job
   */
  public double reconstructionErr(
      DistributedRowMatrix matrixY,
      DistributedRowMatrix matrixY2X,
      DistributedRowMatrix matrixC,
      Matrix C_central,
      Vector Ym,
      DenseVector Xm,
      final float ERR_SAMPLE_RATE,
      Configuration conf,
      Path tmpPath,
      String id)
      throws IOException, InterruptedException, ClassNotFoundException {
    // Build the Zm vector: the output of vectorTimesMatrixTranspose(Xm, C_central) minus Ym.
    DenseVector projectedMean = new DenseVector(C_central.numRows());
    PCACommon.vectorTimesMatrixTranspose(Xm, (DenseMatrix) C_central, projectedMean);
    DenseVector zmVector = (DenseVector) projectedMean.minus(Ym);

    Path outputPath = new Path(tmpPath, "reconstructionErr" + id);
    FileSystem fileSystem = FileSystem.get(outputPath.toUri(), conf);
    if (fileSystem.exists(outputPath)) {
      // A previous run already produced the output; reuse it rather than recomputing.
      log.warn("---------- Skip ReconstructionErrJob - already exists: " + outputPath);
    } else {
      Path zmFile = PCACommon.toDistributedVector(zmVector, tmpPath, "Zm" + id, conf);
      Path ymFile = PCACommon.toDistributedVector(Ym, tmpPath, "Ymforerr" + id, conf);
      run(
          conf,
          matrixY.getRowPath(),
          matrixY2X.getRowPath(),
          matrixY2X.numRows(),
          matrixY2X.numCols(),
          matrixC.getRowPath(),
          zmFile.toString(),
          ymFile.toString(),
          outputPath,
          ERR_SAMPLE_RATE);
    }

    // Populates reconstructionError, yNorm and centralizedYNorm used below.
    loadResults(outputPath, conf);

    log.info("0 is reconstruction err, 1 is Y norm (err/norm), 2 is Y-Ym norm (err/norm)");
    log.info("The error of 0 is " + reconstructionError);
    log.info("The error of 1 is " + yNorm + " (" + reconstructionError / yNorm + ")");
    log.info(
        "The error of 2 is " + centralizedYNorm + " (" + reconstructionError / centralizedYNorm + ")");

    return reconstructionError / centralizedYNorm;
  }
Example #3
0
    /**
     * Accumulates the per-column absolute reconstruction error and norms for one input row.
     *
     * <p>The row is skipped when {@code PCACommon.pass(ERR_SAMPLE_RATE)} returns true. Otherwise
     * the row is projected through {@code matrixY2X} into {@code xi}, mapped back through
     * {@code matrixC} into {@code xiCt}, adjusted by the row itself and {@code zm}, and the
     * absolute values of the result are added to {@code sumOfErr}. The row also contributes to
     * {@code sumOfyi} and, together with {@code ym}, to {@code sumOfyc}.
     *
     * @param iw the row key (not used here)
     * @param vw the row vector of Y
     * @param context the task context (not used here)
     * @throws IOException declared by the overridden method
     */
    @Override
    public void map(IntWritable iw, VectorWritable vw, Context context) throws IOException {
      if (PCACommon.pass(ERR_SAMPLE_RATE)) {
        return;
      }

      Vector row = vw.get();
      // xi is allocated lazily on the first processed row and reused afterwards.
      if (xi == null) {
        xi = new DenseVector(matrixY2X.numCols());
      }
      PCACommon.sparseVectorTimesMatrix(row, matrixY2X, xi);
      PCACommon.vectorTimesMatrixTranspose(xi, matrixC, xiCt);

      // NOTE(review): presumably leaves xiCt - row - zm in xiCt; confirm against the helper.
      denseVectorSubtractSparseSubtractDense(xiCt, row, zm);

      // Element-wise: sumOfErr[i] = sumOfErr[i] + |xiCt[i]|.
      DoubleDoubleFunction addAbsolute =
          new DoubleDoubleFunction() {
            @Override
            public double apply(double accumulated, double value) {
              return accumulated + Math.abs(value);
            }
          };
      sumOfErr.assign(xiCt, addAbsolute);

      denseVectorPlusAbsSparseVector(sumOfyi, row);
      denseVectorPlusAbsDenseDiff(sumOfyc, row, ym);
    }