/**
   * Computes {@code this.transpose().times(other)} by running a matrix-multiplication job and
   * wraps the job output in a new matrix.
   *
   * <p>Both operands must have the same number of rows. The job is configured from this matrix's
   * configuration, or from a fresh {@link Configuration} when none has been set. The returned
   * matrix is constructed with {@code numCols} and {@code other.numCols()} as its dimensions and
   * is given the configuration that was used for the job.
   *
   * @param other a DistributedRowMatrix with the same number of rows as this one
   * @param outPath path to write result to
   * @return a DistributedRowMatrix containing the product
   * @throws CardinalityException if the two matrices do not have the same number of rows
   * @throws IOException propagated from configuring or running the job
   */
  public DistributedRowMatrix times(DistributedRowMatrix other, Path outPath) throws IOException {
    if (numRows != other.numRows()) {
      throw new CardinalityException(numRows, other.numRows());
    }

    // Use the configuration attached to this matrix when there is one, else start from a new one.
    Configuration baseConf = getConf();
    if (baseConf == null) {
      baseConf = new Configuration();
    }

    Configuration jobConf =
        MatrixMultiplicationJob.createMatrixMultiplyJobConf(
            baseConf, rowPath, other.rowPath, outPath, other.numCols);
    JobClient.runJob(new JobConf(jobConf));

    DistributedRowMatrix product =
        new DistributedRowMatrix(outPath, outputTmpPath, numCols, other.numCols());
    product.setConf(jobConf);
    return product;
  }
  /**
   * Computes {@code this.transpose().times(other)}, writing the result to a generated path.
   *
   * <p>The output directory is created under the parent of {@code outputTmpBasePath} and is named
   * {@code productWith-<n>}, where {@code <n>} is the low 8 bits of {@link System#nanoTime()}.
   * The multiplication itself is delegated to {@link #times(DistributedRowMatrix, Path)} so that
   * the job setup lives in a single place.
   *
   * @param other a DistributedRowMatrix with the same number of rows as this one
   * @return a DistributedRowMatrix containing the product
   * @throws CardinalityException if the two matrices do not have the same number of rows
   * @throws IOException propagated from configuring or running the job
   */
  public DistributedRowMatrix times(DistributedRowMatrix other) throws IOException {
    // Fail fast on mismatched operands before deriving an output path.
    if (numRows != other.numRows()) {
      throw new CardinalityException(numRows, other.numRows());
    }

    // NOTE(review): the 0xFF mask leaves only 256 possible directory names, so two products
    // written under the same parent can pick the same path - confirm that this is acceptable.
    Path outPath =
        new Path(outputTmpBasePath.getParent(), "productWith-" + (System.nanoTime() & 0xFF));

    return times(other, outPath);
  }
  // Example #3
  // 0
  /**
   * Runs the reconstruction-error job unless its output already exists, loads the results and
   * returns the relative reconstruction error. Refer to {@link ReconstructionErrJob} for an
   * explanation of the job. In short:
   *
   * <p>X = Y * Y2X
   *
   * <p>Err = (X - Xm) * C' - (Y - Ym)
   *
   * <p>The job output is expected at {@code reconstructionErr + id} under {@code tmpPath}. When
   * that path already exists the job is skipped with a warning; in both cases the results are
   * then loaded from that path and logged.
   *
   * @param matrixY the input matrix Y
   * @param matrixY2X the matrix used to generate X
   * @param matrixC the matrix used to reconstruct Y
   * @param C_central the central version of matrixC; it is cast to {@code DenseMatrix} here
   * @param Ym the mean vector of Y
   * @param Xm = Ym * matrixY2X
   * @param ERR_SAMPLE_RATE the sample rate, passed through unchanged to the job
   * @param conf the configuration
   * @param tmpPath the temporary path
   * @param id the unique id to name the files in HDFS
   * @return {@code reconstructionError / centralizedYNorm}, i.e. the reconstruction error
   *     relative to the norm of Y - Ym (both values are presumably set by {@code loadResults})
   * @throws IOException propagated from the file-system and job calls made here
   * @throws InterruptedException propagated from the calls made here (presumably the job run)
   * @throws ClassNotFoundException propagated from the calls made here (presumably the job run)
   */
  public double reconstructionErr(
      DistributedRowMatrix matrixY,
      DistributedRowMatrix matrixY2X,
      DistributedRowMatrix matrixC,
      Matrix C_central,
      Vector Ym,
      DenseVector Xm,
      final float ERR_SAMPLE_RATE,
      Configuration conf,
      Path tmpPath,
      String id)
      throws IOException, InterruptedException, ClassNotFoundException {
    // Zm is the helper's result for (Xm, C_central) minus Ym; going by the formula above this is
    // presumably Xm * C' - Ym. It is computed up front, even when the job ends up being skipped.
    DenseVector projectedXm = new DenseVector(C_central.numRows());
    PCACommon.vectorTimesMatrixTranspose(Xm, (DenseMatrix) C_central, projectedXm);
    DenseVector zm = (DenseVector) projectedXm.minus(Ym);

    Path resultPath = new Path(tmpPath, "reconstructionErr" + id);
    FileSystem fileSystem = FileSystem.get(resultPath.toUri(), conf);
    if (fileSystem.exists(resultPath)) {
      log.warn("---------- Skip ReconstructionErrJob - already exists: " + resultPath);
    } else {
      // Write both vectors under tmpPath first, then hand their locations to the job.
      Path zmPath = PCACommon.toDistributedVector(zm, tmpPath, "Zm" + id, conf);
      Path ymPath = PCACommon.toDistributedVector(Ym, tmpPath, "Ymforerr" + id, conf);
      run(
          conf,
          matrixY.getRowPath(),
          matrixY2X.getRowPath(),
          matrixY2X.numRows(),
          matrixY2X.numCols(),
          matrixC.getRowPath(),
          zmPath.toString(),
          ymPath.toString(),
          resultPath,
          ERR_SAMPLE_RATE);
    }
    loadResults(resultPath, conf);

    log.info("0 is reconstruction err, 1 is Y norm (err/norm), 2 is Y-Ym norm (err/norm)");
    log.info("The error of 0 is " + reconstructionError);
    log.info("The error of 1 is " + yNorm + " (" + reconstructionError / yNorm + ")");
    log.info(
        "The error of 2 is " + centralizedYNorm + " (" + reconstructionError / centralizedYNorm + ")");
    return reconstructionError / centralizedYNorm;
  }