// output the records in the outCache.
  public void close() throws IOException {
    long startTime = System.currentTimeMillis();

    // flush all cached output records to the collector
    for (Entry<MatrixIndexes, MatrixValue> cached : outCache.entrySet())
      realWriteToCollector(cached.getKey(), cached.getValue());

    // handle empty block output (on first reduce task only)
    if (outputDummyRecords) // required for rejecting empty blocks in mappers
    {
      long numRows = dim1.getRows();
      long numCols = dim2.getCols();
      int blockRows = dim1.getRowsPerBlock();
      int blockCols = dim2.getColsPerBlock();
      MatrixIndexes dummyIx = new MatrixIndexes();
      MatrixBlock dummyBlock = new MatrixBlock();
      for (long rowPos = 0, bi = 1; rowPos < numRows; rowPos += blockRows, bi++)
        for (long colPos = 0, bj = 1; colPos < numCols; colPos += blockCols, bj++) {
          // boundary blocks may be smaller than the configured block size
          int lrlen = (int) Math.min((long) blockRows, numRows - (bi - 1) * blockRows);
          int lclen = (int) Math.min((long) blockCols, numCols - (bj - 1) * blockCols);
          dummyIx.setIndexes(bi, bj);
          dummyBlock.reset(lrlen, lclen);
          collectFinalMultipleOutputs.collectOutput(dummyIx, dummyBlock, 0, cachedReporter);
        }
    }

    if (cachedReporter != null)
      cachedReporter.incrCounter(
          Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis() - startTime);
    super.close();
  }
  /**
   * Reads all binary blocks of a block-partitioned matrix from HDFS and adds each block
   * individually (as a deep copy) to the destination collection, without merging them
   * into a single matrix block.
   *
   * @param path HDFS path of the matrix (single file or directory of part files)
   * @param job job configuration used for reading
   * @param fs file system handle
   * @param dest output collection that receives one IndexedMatrixValue per block
   * @param rlen number of rows of the overall matrix
   * @param clen number of columns of the overall matrix
   * @param brlen number of rows per block
   * @param bclen number of columns per block
   * @throws IOException if a block lies outside the matrix dimensions or reading fails
   */
  @SuppressWarnings("deprecation")
  private void readBinaryBlockMatrixBlocksFromHDFS(
      Path path,
      JobConf job,
      FileSystem fs,
      Collection<IndexedMatrixValue> dest,
      long rlen,
      long clen,
      int brlen,
      int bclen)
      throws IOException {
    MatrixIndexes ix = new MatrixIndexes();
    MatrixBlock block = new MatrixBlock();

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
      MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    // directly read from sequence files, 1..N individual part files
    for (Path lpath : getSequenceFilePaths(fs, path)) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

      try {
        while (reader.next(ix, block)) {
          int rowOffset = (int) (ix.getRowIndex() - 1) * brlen;
          int colOffset = (int) (ix.getColumnIndex() - 1) * bclen;
          int rows = block.getNumRows();
          int cols = block.getNumColumns();

          // bound check per block
          boolean rowOutOfRange = (rowOffset + rows < 0) || (rowOffset + rows > rlen);
          boolean colOutOfRange = (colOffset + cols < 0) || (colOffset + cols > clen);
          if (rowOutOfRange || colOutOfRange) {
            throw new IOException(
                "Matrix block ["
                    + (rowOffset + 1)
                    + ":"
                    + (rowOffset + rows)
                    + ","
                    + (colOffset + 1)
                    + ":"
                    + (colOffset + cols)
                    + "] "
                    + "out of overall matrix range [1:"
                    + rlen
                    + ",1:"
                    + clen
                    + "].");
          }

          // deep copy required because ix/block are reused across reader.next() calls
          dest.add(new IndexedMatrixValue(new MatrixIndexes(ix), new MatrixBlock(block)));
        }
      } finally {
        IOUtilFunctions.closeSilently(reader);
      }
    }
  }
    @Override
    protected Tuple2<MatrixIndexes, MatrixBlock> computeNext(Tuple2<MatrixIndexes, MatrixBlock> arg)
        throws Exception {
      // unpack the partition key-value pair
      MatrixIndexes ixIn = arg._1();
      MatrixBlock blkIn = arg._2();

      // look up the matching rhs vector block: for a column vector the row index selects
      // the block (single block column), otherwise the column index does
      boolean colVector = (_vtype == VectorType.COL_VECTOR);
      int rix = (int) (colVector ? ixIn.getRowIndex() : 1);
      int cix = (int) (colVector ? 1 : ixIn.getColumnIndex());
      MatrixBlock blkRhs = _pmV.getMatrixBlock(rix, cix);

      // apply the binary operator and emit the result under the unchanged index
      MatrixBlock blkOut = (MatrixBlock) blkIn.binaryOperations(_op, blkRhs, new MatrixBlock());
      return new Tuple2<MatrixIndexes, MatrixBlock>(ixIn, blkOut);
    }
  @Override
  public MatrixBlock readMatrixFromHDFS(
      String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
      throws IOException, DMLRuntimeException {
    // allocate the target matrix block for the given dimensions and sparsity estimate
    MatrixBlock ret = createOutputMatrixBlock(rlen, clen, estnnz, false, false);

    // prepare file access (local vs. distributed file system)
    JobConf job = new JobConf();
    FileSystem fs = _localFS ? FileSystem.getLocal(job) : FileSystem.get(job);
    String scheme = _localFS ? "file:///" : "";
    Path path = new Path(scheme + fname);

    // check existence and non-empty file
    checkValidInputFile(fs, path);

    // core read of all binary blocks into ret
    readBinaryBlockMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

    // finally, recompute nnz and change sparse/dense representation if required
    ret.recomputeNonZeros();
    ret.examSparsity();

    return ret;
  }
  /**
   * Reads all binary blocks of a matrix from HDFS and assembles them into the destination
   * matrix block, using sparse append or dense sub-block copy depending on the destination
   * representation.
   *
   * <p>Note: For efficiency, we directly use SequenceFile.Reader instead of SequenceFileInputFormat-
   * InputSplits-RecordReader (SequenceFileRecordReader). First, this has no drawbacks since the
   * SequenceFileRecordReader internally uses SequenceFile.Reader as well. Second, it is
   * advantageous if the actual sequence files are larger than the file splits created by
   * informat.getSplits (which is usually aligned to the HDFS block size) because then there is
   * overhead for finding the actual split between our 1k-1k blocks. This case happens if the read
   * matrix was create by CP or when jobs directly write to large output files (e.g., parfor matrix
   * partitioning).
   *
   * @param path HDFS path of the matrix (single file or directory of part files)
   * @param job job configuration used for reading
   * @param fs file system handle
   * @param dest destination matrix block, pre-allocated to rlen x clen
   * @param rlen number of rows of the overall matrix
   * @param clen number of columns of the overall matrix
   * @param brlen number of rows per block
   * @param bclen number of columns per block
   * @throws IOException if a block lies outside the matrix dimensions or reading fails
   * @throws DMLRuntimeException if the block copy into dest fails
   */
  @SuppressWarnings("deprecation")
  private static void readBinaryBlockMatrixFromHDFS(
      Path path,
      JobConf job,
      FileSystem fs,
      MatrixBlock dest,
      long rlen,
      long clen,
      int brlen,
      int bclen)
      throws IOException, DMLRuntimeException {
    boolean sparse = dest.isInSparseFormat();
    // key/value are reused across all reader.next() calls below
    MatrixIndexes key = new MatrixIndexes();
    MatrixBlock value = new MatrixBlock();

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
      MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    for (Path lpath : getSequenceFilePaths(fs, path)) // 1..N files
    {
      // directly read from sequence files (individual partfiles)
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

      try {
        // note: next(key, value) does not yet exploit the given serialization classes, record
        // reader does but is generally slower.
        while (reader.next(key, value)) {
          // empty block filter (skip entire block)
          if (value.isEmptyBlock(false)) continue;

          // convert 1-based block indexes to 0-based cell offsets
          int row_offset = (int) (key.getRowIndex() - 1) * brlen;
          int col_offset = (int) (key.getColumnIndex() - 1) * bclen;

          int rows = value.getNumRows();
          int cols = value.getNumColumns();

          // bound check per block
          if (row_offset + rows < 0
              || row_offset + rows > rlen
              || col_offset + cols < 0
              || col_offset + cols > clen) {
            throw new IOException(
                "Matrix block ["
                    + (row_offset + 1)
                    + ":"
                    + (row_offset + rows)
                    + ","
                    + (col_offset + 1)
                    + ":"
                    + (col_offset + cols)
                    + "] "
                    + "out of overall matrix range [1:"
                    + rlen
                    + ",1:"
                    + clen
                    + "].");
          }

          // copy block to result
          if (sparse) {
            // append is faster than cell-wise set but leaves rows unsorted;
            // the final sortSparseRows() below restores row order
            dest.appendToSparse(value, row_offset, col_offset);
            // note: append requires final sort
          } else {
            dest.copy(
                row_offset, row_offset + rows - 1, col_offset, col_offset + cols - 1, value, false);
          }
        }
      } finally {
        IOUtilFunctions.closeSilently(reader);
      }
    }

    if (sparse && clen > bclen) {
      // no need to sort if 1 column block since always sorted
      dest.sortSparseRows();
    }
  }
  /**
   * Writes a matrix block to HDFS in text cell format ("i j v" per line, 1-based indexes,
   * zero values omitted). A completely empty matrix is written as the single line "1 1 0"
   * so downstream readers always see a valid, non-empty file.
   *
   * @param path output file path (overwritten if it exists)
   * @param job job configuration used to obtain the file system
   * @param src matrix block to write
   * @param rlen number of rows of the overall matrix (upper bound check)
   * @param clen number of columns of the overall matrix (upper bound check)
   * @throws IOException if the block exceeds the matrix dimensions or writing fails
   */
  protected void writeTextCellMatrixToHDFS(
      Path path, JobConf job, MatrixBlock src, long rlen, long clen) throws IOException {
    boolean sparse = src.isInSparseFormat();
    boolean entriesWritten = false;

    int rows = src.getNumRows();
    int cols = src.getNumColumns();

    // bound check per block; done BEFORE opening the output stream so an invalid
    // block neither leaks an unclosed writer nor creates/truncates the target file
    if (rows > rlen || cols > clen) {
      throw new IOException(
          "Matrix block [1:"
              + rows
              + ",1:"
              + cols
              + "] "
              + "out of overall matrix range [1:"
              + rlen
              + ",1:"
              + clen
              + "].");
    }

    FileSystem fs = FileSystem.get(job);
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

    try {
      // for obj reuse and preventing repeated buffer re-allocations
      StringBuilder sb = new StringBuilder();

      if (sparse) // SPARSE: iterate non-zero cells only
      {
        SparseRowsIterator iter = src.getSparseRowsIterator();
        while (iter.hasNext()) {
          IJV cell = iter.next();

          // convert 0-based internal indexes to 1-based text cell indexes
          sb.append(cell.i + 1);
          sb.append(' ');
          sb.append(cell.j + 1);
          sb.append(' ');
          sb.append(cell.v);
          sb.append('\n');
          br.write(sb.toString()); // same as append
          sb.setLength(0);
          entriesWritten = true;
        }
      } else // DENSE: scan all cells, emit non-zeros
      {
        for (int i = 0; i < rows; i++) {
          String rowIndex = Integer.toString(i + 1);
          for (int j = 0; j < cols; j++) {
            double lvalue = src.getValueDenseUnsafe(i, j);
            if (lvalue != 0) // for nnz
            {
              sb.append(rowIndex);
              sb.append(' ');
              sb.append(j + 1);
              sb.append(' ');
              sb.append(lvalue);
              sb.append('\n');
              br.write(sb.toString()); // same as append
              sb.setLength(0);
              entriesWritten = true;
            }
          }
        }
      }

      // handle empty result: emit a single zero cell as a marker
      if (!entriesWritten) {
        br.write("1 1 0\n");
      }
    } finally {
      IOUtilFunctions.closeSilently(br);
    }
  }