@Override
    public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(
        Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
      // Produces zero, one or two output blocks for one input block: the block
      // fetched from _pmV is applied to the input block via
      // permutationMatrixMultOperations, and the result is keyed by the target
      // row-block index(es) derived from the min/max non-zero values of that block.
      final ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> results =
          new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();
      final MatrixIndexes inIx = arg0._1();
      final MatrixBlock rhs = arg0._2();

      // fetch the block of _pmV that belongs to this input row-block
      final MatrixBlock selector = _pmV.getMatrixBlock((int) inIx.getRowIndex(), 1);

      // smallest / largest target position and the row-blocks they fall into
      final long firstPos = UtilFunctions.toLong(selector.minNonZero());
      final long lastPos = UtilFunctions.toLong(selector.max());
      final long firstBlockRow = (firstPos - 1) / _brlen + 1;
      final long lastBlockRow = (lastPos - 1) / _brlen + 1;

      // nothing selected: emit no output for this block
      if (firstPos < 1) {
        return results;
      }

      final boolean spansTwoBlocks = (firstBlockRow != lastBlockRow);
      final int numCols = rhs.getNumColumns();

      // estimate output sparsity to choose the output representation
      final double selectorSparsity =
          OptimizerUtils.getSparsity(selector.getNumRows(), 1, selector.getNonZeros());
      final long expectedNnz = (long) (selectorSparsity * rhs.getNonZeros());
      final boolean useSparse = MatrixBlock.evalSparseFormatInMemory(_brlen, numCols, expectedNnz);

      // allocate the first output block, and a second one only if the targets
      // straddle two row-blocks
      final MatrixBlock firstOut = new MatrixBlock();
      MatrixBlock secondOut = null;
      if (spansTwoBlocks) {
        secondOut = new MatrixBlock();
      }
      firstOut.reset(_brlen, numCols, useSparse);
      if (secondOut != null) {
        int secondRows = UtilFunctions.computeBlockSize(_rlen, lastBlockRow, _brlen);
        secondOut.reset(secondRows, numCols, useSparse);
      }

      // core operation; firstOut was sized with the default block length for
      // it, so its row count is corrected only afterwards
      selector.permutationMatrixMultOperations(rhs, firstOut, secondOut);
      firstOut.setNumRows(UtilFunctions.computeBlockSize(_rlen, firstBlockRow, _brlen));

      final long colIx = inIx.getColumnIndex();
      MatrixIndexes firstKey = new MatrixIndexes(firstBlockRow, colIx);
      results.add(new Tuple2<MatrixIndexes, MatrixBlock>(firstKey, firstOut));
      if (secondOut != null) {
        MatrixIndexes secondKey = new MatrixIndexes(lastBlockRow, inIx.getColumnIndex());
        results.add(new Tuple2<MatrixIndexes, MatrixBlock>(secondKey, secondOut));
      }

      return results;
    }
  /**
   * Generates a random matrix, turns it into a data frame, converts that data frame to binary
   * blocks via {@code FrameRDDConverterUtils.dataFrameToBinaryBlock}, and compares the result
   * against the original matrix.
   *
   * @param schema value types handed to {@code createDataFrame}; its length also determines the
   *     number of generated columns
   * @param containsID forwarded to {@code createDataFrame} and to the converter
   * @param dense if true the data is generated with {@code sparsity1}, otherwise with {@code
   *     sparsity2}
   * @param unknownDims if true the converter receives empty matrix characteristics instead of a
   *     copy of the known ones
   */
  private void testDataFrameConversion(
      ValueType[] schema, boolean containsID, boolean dense, boolean unknownDims) {
    boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    RUNTIME_PLATFORM oldPlatform = DMLScript.rtplatform;

    SparkExecutionContext sec = null;

    try {
      DMLScript.USE_LOCAL_SPARK_CONFIG = true;
      DMLScript.rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;

      // generate input data and setup metadata
      int cols = schema.length + colsVector - 1;
      double sparsity = dense ? sparsity1 : sparsity2;
      double[][] A = TestUtils.round(getRandomMatrix(rows1, cols, -10, 1000, sparsity, 2373));
      MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
      int blksz = ConfigurationManager.getBlocksize();
      MatrixCharacteristics mc1 =
          new MatrixCharacteristics(rows1, cols, blksz, blksz, mbA.getNonZeros());
      MatrixCharacteristics mc2 =
          unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);

      // setup spark context
      sec = (SparkExecutionContext) ExecutionContextFactory.createContext();
      JavaSparkContext sc = sec.getSparkContext();
      SQLContext sqlctx = new SQLContext(sc);

      // create input data frame
      DataFrame df = createDataFrame(sqlctx, mbA, containsID, schema);

      // dataframe - frame conversion
      JavaPairRDD<Long, FrameBlock> out =
          FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, containsID);

      // get output frame block
      FrameBlock fbB =
          SparkExecutionContext.toFrameBlock(
              out, UtilFunctions.nCopies(cols, ValueType.DOUBLE), rows1, cols);

      // compare frame blocks
      MatrixBlock mbB = DataConverter.convertToMatrixBlock(fbB);
      double[][] B = DataConverter.convertToDoubleMatrix(mbB);
      TestUtils.compareMatrices(A, B, rows1, cols, eps);
    } catch (Exception ex) {
      throw new RuntimeException(ex);
    } finally {
      try {
        // sec is still null when the failure happened before/while creating the
        // context; closing unconditionally would replace the real error with an NPE
        if (sec != null) {
          sec.close();
        }
      } finally {
        // always restore the global settings, even if close() itself fails
        DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
        DMLScript.rtplatform = oldPlatform;
      }
    }
  }
 @Override
 public MatrixBlock call(MatrixBlock arg0) throws Exception {
   // add this block's non-zero count to _aNnz and hand the block back unchanged
   final double blockNnz = (double) arg0.getNonZeros();
   _aNnz.add(blockNnz);
   return arg0;
 }
  /**
   * Emits the buffered ctable results through {@code _collector} and then clears the buffer.
   *
   * <p>If {@code _mapBuffer} is set, every entry of each buffered map is emitted as a single
   * {@code MatrixCell}. Otherwise, if {@code _blockBuffer} is set, each buffered block is emitted
   * either as one block at index (1,1) when it fits into a single block, or cut into sub-blocks of
   * at most {@code brlen x bclen}. Per-output non-zero counters (and, for the map buffer, the
   * maximum row/column dimensions of outputs flagged as unknown) are updated along the way.
   *
   * @param reporter passed through to {@code _collector.collectOutput}
   * @throws RuntimeException if both buffers are {@code null} or if anything fails while emitting;
   *     the original exception is attached as the cause
   */
  @SuppressWarnings("deprecation")
  public void flushBuffer(Reporter reporter) throws RuntimeException {
    try {
      if (_mapBuffer != null) {
        // a single cell object is reused for all emitted values
        MatrixCell value = new MatrixCell();
        for (Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet()) {
          ArrayList<Integer> resultIDs =
              ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
          CTableMap resultMap = ctable.getValue();

          // maintain result dims and nonzeros
          for (Integer i : resultIDs) {
            _resultNonZeros[i] += resultMap.size();
            if (_resultDimsUnknown[i] == (byte) 1) {
              _resultMaxRowDims[i] = Math.max(resultMap.getMaxRow(), _resultMaxRowDims[i]);
              _resultMaxColDims[i] = Math.max(resultMap.getMaxColumn(), _resultMaxColDims[i]);
            }
          }

          // output result data
          for (LLDoubleEntry e : resultMap.entrySet()) {
            MatrixIndexes key = new MatrixIndexes(e.key1, e.key2);
            value.setValue(e.value);
            for (Integer i : resultIDs) {
              _collector.collectOutput(key, value, i, reporter);
            }
          }
        }
      } else if (_blockBuffer != null) {
        MatrixIndexes key = new MatrixIndexes(1, 1);
        for (Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet()) {
          ArrayList<Integer> resultIDs =
              ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
          MatrixBlock outBlock = ctable.getValue();
          outBlock.recomputeNonZeros();

          // TODO: change hard coding of 1000
          int brlen = 1000, bclen = 1000;
          int rlen = outBlock.getNumRows();
          int clen = outBlock.getNumColumns();

          // final output matrix is smaller than a single block
          // (columns are checked against bclen, not brlen, so the test stays
          // correct once the two block sizes are no longer identical)
          if (rlen <= brlen && clen <= bclen) {
            key = new MatrixIndexes(1, 1);
            for (Integer i : resultIDs) {
              _collector.collectOutput(key, outBlock, i, reporter);
              _resultNonZeros[i] += outBlock.getNonZeros();
            }
          } else {
            // Following code is similar to that in
            // DataConverter.writeBinaryBlockMatrixToHDFS
            // initialize blocks for reuse (at most 4 different blocks required)
            MatrixBlock[] blocks =
                MatrixWriter.createMatrixBlocksForReuse(
                    rlen, clen, brlen, bclen, true, outBlock.getNonZeros());

            // number of row / column blocks, computed once instead of per iteration
            int numBlockRows = (int) Math.ceil(rlen / (double) brlen);
            int numBlockCols = (int) Math.ceil(clen / (double) bclen);

            // create and write subblocks of matrix
            for (int blockRow = 0; blockRow < numBlockRows; blockRow++) {
              int row_offset = blockRow * brlen;
              // boundary blocks hold only the remaining rows
              int maxRow = Math.min(brlen, rlen - row_offset);

              for (int blockCol = 0; blockCol < numBlockCols; blockCol++) {
                int col_offset = blockCol * bclen;
                // boundary blocks hold only the remaining columns
                int maxCol = Math.min(bclen, clen - col_offset);

                // get reuse matrix block
                MatrixBlock block =
                    MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);

                // copy submatrix to block
                outBlock.sliceOperations(
                    row_offset,
                    row_offset + maxRow - 1,
                    col_offset,
                    col_offset + maxCol - 1,
                    block);

                // TODO: skip empty "block"

                // emit the block under its 1-based block index
                key.setIndexes(blockRow + 1, blockCol + 1);
                for (Integer i : resultIDs) {
                  _collector.collectOutput(key, block, i, reporter);
                  _resultNonZeros[i] += block.getNonZeros();
                }

                // reset block for later reuse
                block.reset();
              }
            }
          }
        }
      } else {
        throw new DMLRuntimeException("Unexpected.. both ctable buffers are empty.");
      }
    } catch (Exception ex) {
      throw new RuntimeException("Failed to flush ctable buffer.", ex);
    }

    // remove existing partial ctables; reaching this point means one of the two
    // buffers is non-null, otherwise the exception above would have propagated
    if (_mapBuffer != null) {
      _mapBuffer.clear();
    } else {
      _blockBuffer.clear();
    }
  }