/**
     * Multiplies one input block with its single matching block looked up from {@code _pbc}.
     *
     * <p>When {@code _type} is {@code CacheType.LEFT}, block {@code (1, rowIndex)} of
     * {@code _pbc} is used as the left operand and the input block as the right operand.
     * Otherwise block {@code (columnIndex, 1)} of {@code _pbc} is used as the right operand
     * and the input block as the left operand.
     *
     * @param arg0 index/block pair of the input matrix
     * @return the index/block pair that was handed to {@code performAggregateBinary} as output
     * @throws Exception if the underlying block lookup or multiplication fails
     */
    @Override
    public Tuple2<MatrixIndexes, MatrixBlock> call(Tuple2<MatrixIndexes, MatrixBlock> arg0)
        throws Exception {
      final MatrixIndexes inIx = arg0._1();
      final MatrixBlock inBlk = arg0._2();

      // output holders, passed to performAggregateBinary as its output arguments
      final MatrixIndexes outIx = new MatrixIndexes();
      final MatrixBlock outBlk = new MatrixBlock();

      if (_type == CacheType.LEFT) {
        // cached operand sits on the left-hand side: fetch block (1, rowIndex)
        final MatrixBlock lhs = _pbc.getMatrixBlock(1, (int) inIx.getRowIndex());
        final MatrixIndexes lhsIx = new MatrixIndexes(1, inIx.getRowIndex());

        OperationsOnMatrixValues.performAggregateBinary(
            lhsIx, lhs, inIx, inBlk, outIx, outBlk, _op);
      } else {
        // any other cache type is treated as RIGHT: fetch block (columnIndex, 1)
        final MatrixBlock rhs = _pbc.getMatrixBlock((int) inIx.getColumnIndex(), 1);
        final MatrixIndexes rhsIx = new MatrixIndexes(inIx.getColumnIndex(), 1);

        OperationsOnMatrixValues.performAggregateBinary(
            inIx, inBlk, rhsIx, rhs, outIx, outBlk, _op);
      }

      return new Tuple2<MatrixIndexes, MatrixBlock>(outIx, outBlk);
    }
    /**
     * Runs the {@code ChainType.XtXv} chain multiplication on a single input block.
     *
     * <p>Block {@code (1, 1)} of {@code _pmV} is passed as the first operand; the second
     * operand is {@code null} for this chain type.
     *
     * @param arg0 the input block on which the chain operation is invoked
     * @return whatever {@code chainMatrixMultOperations} returns for the given operands
     * @throws Exception if the block lookup or the chain multiplication fails
     */
    @Override
    public MatrixBlock call(MatrixBlock arg0) throws Exception {
      final MatrixBlock v = _pmV.getMatrixBlock(1, 1);

      // fresh block handed in as the output argument of the chain operation
      final MatrixBlock result = new MatrixBlock();

      return arg0.chainMatrixMultOperations(v, null, result, ChainType.XtXv);
    }
    /**
     * Runs the {@code ChainType.XtwXv} chain multiplication on a single input block.
     *
     * <p>Block {@code (1, 1)} of {@code _pmV} and block {@code (rowIndex, 1)} of
     * {@code _pmW} (where {@code rowIndex} is the row index of the input block) are passed
     * as the two operands. The result is always emitted under index {@code (1, 1)}.
     *
     * @param arg0 index/block pair of the input matrix
     * @return a tuple of index {@code (1, 1)} and the block used as output argument
     * @throws Exception if a block lookup or the chain multiplication fails
     */
    @Override
    public Tuple2<MatrixIndexes, MatrixBlock> call(Tuple2<MatrixIndexes, MatrixBlock> arg0)
        throws Exception {
      final MatrixBlock v = _pmV.getMatrixBlock(1, 1);

      final MatrixIndexes inIx = arg0._1();
      final MatrixBlock inBlk = arg0._2();

      // the second operand is selected by the row index of the incoming block
      final MatrixBlock w = _pmW.getMatrixBlock((int) inIx.getRowIndex(), 1);

      final MatrixBlock outBlk = new MatrixBlock();
      inBlk.chainMatrixMultOperations(v, w, outBlk, ChainType.XtwXv);

      return new Tuple2<MatrixIndexes, MatrixBlock>(new MatrixIndexes(1, 1), outBlk);
    }
    /**
     * Multiplies one input block with every matching block of {@code _pbc} and returns all
     * resulting index/block pairs.
     *
     * <p>When {@code _type} is {@code CacheType.LEFT}, the loop runs over
     * {@code 1.._pbc.getNumRowBlocks()} and uses block {@code (i, rowIndex)} of {@code _pbc}
     * as the left operand. Otherwise it runs over {@code 1.._pbc.getNumColumnBlocks()} and
     * uses block {@code (columnIndex, j)} of {@code _pbc} as the right operand.
     *
     * @param arg0 index/block pair of the input matrix
     * @return one output tuple per iterated block, in iteration order
     * @throws Exception if a block lookup or multiplication fails
     */
    @Override
    public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(
        Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
      final ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> results =
          new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();

      final MatrixIndexes inIx = arg0._1();
      final MatrixBlock inBlk = arg0._2();

      // any cache type other than LEFT is treated as RIGHT
      final boolean cacheIsLeft = (_type == CacheType.LEFT);
      final int numBlocks = cacheIsLeft ? _pbc.getNumRowBlocks() : _pbc.getNumColumnBlocks();

      for (int k = 1; k <= numBlocks; k++) {
        // fresh output holders per product, passed to performAggregateBinary as outputs
        final MatrixIndexes outIx = new MatrixIndexes();
        final MatrixBlock outBlk = new MatrixBlock();

        if (cacheIsLeft) {
          // k-th left-hand-side block matching the input's row index
          final MatrixBlock lhs = _pbc.getMatrixBlock(k, (int) inIx.getRowIndex());
          OperationsOnMatrixValues.performAggregateBinary(
              new MatrixIndexes(k, inIx.getRowIndex()), lhs, inIx, inBlk, outIx, outBlk, _op);
        } else {
          // k-th right-hand-side block matching the input's column index
          final MatrixBlock rhs = _pbc.getMatrixBlock((int) inIx.getColumnIndex(), k);
          OperationsOnMatrixValues.performAggregateBinary(
              inIx, inBlk, new MatrixIndexes(inIx.getColumnIndex(), k), rhs, outIx, outBlk, _op);
        }

        results.add(new Tuple2<MatrixIndexes, MatrixBlock>(outIx, outBlk));
      }

      return results;
    }
    /**
     * Performs a permutation matrix multiplication of one input block against the matching
     * block of {@code _pmV} and returns the one or two resulting output blocks.
     *
     * <p>The smallest non-zero value and the maximum value of the {@code _pmV} block are
     * converted to target row-block indexes using {@code _brlen}. If both fall into the same
     * row block a single output is produced, otherwise two. Nothing is emitted when the
     * smallest non-zero value is below 1.
     *
     * @param arg0 index/block pair of the input matrix
     * @return zero, one or two output tuples keyed by (target row block, input column index)
     * @throws Exception if the block lookup or the multiplication fails
     */
    @Override
    public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(
        Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
      final ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> results =
          new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();
      final MatrixIndexes inIx = arg0._1();
      final MatrixBlock data = arg0._2();

      // block of _pmV aligned with the row index of the incoming block
      final MatrixBlock perm = _pmV.getMatrixBlock((int) inIx.getRowIndex(), 1);

      // derive the first and last target row-block index from the value range of perm
      final long firstPos = UtilFunctions.toLong(perm.minNonZero());
      final long lastPos = UtilFunctions.toLong(perm.max());
      final long firstRowIx = (firstPos - 1) / _brlen + 1;
      final long lastRowIx = (lastPos - 1) / _brlen + 1;
      final boolean twoTargets = (firstRowIx != lastRowIx);

      // no row selected: nothing to emit
      if (firstPos < 1) {
        return results;
      }

      // estimate output sparsity to pick the in-memory format of the output blocks
      final double permSparsity =
          OptimizerUtils.getSparsity(perm.getNumRows(), 1, perm.getNonZeros());
      final long estimatedNnz = (long) (permSparsity * data.getNonZeros());
      final boolean sparseOut =
          MatrixBlock.evalSparseFormatInMemory(_brlen, data.getNumColumns(), estimatedNnz);

      // allocate the first output with the default block row count (corrected further below)
      final MatrixBlock firstOut = new MatrixBlock();
      firstOut.reset(_brlen, data.getNumColumns(), sparseOut);

      // a second output exists only if the targets span two different row blocks
      MatrixBlock secondOut = null;
      if (twoTargets) {
        secondOut = new MatrixBlock();
        secondOut.reset(
            UtilFunctions.computeBlockSize(_rlen, lastRowIx, _brlen),
            data.getNumColumns(),
            sparseOut);
      }

      // core permutation; firstOut was sized with the default block length for this call,
      // so its row count is adjusted to the actual block size only afterwards
      perm.permutationMatrixMultOperations(data, firstOut, secondOut);
      firstOut.setNumRows(UtilFunctions.computeBlockSize(_rlen, firstRowIx, _brlen));

      results.add(
          new Tuple2<MatrixIndexes, MatrixBlock>(
              new MatrixIndexes(firstRowIx, inIx.getColumnIndex()), firstOut));
      if (twoTargets) {
        results.add(
            new Tuple2<MatrixIndexes, MatrixBlock>(
                new MatrixIndexes(lastRowIx, inIx.getColumnIndex()), secondOut));
      }

      return results;
    }