/**
   * @param target
   * @param groups
   * @param brlen
   * @param bclen
   * @param outlist
   * @throws DMLRuntimeException
   */
  public static void performMapGroupedAggregate(
      Operator op,
      IndexedMatrixValue inTarget,
      MatrixBlock groups,
      int ngroups,
      int brlen,
      int bclen,
      ArrayList<IndexedMatrixValue> outlist)
      throws DMLRuntimeException {
    MatrixIndexes ix = inTarget.getIndexes();
    MatrixBlock target = (MatrixBlock) inTarget.getValue();

    // execute grouped aggregate operations
    MatrixBlock out = groups.groupedAggOperations(target, null, new MatrixBlock(), ngroups, op);

    if (out.getNumRows() <= brlen && out.getNumColumns() <= bclen) {
      // single output block
      outlist.add(new IndexedMatrixValue(new MatrixIndexes(1, ix.getColumnIndex()), out));
    } else {
      // multiple output blocks (by op def, single column block )
      for (int blockRow = 0;
          blockRow < (int) Math.ceil(out.getNumRows() / (double) brlen);
          blockRow++) {
        int maxRow =
            (blockRow * brlen + brlen < out.getNumRows())
                ? brlen
                : out.getNumRows() - blockRow * brlen;
        int row_offset = blockRow * brlen;

        // copy submatrix to block
        MatrixBlock tmp =
            out.sliceOperations(
                row_offset, row_offset + maxRow - 1, 0, out.getNumColumns() - 1, new MatrixBlock());

        // append block to result cache
        outlist.add(
            new IndexedMatrixValue(new MatrixIndexes(blockRow + 1, ix.getColumnIndex()), tmp));
      }
    }
  }
  /**
   * @param in
   * @param ixrange
   * @param brlen
   * @param bclen
   * @param rlen
   * @param clen
   * @param outlist
   * @throws DMLRuntimeException
   */
  public static void performShift(
      IndexedMatrixValue in,
      IndexRange ixrange,
      int brlen,
      int bclen,
      long rlen,
      long clen,
      ArrayList<IndexedMatrixValue> outlist)
      throws DMLRuntimeException {
    MatrixIndexes ix = in.getIndexes();
    MatrixBlock mb = (MatrixBlock) in.getValue();

    long start_lhs_globalRowIndex = ixrange.rowStart + (ix.getRowIndex() - 1) * brlen;
    long start_lhs_globalColIndex = ixrange.colStart + (ix.getColumnIndex() - 1) * bclen;
    long end_lhs_globalRowIndex = start_lhs_globalRowIndex + mb.getNumRows() - 1;
    long end_lhs_globalColIndex = start_lhs_globalColIndex + mb.getNumColumns() - 1;

    long start_lhs_rowIndex = UtilFunctions.computeBlockIndex(start_lhs_globalRowIndex, brlen);
    long end_lhs_rowIndex = UtilFunctions.computeBlockIndex(end_lhs_globalRowIndex, brlen);
    long start_lhs_colIndex = UtilFunctions.computeBlockIndex(start_lhs_globalColIndex, bclen);
    long end_lhs_colIndex = UtilFunctions.computeBlockIndex(end_lhs_globalColIndex, bclen);

    for (long leftRowIndex = start_lhs_rowIndex; leftRowIndex <= end_lhs_rowIndex; leftRowIndex++) {
      for (long leftColIndex = start_lhs_colIndex;
          leftColIndex <= end_lhs_colIndex;
          leftColIndex++) {

        // Calculate global index of right hand side block
        long lhs_rl = Math.max((leftRowIndex - 1) * brlen + 1, start_lhs_globalRowIndex);
        long lhs_ru = Math.min(leftRowIndex * brlen, end_lhs_globalRowIndex);
        long lhs_cl = Math.max((leftColIndex - 1) * bclen + 1, start_lhs_globalColIndex);
        long lhs_cu = Math.min(leftColIndex * bclen, end_lhs_globalColIndex);

        int lhs_lrl = UtilFunctions.computeCellInBlock(lhs_rl, brlen);
        int lhs_lru = UtilFunctions.computeCellInBlock(lhs_ru, brlen);
        int lhs_lcl = UtilFunctions.computeCellInBlock(lhs_cl, bclen);
        int lhs_lcu = UtilFunctions.computeCellInBlock(lhs_cu, bclen);

        long rhs_rl = lhs_rl - ixrange.rowStart + 1;
        long rhs_ru = rhs_rl + (lhs_ru - lhs_rl);
        long rhs_cl = lhs_cl - ixrange.colStart + 1;
        long rhs_cu = rhs_cl + (lhs_cu - lhs_cl);

        int rhs_lrl = UtilFunctions.computeCellInBlock(rhs_rl, brlen);
        int rhs_lru = UtilFunctions.computeCellInBlock(rhs_ru, brlen);
        int rhs_lcl = UtilFunctions.computeCellInBlock(rhs_cl, bclen);
        int rhs_lcu = UtilFunctions.computeCellInBlock(rhs_cu, bclen);

        MatrixBlock slicedRHSBlk =
            mb.sliceOperations(rhs_lrl, rhs_lru, rhs_lcl, rhs_lcu, new MatrixBlock());

        int lbrlen = UtilFunctions.computeBlockSize(rlen, leftRowIndex, brlen);
        int lbclen = UtilFunctions.computeBlockSize(clen, leftColIndex, bclen);
        MatrixBlock resultBlock = new MatrixBlock(lbrlen, lbclen, false);
        resultBlock =
            resultBlock.leftIndexingOperations(
                slicedRHSBlk, lhs_lrl, lhs_lru, lhs_lcl, lhs_lcu, null, UpdateType.COPY);
        outlist.add(
            new IndexedMatrixValue(new MatrixIndexes(leftRowIndex, leftColIndex), resultBlock));
      }
    }
  }