/**
   * MR Cumsum is currently based on a multipass algorithm of (1) preaggregation and (2) subsequent
   * offsetting. Note that we currently support one robust physical operator but many alternative
   * realizations are possible for specific scenarios (e.g., when the preaggregated intermediate fit
   * into the map task memory budget) or by creating custom job types.
   *
   * @return
   * @throws HopsException
   * @throws LopsException
   */
  private Lop constructLopsMRCumulativeUnary() throws HopsException, LopsException {
    Hop input = getInput().get(0);
    long rlen = input.getDim1();
    long clen = input.getDim2();
    long brlen = input.getRowsInBlock();
    long bclen = input.getColsInBlock();
    boolean force = !dimsKnown() || _etypeForced == ExecType.MR;
    OperationTypes aggtype = getCumulativeAggType();

    Lop X = input.constructLops();
    Lop TEMP = X;
    ArrayList<Lop> DATA = new ArrayList<Lop>();
    int level = 0;

    // recursive preaggregation until aggregates fit into CP memory budget
    while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen)
                    + OptimizerUtils.estimateSize(1, clen))
                > OptimizerUtils.getLocalMemBudget()
            && TEMP.getOutputParameters().getNumRows() > 1)
        || force) {
      DATA.add(TEMP);

      // preaggregation per block
      long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen);
      Lop preagg =
          new CumulativePartialAggregate(
              TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
      preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
      setLineNumbers(preagg);

      Group group = new Group(preagg, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
      group.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
      setLineNumbers(group);

      Aggregate agg =
          new Aggregate(
              group, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
      agg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
      agg.setupCorrectionLocation(
          CorrectionLocationType
              .NONE); // aggregation uses kahanSum but the inputs do not have correction values
      setLineNumbers(agg);
      TEMP = agg;
      level++;
      force = false; // in case of unknowns, generate one level
    }

    // in-memory cum sum (of partial aggregates)
    if (TEMP.getOutputParameters().getNumRows() != 1) {
      int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
      Unary unary1 =
          new Unary(
              TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k);
      unary1
          .getOutputParameters()
          .setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1);
      setLineNumbers(unary1);
      TEMP = unary1;
    }

    // split, group and mr cumsum
    while (level-- > 0) {
      double init = getCumulativeInitValue();
      CumulativeSplitAggregate split =
          new CumulativeSplitAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, init);
      split.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
      setLineNumbers(split);

      Group group1 =
          new Group(DATA.get(level), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
      group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
      setLineNumbers(group1);

      Group group2 = new Group(split, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
      group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
      setLineNumbers(group2);

      CumulativeOffsetBinary binary =
          new CumulativeOffsetBinary(
              group1, group2, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
      binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
      setLineNumbers(binary);
      TEMP = binary;
    }

    return TEMP;
  }
  @Override
  public Lop constructLops() throws HopsException, LopsException {
    // return already created lops
    if (getLops() != null) return getLops();

    try {
      ExecType et = optFindExecType();
      Hop input = getInput().get(0);

      if (et == ExecType.CP) {
        Lop agg1 = null;
        if (isTernaryAggregateRewriteApplicable()) {
          agg1 = constructLopsTernaryAggregateRewrite(et);
        } else if (isUnaryAggregateOuterCPRewriteApplicable()) {
          OperationTypes op = HopsAgg2Lops.get(_op);
          DirectionTypes dir = HopsDirection2Lops.get(_direction);

          BinaryOp binput = (BinaryOp) getInput().get(0);
          agg1 =
              new UAggOuterChain(
                  binput.getInput().get(0).constructLops(),
                  binput.getInput().get(1).constructLops(),
                  op,
                  dir,
                  HopsOpOp2LopsB.get(binput.getOp()),
                  DataType.MATRIX,
                  getValueType(),
                  ExecType.CP);
          PartialAggregate.setDimensionsBasedOnDirection(
              agg1, getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), dir);

          if (getDataType() == DataType.SCALAR) {
            UnaryCP unary1 =
                new UnaryCP(
                    agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
            unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
            setLineNumbers(unary1);
            setLops(unary1);
          }

        } else { // general case
          int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
          if (DMLScript.USE_ACCELERATOR
              && (DMLScript.FORCE_ACCELERATOR
                  || getMemEstimate() < OptimizerUtils.GPU_MEMORY_BUDGET)
              && (_op == AggOp.SUM)) {
            et = ExecType.GPU;
            k = 1;
          }
          agg1 =
              new PartialAggregate(
                  input.constructLops(),
                  HopsAgg2Lops.get(_op),
                  HopsDirection2Lops.get(_direction),
                  getDataType(),
                  getValueType(),
                  et,
                  k);
        }

        setOutputDimensions(agg1);
        setLineNumbers(agg1);
        setLops(agg1);

        if (getDataType() == DataType.SCALAR) {
          agg1.getOutputParameters()
              .setDimensions(1, 1, getRowsInBlock(), getColsInBlock(), getNnz());
        }
      } else if (et == ExecType.MR) {
        OperationTypes op = HopsAgg2Lops.get(_op);
        DirectionTypes dir = HopsDirection2Lops.get(_direction);

        // unary aggregate operation
        Lop transform1 = null;
        if (isUnaryAggregateOuterRewriteApplicable()) {
          BinaryOp binput = (BinaryOp) getInput().get(0);
          transform1 =
              new UAggOuterChain(
                  binput.getInput().get(0).constructLops(),
                  binput.getInput().get(1).constructLops(),
                  op,
                  dir,
                  HopsOpOp2LopsB.get(binput.getOp()),
                  DataType.MATRIX,
                  getValueType(),
                  ExecType.MR);
          PartialAggregate.setDimensionsBasedOnDirection(
              transform1,
              getDim1(),
              getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              dir);
        } else // default
        {
          transform1 =
              new PartialAggregate(input.constructLops(), op, dir, DataType.MATRIX, getValueType());
          ((PartialAggregate) transform1)
              .setDimensionsBasedOnDirection(
                  getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock());
        }
        setLineNumbers(transform1);

        // aggregation if required
        Lop aggregate = null;
        Group group1 = null;
        Aggregate agg1 = null;
        if (requiresAggregation(input, _direction) || transform1 instanceof UAggOuterChain) {
          group1 =
              new Group(transform1, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
          group1
              .getOutputParameters()
              .setDimensions(
                  getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), getNnz());
          setLineNumbers(group1);

          agg1 = new Aggregate(group1, HopsAgg2Lops.get(_op), DataType.MATRIX, getValueType(), et);
          agg1.getOutputParameters()
              .setDimensions(
                  getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), getNnz());
          agg1.setupCorrectionLocation(PartialAggregate.getCorrectionLocation(op, dir));
          setLineNumbers(agg1);

          aggregate = agg1;
        } else {
          ((PartialAggregate) transform1).setDropCorrection();
          aggregate = transform1;
        }

        setLops(aggregate);

        // cast if required
        if (getDataType() == DataType.SCALAR) {

          // Set the dimensions of PartialAggregate LOP based on the
          // direction in which aggregation is performed
          PartialAggregate.setDimensionsBasedOnDirection(
              transform1,
              input.getDim1(),
              input.getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              dir);

          if (group1 != null && agg1 != null) { // if aggregation required
            group1
                .getOutputParameters()
                .setDimensions(
                    input.getDim1(),
                    input.getDim2(),
                    input.getRowsInBlock(),
                    input.getColsInBlock(),
                    getNnz());
            agg1.getOutputParameters()
                .setDimensions(1, 1, input.getRowsInBlock(), input.getColsInBlock(), getNnz());
          }

          UnaryCP unary1 =
              new UnaryCP(
                  aggregate,
                  HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR),
                  getDataType(),
                  getValueType());
          unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
          setLineNumbers(unary1);
          setLops(unary1);
        }
      } else if (et == ExecType.SPARK) {
        OperationTypes op = HopsAgg2Lops.get(_op);
        DirectionTypes dir = HopsDirection2Lops.get(_direction);

        // unary aggregate
        if (isTernaryAggregateRewriteApplicable()) {
          Lop aggregate = constructLopsTernaryAggregateRewrite(et);
          setOutputDimensions(aggregate); // 0x0 (scalar)
          setLineNumbers(aggregate);
          setLops(aggregate);
        } else if (isUnaryAggregateOuterSPRewriteApplicable()) {
          BinaryOp binput = (BinaryOp) getInput().get(0);
          Lop transform1 =
              new UAggOuterChain(
                  binput.getInput().get(0).constructLops(),
                  binput.getInput().get(1).constructLops(),
                  op,
                  dir,
                  HopsOpOp2LopsB.get(binput.getOp()),
                  DataType.MATRIX,
                  getValueType(),
                  ExecType.SPARK);
          PartialAggregate.setDimensionsBasedOnDirection(
              transform1,
              getDim1(),
              getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              dir);
          setLineNumbers(transform1);
          setLops(transform1);

          if (getDataType() == DataType.SCALAR) {
            UnaryCP unary1 =
                new UnaryCP(
                    transform1,
                    HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR),
                    getDataType(),
                    getValueType());
            unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
            setLineNumbers(unary1);
            setLops(unary1);
          }

        } else // default
        {
          boolean needAgg = requiresAggregation(input, _direction);
          SparkAggType aggtype = getSparkUnaryAggregationType(needAgg);

          PartialAggregate aggregate =
              new PartialAggregate(
                  input.constructLops(),
                  HopsAgg2Lops.get(_op),
                  HopsDirection2Lops.get(_direction),
                  DataType.MATRIX,
                  getValueType(),
                  aggtype,
                  et);
          aggregate.setDimensionsBasedOnDirection(
              getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock());
          setLineNumbers(aggregate);
          setLops(aggregate);

          if (getDataType() == DataType.SCALAR) {
            UnaryCP unary1 =
                new UnaryCP(
                    aggregate,
                    HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR),
                    getDataType(),
                    getValueType());
            unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
            setLineNumbers(unary1);
            setLops(unary1);
          }
        }
      }
    } catch (Exception e) {
      throw new HopsException(
          this.printErrorLocation() + "In AggUnary Hop, error constructing Lops ", e);
    }

    // add reblock/checkpoint lops if necessary
    constructAndSetLopsDataFlowProperties();

    // return created lops
    return getLops();
  }
  private Lop constructLopsIQM() throws HopsException, LopsException {

    ExecType et = optFindExecType();

    Hop input = getInput().get(0);
    if (et == ExecType.MR) {
      CombineUnary combine =
          CombineUnary.constructCombineLop(input.constructLops(), DataType.MATRIX, getValueType());
      combine
          .getOutputParameters()
          .setDimensions(
              input.getDim1(),
              input.getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              input.getNnz());

      SortKeys sort =
          SortKeys.constructSortByValueLop(
              combine,
              SortKeys.OperationTypes.WithoutWeights,
              DataType.MATRIX,
              ValueType.DOUBLE,
              ExecType.MR);

      // Sort dimensions are same as the first input
      sort.getOutputParameters()
          .setDimensions(
              input.getDim1(),
              input.getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              input.getNnz());

      Data lit = Data.createLiteralLop(ValueType.DOUBLE, Double.toString(0.25));

      lit.setAllPositions(
          this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn());

      PickByCount pick =
          new PickByCount(
              sort, lit, DataType.MATRIX, getValueType(), PickByCount.OperationTypes.RANGEPICK);

      pick.getOutputParameters().setDimensions(-1, -1, getRowsInBlock(), getColsInBlock(), -1);
      setLineNumbers(pick);

      PartialAggregate pagg =
          new PartialAggregate(
              pick,
              HopsAgg2Lops.get(Hop.AggOp.SUM),
              HopsDirection2Lops.get(Hop.Direction.RowCol),
              DataType.MATRIX,
              getValueType());
      setLineNumbers(pagg);

      // Set the dimensions of PartialAggregate LOP based on the
      // direction in which aggregation is performed
      pagg.setDimensionsBasedOnDirection(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock());

      Group group1 = new Group(pagg, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
      group1
          .getOutputParameters()
          .setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
      setLineNumbers(group1);

      Aggregate agg1 =
          new Aggregate(
              group1,
              HopsAgg2Lops.get(Hop.AggOp.SUM),
              DataType.MATRIX,
              getValueType(),
              ExecType.MR);
      agg1.getOutputParameters()
          .setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
      agg1.setupCorrectionLocation(pagg.getCorrectionLocation());
      setLineNumbers(agg1);

      UnaryCP unary1 =
          new UnaryCP(
              agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
      unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
      setLineNumbers(unary1);

      Unary iqm =
          new Unary(
              sort,
              unary1,
              Unary.OperationTypes.MR_IQM,
              DataType.SCALAR,
              ValueType.DOUBLE,
              ExecType.CP);
      iqm.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
      setLineNumbers(iqm);

      return iqm;
    } else {
      SortKeys sort =
          SortKeys.constructSortByValueLop(
              input.constructLops(),
              SortKeys.OperationTypes.WithoutWeights,
              DataType.MATRIX,
              ValueType.DOUBLE,
              et);
      sort.getOutputParameters()
          .setDimensions(
              input.getDim1(),
              input.getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              input.getNnz());
      PickByCount pick =
          new PickByCount(
              sort, null, getDataType(), getValueType(), PickByCount.OperationTypes.IQM, et, true);

      pick.getOutputParameters()
          .setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
      setLineNumbers(pick);

      return pick;
    }
  }