// right indexing doesn't really need the dimensionality of the left matrix
  // private static Lops dummy=new Data(null, Data.OperationTypes.READ, null, "-1", DataType.SCALAR,
  // ValueType.INT, false);
  public IndexingOp(
      String l,
      DataType dt,
      ValueType vt,
      Hop inpMatrix,
      Hop inpRowL,
      Hop inpRowU,
      Hop inpColL,
      Hop inpColU,
      boolean passedRowsLEU,
      boolean passedColsLEU) {
    super(l, dt, vt);

    getInput().add(0, inpMatrix);
    getInput().add(1, inpRowL);
    getInput().add(2, inpRowU);
    getInput().add(3, inpColL);
    getInput().add(4, inpColU);

    // create hops if one of them is null
    inpMatrix.getParent().add(this);
    inpRowL.getParent().add(this);
    inpRowU.getParent().add(this);
    inpColL.getParent().add(this);
    inpColU.getParent().add(this);

    // set information whether left indexing operation involves row (n x 1) or column (1 x m) matrix
    setRowLowerEqualsUpper(passedRowsLEU);
    setColLowerEqualsUpper(passedColsLEU);
  }
 /**
  * Re-derives the output dimensions, and for selected operations the number of non-zeros, from
  * the first input. Scalar outputs are left untouched.
  */
 @Override
 public void refreshSizeInformation() {
   // scalar outputs: nothing to refresh
   if (getDataType() == DataType.SCALAR) {
     return;
   }

   final Hop in = getInput().get(0);

   // scalar cast to matrix: force 1x1 instead of propagating the scalar's 0 dims
   // (0 would be interpreted as unknown)
   if (_op == OpOp1.CAST_AS_MATRIX && in.getDataType() == DataType.SCALAR) {
     setDim1(1);
     setDim2(1);
     return;
   }

   // general case B = op(A): B has the dimensions of A
   setDim1(in.getDim1());
   setDim2(in.getDim2());

   // operations treated as sparsity preserving also inherit the input's nnz
   final boolean sparsityPreserving =
       _op == OpOp1.ABS
           || _op == OpOp1.COS
           || _op == OpOp1.SIN
           || _op == OpOp1.TAN
           || _op == OpOp1.ACOS
           || _op == OpOp1.ASIN
           || _op == OpOp1.ATAN
           || _op == OpOp1.SQRT
           || _op == OpOp1.ROUND
           || _op == OpOp1.SPROP;
   if (sparsityPreserving) {
     setNnz(in.getNnz());
   }
 }
 /**
  * Prints this hop followed by all of its inputs, unless this hop is already marked as visited.
  * The hop is marked as visited afterwards in either case.
  *
  * @throws HopsException declared by the printing calls on this hop and its inputs
  */
 public void printMe() throws HopsException {
   final boolean notYetVisited = getVisited() != VisitStatus.DONE;

   if (notYetVisited) {
     // this node first, then its inputs in order
     super.printMe();
     for (Hop child : getInput()) {
       child.printMe();
     }
   }

   setVisited(VisitStatus.DONE);
 }
 /**
  * Prints this hop, its operation type and all of its inputs, but only when debug logging is
  * enabled. With debug logging disabled the method does nothing, not even marking the hop as
  * visited.
  *
  * @throws HopsException declared by the printing calls on this hop and its inputs
  */
 public void printMe() throws HopsException {
   if (!LOG.isDebugEnabled()) {
     return;
   }

   final boolean notYetVisited = getVisited() != VisitStatus.DONE;
   if (notYetVisited) {
     super.printMe();
     LOG.debug("  Operation: " + _op);
     for (Hop child : getInput()) {
       child.printMe();
     }
   }

   setVisited(VisitStatus.DONE);
 }
  /**
   * Checks whether {@code that} is an indexing hop over the very same input hops.
   *
   * @param that hop to compare against
   * @return true if {@code that} is an {@code IndexingOp} with the same number of inputs and the
   *     first five inputs are identical object references
   */
  @Override
  public boolean compare(Hop that) {
    if (!(that instanceof IndexingOp)) {
      return false;
    }
    if (getInput().size() != that.getInput().size()) {
      return false;
    }

    // matrix, row lower/upper, column lower/upper: all compared by reference
    for (int pos = 0; pos <= 4; pos++) {
      if (getInput().get(pos) != that.getInput().get(pos)) {
        return false;
      }
    }
    return true;
  }
 /**
  * Re-derives the output dimensions from the first input according to the aggregation
  * direction. Scalar outputs, and directions other than Col and Row, leave the dimensions
  * untouched.
  */
 @Override
 public void refreshSizeInformation() {
   if (getDataType() == DataType.SCALAR) {
     return;
   }

   final Hop in = getInput().get(0);
   if (_direction == Direction.Col) {
     // column-wise aggregation: a single row with one entry per input column
     setDim1(1);
     setDim2(in.getDim2());
   } else if (_direction == Direction.Row) {
     // row-wise aggregation: a single column with one entry per input row
     setDim1(in.getDim1());
     setDim2(1);
   }
 }
 /**
  * Creates a unary aggregate hop over a single input.
  *
  * @param l forwarded to the superclass constructor
  * @param dt forwarded to the superclass constructor
  * @param vt forwarded to the superclass constructor
  * @param o aggregation operation
  * @param idx aggregation direction
  * @param inp input hop; becomes input 0 and gets this hop registered as a parent
  */
 public AggUnaryOp(String l, DataType dt, ValueType vt, AggOp o, Direction idx, Hop inp) {
   super(l, dt, vt);

   // link this hop and its input in both directions
   getInput().add(0, inp);
   inp.getParent().add(this);

   // remember what to aggregate and along which direction
   _op = o;
   _direction = idx;
 }
  /**
   * Re-derives the output dimensions from the index bounds.
   *
   * <p>For each dimension the first matching rule wins: lower equals upper (size 1), full range
   * {@code 1:nrow(..)} / {@code 1:ncol(..)} (size of the indexed matrix), two literal bounds
   * (upper - lower + 1), or a block indexing expression (its block size). If no rule matches, the
   * dimension is left untouched.
   */
  @Override
  public void refreshSizeInformation() {
    final Hop matrix = getInput().get(0);
    final Hop rowLower = getInput().get(1);
    final Hop rowUpper = getInput().get(2);
    final Hop colLower = getInput().get(3);
    final Hop colUpper = getInput().get(4);

    // classify the bound expressions
    final boolean fullRowRange =
        rowLower instanceof LiteralOp
            && HopRewriteUtils.getIntValueSafe((LiteralOp) rowLower) == 1
            && rowUpper instanceof UnaryOp
            && ((UnaryOp) rowUpper).getOp() == OpOp1.NROW;
    final boolean fullColRange =
        colLower instanceof LiteralOp
            && HopRewriteUtils.getIntValueSafe((LiteralOp) colLower) == 1
            && colUpper instanceof UnaryOp
            && ((UnaryOp) colUpper).getOp() == OpOp1.NCOL;
    final boolean literalRowRange = rowLower instanceof LiteralOp && rowUpper instanceof LiteralOp;
    final boolean literalColRange = colLower instanceof LiteralOp && colUpper instanceof LiteralOp;

    // rows
    if (_rowLowerEqualsUpper) {
      setDim1(1);
    } else if (fullRowRange) {
      setDim1(matrix.getDim1());
    } else if (literalRowRange) {
      setDim1(
          HopRewriteUtils.getIntValueSafe((LiteralOp) rowUpper)
              - HopRewriteUtils.getIntValueSafe((LiteralOp) rowLower)
              + 1);
    } else if (isBlockIndexingExpression(rowLower, rowUpper)) {
      setDim1(getBlockIndexingExpressionSize(rowLower, rowUpper));
    }

    // columns
    if (_colLowerEqualsUpper) {
      setDim2(1);
    } else if (fullColRange) {
      setDim2(matrix.getDim2());
    } else if (literalColRange) {
      setDim2(
          HopRewriteUtils.getIntValueSafe((LiteralOp) colUpper)
              - HopRewriteUtils.getIntValueSafe((LiteralOp) colLower)
              + 1);
    } else if (isBlockIndexingExpression(colLower, colUpper)) {
      setDim2(getBlockIndexingExpressionSize(colLower, colUpper));
    }
  }
  /**
   * Creates a unary hop over a single input and immediately refreshes its size information.
   *
   * @param l forwarded to the superclass constructor
   * @param dt forwarded to the superclass constructor
   * @param vt forwarded to the superclass constructor
   * @param o unary operation
   * @param inp input hop; becomes input 0 and gets this hop registered as a parent
   * @throws HopsException declared by this constructor
   */
  public UnaryOp(String l, DataType dt, ValueType vt, OpOp1 o, Hop inp) throws HopsException {
    super(l, dt, vt);

    _op = o;

    // link this hop and its input in both directions
    getInput().add(0, inp);
    inp.getParent().add(this);

    // operation and input are in place: derive dims and nnz where they are unknown
    refreshSizeInformation();
  }
  /**
   * Computes the default memory estimate and then overrides it for the meta data operations NROW
   * and NCOL with {@code OptimizerUtils.INT_SIZE}.
   *
   * @param memo memo table handed to the default implementation
   */
  @Override
  public void computeMemEstimate(MemoTable memo) {
    // start from the default hop behavior
    super.computeMemEstimate(memo);

    // meta data operations only: replace the total estimate
    // (output and processing estimates are intentionally left as computed above)
    final boolean metaDataOp = _op == Hop.OpOp1.NROW || _op == Hop.OpOp1.NCOL;
    if (metaDataOp) {
      _memEstimate = OptimizerUtils.INT_SIZE;
    }
  }
  /**
   * Decides whether this aggregate can be fused with its multiply input(s).
   *
   * <p>Currently only a full (RowCol) sum over a binary multiply is supported; potentially this
   * can be generalized to any RC aggregate over two common binary operations.
   *
   * @return true if the sum-product rewrite is enabled, this is a RowCol SUM whose only input is a
   *     single-consumer, non-MR multiply, and all involved operands are of equal size
   * @throws HopsException declared by the execution type lookup of the input
   */
  private boolean isTernaryAggregateRewriteApplicable() throws HopsException {
    final boolean fullSum =
        OptimizerUtils.ALLOW_SUM_PRODUCT_REWRITES
            && _direction == Direction.RowCol
            && _op == AggOp.SUM;
    if (!fullSum) {
      return false;
    }

    final Hop product = getInput().get(0);

    // the multiply must be consumed by this sum only; MR is excluded because the
    // instruction is not implemented there (MR is in maintenance mode)
    final boolean fusableProduct =
        product.getParent().size() == 1
            && product instanceof BinaryOp
            && ((BinaryOp) product).getOp() == OpOp2.MULT
            && product.optFindExecType() != ExecType.MR;
    if (!fusableProduct) {
      return false;
    }

    final Hop left = product.getInput().get(0);
    final Hop right = product.getInput().get(1);

    // ternary via nested multiply on the left: arbitrary matrices but no mv/outer operations
    if (left instanceof BinaryOp && ((BinaryOp) left).getOp() == OpOp2.MULT) {
      return HopRewriteUtils.isEqualSize(left.getInput().get(0), product)
          && HopRewriteUtils.isEqualSize(left.getInput().get(1), product)
          && HopRewriteUtils.isEqualSize(right, product);
    }

    // ternary via nested multiply on the right: arbitrary matrices but no mv/outer operations
    if (right instanceof BinaryOp && ((BinaryOp) right).getOp() == OpOp2.MULT) {
      return HopRewriteUtils.isEqualSize(right.getInput().get(0), product)
          && HopRewriteUtils.isEqualSize(right.getInput().get(1), product)
          && HopRewriteUtils.isEqualSize(left, product);
    }

    // plain binary multiply: arbitrary matrices but no mv/outer operations
    return HopRewriteUtils.isEqualSize(left, right);
  }
  /**
   * Creates a client transaction to handle a new request. The real message channel is obtained
   * from the superclass and wrapped in a new client transaction, which is put at the front of the
   * client transaction list, configured with the hop's host and port, and has its transaction
   * timer started.
   *
   * @param sourcePort source port handed to the superclass when creating the channel
   * @param nextHop hop to create a channel to contact
   * @return the client transaction wrapping the channel, or null if the superclass returned no
   *     channel (no message processor available for the transport)
   * @throws UnknownHostException declared by the superclass channel creation
   */
  public MessageChannel createMessageChannel(int sourcePort, Hop nextHop)
      throws UnknownHostException {
    synchronized (clientTransactions) {
      // the raw channel comes from the superclass
      final MessageChannel rawChannel = super.createMessageChannel(sourcePort, nextHop);
      if (rawChannel == null) {
        return null;
      }

      // wrap it into a client transaction and register that transaction first in the list
      final SIPTransaction transaction = createClientTransaction(rawChannel);
      final SIPClientTransaction clientTransaction = (SIPClientTransaction) transaction;
      clientTransactions.add(0, clientTransaction);

      clientTransaction.setViaPort(nextHop.getPort());
      clientTransaction.setViaHost(nextHop.getHost());

      // the state machine needs its timer running
      transaction.startTransactionTimer();
      return transaction;
    }
  }
  /**
   * Computes the default memory estimate and then tries to tighten the output estimate using the
   * non-zero count of the first input as a worst-case output nnz (useful when dimensions are
   * known but nnz is initially unknown).
   *
   * @param memo memo table used for the default estimate and for the input statistics lookup
   */
  @Override
  public void computeMemEstimate(MemoTable memo) {
    super.computeMemEstimate(memo);

    final MatrixCharacteristics inputStats = memo.getAllInputStats(getInput().get(0));
    if (!dimsKnown() || inputStats.getNonZeros() < 0) {
      return; // nothing to infer from
    }

    final long worstCaseNnz = inputStats.getNonZeros();
    final double refinedOutput = computeOutputMemEstimate(_dim1, _dim2, worstCaseNnz);

    // only ever lower the estimate; the total is recomputed from the new output size
    if (refinedOutput < _outputMemEstimate) {
      _outputMemEstimate = refinedOutput;
      _memEstimate = getInputOutputSize();
    }
  }
  /**
   * Decides whether a final aggregation step is needed for aggregating {@code input} along
   * {@code dir}.
   *
   * @param input hop that is aggregated
   * @param dir aggregation direction
   * @return false if the customization is disabled via {@code ALLOW_UNARYAGG_WO_FINAL_AGG}, or if
   *     the aggregated dimension is greater than one and fits into a single block; true otherwise
   */
  private boolean requiresAggregation(Hop input, Direction dir) {
    if (!ALLOW_UNARYAGG_WO_FINAL_AGG) {
      return false; // customization not allowed
    }

    // e.g., colSums(X) where all rows of X fall into one row block
    final boolean colAggWithinOneBlock =
        input.getDim1() > 1 && input.getDim1() <= input.getRowsInBlock() && dir == Direction.Col;
    if (colAggWithinOneBlock) {
      return false;
    }

    // e.g., rowSums(X) where all columns of X fall into one column block
    final boolean rowAggWithinOneBlock =
        input.getDim2() > 1 && input.getDim2() <= input.getColsInBlock() && dir == Direction.Row;
    return !rowAggWithinOneBlock;
  }
  /**
   * Checks whether this aggregate sits on top of an outer vector operation whose right-hand side
   * fits (scaled by a case-specific factor) into the remote memory budget returned by
   * {@code OptimizerUtils.getRemoteMemBudgetMap(true)}.
   *
   * @return true if the input is an outer vector binary operation and the memory constraint holds
   */
  private boolean isUnaryAggregateOuterRewriteApplicable() {
    final Hop input = getInput().get(0);
    if (!(input instanceof BinaryOp && ((BinaryOp) input).isOuterVectorOperator())) {
      return false;
    }

    // for special cases, the broadcast is held twice in order to allow for an
    // efficient binary search over a plain java array
    double factor = 1.0;
    if (isCompareOperator(((BinaryOp) input).getOp())
        && (_direction == Direction.Row
            || _direction == Direction.Col
            || _direction == Direction.RowCol)
        && _op == AggOp.SUM) {
      factor = 2.0;
    }

    // index aggregates over comparisons need one more copy
    if (isCompareOperator(((BinaryOp) input).getOp())
        && (_direction == Direction.Row || _direction == Direction.Col)
        && (_op == AggOp.MAXINDEX || _op == AggOp.MININDEX)) {
      factor += 1.0;
    }

    // note: the memory constraint only needs to take the rhs into account because the
    // output is guaranteed to be an aggregate of <=16KB
    final Hop rhs = input.getInput().get(1);

    // known dims: size estimate from the dimensions; unknown dims: worst-case output estimate
    final double rhsSize =
        rhs.dimsKnown()
            ? OptimizerUtils.estimateSize(rhs.getDim1(), rhs.getDim2())
            : rhs.getOutputMemEstimate();

    return factor * rhsSize < OptimizerUtils.getRemoteMemBudgetMap(true);
  }
  /**
   * Indicates if the lbound:ubound expressions are of the form "(c * (i - 1) + 1) : (c * i)",
   * where we could use c as a tight size estimate.
   *
   * <p>Both bounds must use the same literal constant c and the same variable i (matched by
   * name). The upper bound must be a multiplication: any other binary operation over the same
   * literal and variable (e.g. "c + i") does not describe a block of size c and is rejected.
   *
   * @param lbound lower bound expression of the index range
   * @param ubound upper bound expression of the index range
   * @return true if both bounds match the block indexing pattern, false otherwise
   */
  private boolean isBlockIndexingExpression(Hop lbound, Hop ubound) {
    boolean ret = false;
    LiteralOp constant = null;
    DataOp var = null;

    // handle lower bound: (c * (i - 1)) + 1
    if (lbound instanceof BinaryOp
        && ((BinaryOp) lbound).getOp() == OpOp2.PLUS
        && lbound.getInput().get(1) instanceof LiteralOp
        && HopRewriteUtils.getDoubleValueSafe((LiteralOp) lbound.getInput().get(1)) == 1
        && lbound.getInput().get(0) instanceof BinaryOp) {
      BinaryOp lmult = (BinaryOp) lbound.getInput().get(0);
      if (lmult.getOp() == OpOp2.MULT
          && lmult.getInput().get(0) instanceof LiteralOp
          && lmult.getInput().get(1) instanceof BinaryOp) {
        BinaryOp lminus = (BinaryOp) lmult.getInput().get(1);
        if (lminus.getOp() == OpOp2.MINUS
            && lminus.getInput().get(1) instanceof LiteralOp
            && HopRewriteUtils.getDoubleValueSafe((LiteralOp) lminus.getInput().get(1)) == 1
            && lminus.getInput().get(0) instanceof DataOp) {
          constant = (LiteralOp) lmult.getInput().get(0);
          var = (DataOp) lminus.getInput().get(0);
        }
      }
    }

    // handle upper bound: c * i, with the same c and i as in the lower bound; the operator
    // check is essential, otherwise e.g. "c + i" would be mistaken for a block of size c
    if (var != null
        && constant != null
        && ubound instanceof BinaryOp
        && ((BinaryOp) ubound).getOp() == OpOp2.MULT
        && ubound.getInput().get(0) instanceof LiteralOp
        && ubound.getInput().get(1) instanceof DataOp
        && ubound.getInput().get(1).getName().equals(var.getName())) {
      LiteralOp constant2 = (LiteralOp) ubound.getInput().get(0);
      ret =
          (HopRewriteUtils.getDoubleValueSafe(constant)
              == HopRewriteUtils.getDoubleValueSafe(constant2));
    }

    return ret;
  }
  /**
   * Checks whether the right-hand side of an outer vector operation below this aggregate is small
   * enough to be broadcast.
   *
   * <p>For MAXINDEX/MININDEX twice the size must fit both the Spark broadcast memory budget and
   * the local memory budget; for all other aggregates the decision is delegated to
   * {@code OptimizerUtils.checkSparkBroadcastMemoryBudget}.
   *
   * @return true if sufficient memory
   */
  private boolean isUnaryAggregateOuterSPRewriteApplicable() {
    final Hop input = getInput().get(0);
    if (!(input instanceof BinaryOp && ((BinaryOp) input).isOuterVectorOperator())) {
      return false;
    }

    // note: the memory constraint only needs to take the rhs into account because the
    // output is guaranteed to be an aggregate of <=16KB
    final Hop rhs = input.getInput().get(1);

    final double size;
    if (rhs.dimsKnown()) {
      // dims known: estimate from the dimensions
      size = OptimizerUtils.estimateSize(rhs.getDim1(), rhs.getDim2());
    } else {
      // dims unknown: fall back to the worst-case output estimate
      size = rhs.getOutputMemEstimate();
    }

    if (_op == AggOp.MAXINDEX || _op == AggOp.MININDEX) {
      final double memBudgetExec = SparkExecutionContext.getBroadcastMemoryBudget();
      final double memBudgetLocal = OptimizerUtils.getLocalMemBudget();

      // basic requirement: the broadcast needs to fit twice in the remote broadcast memory
      // and local memory budget because we have to create a partitioned broadcast
      // memory and hand it over to the spark context as in-memory object
      return 2 * size < memBudgetExec && 2 * size < memBudgetLocal;
    }

    return OptimizerUtils.checkSparkBroadcastMemoryBudget(size);
  }
 /**
  * Returns the block size c of a block indexing expression, read from the first input of the
  * upper bound "(c * i)". Only valid for bounds accepted by isBlockIndexingExpression; the two
  * methods must be kept consistent.
  *
  * @param lbound lower bound expression (not inspected)
  * @param ubound upper bound expression whose first input is the literal c
  * @return the integer value of c
  */
 private long getBlockIndexingExpressionSize(Hop lbound, Hop ubound) {
   return HopRewriteUtils.getIntValueSafe((LiteralOp) ubound.getInput().get(0));
 }
  /**
   * Constructs (or returns the already constructed) lop for this unary hop.
   *
   * <p>Scalar outputs and scalar-to-matrix/frame casts are compiled to IQM, MEDIAN or a generic
   * UnaryCP lop; matrix outputs are compiled either to a dedicated cumulative plan (cumulative
   * operations outside CP) or to a default Unary lop.
   *
   * @return the lop of this hop
   * @throws HopsException if lop construction fails; the original exception is attached as cause
   * @throws LopsException declared by the lop construction
   */
  @Override
  public Lop constructLops() throws HopsException, LopsException {
    // reuse existing lop
    if (getLops() != null) {
      return getLops();
    }

    try {
      final Hop in = getInput().get(0);

      // value type casts, matrix to scalar, or scalar cast to matrix/frame
      final boolean scalarOrScalarCast =
          getDataType() == DataType.SCALAR
              || (_op == OpOp1.CAST_AS_MATRIX && in.getDataType() == DataType.SCALAR)
              || (_op == OpOp1.CAST_AS_FRAME && in.getDataType() == DataType.SCALAR);

      if (scalarOrScalarCast) {
        if (_op == Hop.OpOp1.IQM) {
          // special handling IQM
          setLops(constructLopsIQM());
        } else if (_op == Hop.OpOp1.MEDIAN) {
          setLops(constructLopsMedian());
        } else {
          // general case SCALAR/CAST (always in CP)
          final UnaryCP.OperationTypes cpType = HopsOpOp1LopsUS.get(_op);
          if (cpType == null) {
            throw new HopsException(
                "Unknown UnaryCP lop type for UnaryOp operation type '" + _op + "'");
          }

          final UnaryCP scalarLop =
              new UnaryCP(in.constructLops(), cpType, getDataType(), getValueType());
          setOutputDimensions(scalarLop);
          setLineNumbers(scalarLop);
          setLops(scalarLop);
        }
      } else {
        // general case MATRIX
        final ExecType et = optFindExecType();

        if (isCumulativeUnaryOperation() && et != ExecType.CP) {
          // cumulative operations outside CP get a dedicated multi-operator plan
          // TODO additional physical operation if offsets fit in memory
          final Lop cumulativeLop =
              (et == ExecType.MR)
                  ? constructLopsMRCumulativeUnary()
                  : constructLopsSparkCumulativeUnary();
          setLops(cumulativeLop);
        } else {
          // default unary; only cumulative operations use more than one thread
          int k = 1;
          if (isCumulativeUnaryOperation()) {
            k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
          }

          final Unary matrixLop =
              new Unary(
                  in.constructLops(),
                  HopsOpOp1LopsU.get(_op),
                  getDataType(),
                  getValueType(),
                  et,
                  k);
          setOutputDimensions(matrixLop);
          setLineNumbers(matrixLop);
          setLops(matrixLop);
        }
      }
    } catch (Exception e) {
      throw new HopsException(
          this.printErrorLocation() + "error constructing Lops for UnaryOp Hop -- \n ", e);
    }

    // add reblock/checkpoint lops if necessary
    constructAndSetLopsDataFlowProperties();

    return getLops();
  }
Exemple #20
0
  /**
   * Determines the next hop for the given request. The candidates are tried in this order:
   *
   * <ol>
   *   <li>the topmost Route header, if the request has Route headers. Its URI must be a SIP URI;
   *       if that URI has no {@code lr} parameter (the next hop is a strict router), the request
   *       is first rewritten by {@code fixStrictRouting} (route information postprocessing, RFC
   *       3261 section 16.6 step 6);
   *   <li>the request URI, if it is a SIP URI carrying an {@code maddr} parameter;
   *   <li>the default route (outbound proxy), if one is configured;
   *   <li>the request URI, if it is a SIP URI.
   * </ol>
   *
   * <p>A request without a request line is always sent to the default route.
   *
   * @param request the SIP request to route
   * @return the selected hop; may be null if no hop could be created from the request URI, or if
   *     the request URI is not a SIP URI and no other candidate applied
   * @throws SipException if the topmost Route header does not contain a SIP URI
   * @throws IllegalArgumentException if the request line has no request URI
   */
  public Hop getNextHop(Request request) throws SipException {
    final SIPRequest sipRequest = (SIPRequest) request;

    final RequestLine requestLine = sipRequest.getRequestLine();
    if (requestLine == null) {
      return defaultRoute;
    }

    final javax.sip.address.URI requestURI = requestLine.getUri();
    if (requestURI == null) {
      throw new IllegalArgumentException("Bad message: Null requestURI");
    }

    final RouteList routes = sipRequest.getRouteHeaders();

    // 1) Route headers: to send the request through a specified hop the application is
    // supposed to prepend the appropriate Route header
    if (routes != null) {
      final Route topRoute = (Route) routes.getFirst();
      final URI routeUri = topRoute.getAddress().getURI();
      if (!routeUri.isSipURI()) {
        throw new SipException("First Route not a SIP URI");
      }

      final SipURI routeSipUri = (SipURI) routeUri;
      if (!routeSipUri.hasLrParam()) {
        // strict router ahead: the Request-URI is appended to the Route header field and
        // the first Route value is moved into the Request-URI ("Route header popping")
        fixStrictRouting(sipRequest);
        if (sipStack.isLoggingEnabled()) {
          sipStack.getStackLogger().logDebug("Route post processing fixed strict routing");
        }
      }

      final Hop routeHop = createHop(routeSipUri, request);
      if (sipStack.isLoggingEnabled()) {
        sipStack.getStackLogger().logDebug("NextHop based on Route:" + routeHop);
      }
      return routeHop;
    }

    // 2) request URI with maddr parameter (the parameter is deliberately not removed)
    if (requestURI.isSipURI() && ((SipURI) requestURI).getMAddrParam() != null) {
      final Hop maddrHop = createHop((SipURI) requestURI, request);
      if (sipStack.isLoggingEnabled()) {
        sipStack
            .getStackLogger()
            .logDebug("Using request URI maddr to route the request = " + maddrHop.toString());
      }
      return maddrHop;
    }

    // 3) outbound proxy
    if (defaultRoute != null) {
      if (sipStack.isLoggingEnabled()) {
        sipStack
            .getStackLogger()
            .logDebug("Using outbound proxy to route the request = " + defaultRoute.toString());
      }
      return defaultRoute;
    }

    // 4) plain request URI
    if (requestURI.isSipURI()) {
      final Hop uriHop = createHop((SipURI) requestURI, request);
      if (sipStack.isLoggingEnabled()) {
        if (uriHop != null) {
          sipStack.getStackLogger().logDebug("Used request-URI for nextHop = " + uriHop.toString());
        } else {
          sipStack.getStackLogger().logDebug("returning null hop -- loop detected");
        }
      }
      return uriHop;
    }

    // The internal router should never be consulted for non-sip URIs.
    InternalErrorHandler.handleException(
        "Unexpected non-sip URI", this.sipStack.getStackLogger());
    return null;
  }
  /**
   * Builds the lop plan for the IQM operation over the first input.
   *
   * <p>For MR execution the plan is: combine, sort by value, range pick (with the literal 0.25),
   * full sum (partial aggregate, group, aggregate), cast to scalar, and finally a CP unary
   * MR_IQM lop that consumes both the sorted data and the scalar sum. For all other execution
   * types the plan is a sort by value followed by a single PickByCount IQM lop.
   *
   * @return the root lop of the constructed plan
   * @throws HopsException declared by the lop construction of the input
   * @throws LopsException declared by the lop constructors
   */
  private Lop constructLopsIQM() throws HopsException, LopsException {

    ExecType et = optFindExecType();

    Hop input = getInput().get(0);
    if (et == ExecType.MR) {
      // wrap the input lop into a combine lop carrying the input's dimensions
      CombineUnary combine =
          CombineUnary.constructCombineLop(input.constructLops(), DataType.MATRIX, getValueType());
      combine
          .getOutputParameters()
          .setDimensions(
              input.getDim1(),
              input.getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              input.getNnz());

      // sort the values (no weights) in MR
      SortKeys sort =
          SortKeys.constructSortByValueLop(
              combine,
              SortKeys.OperationTypes.WithoutWeights,
              DataType.MATRIX,
              ValueType.DOUBLE,
              ExecType.MR);

      // Sort dimensions are same as the first input
      sort.getOutputParameters()
          .setDimensions(
              input.getDim1(),
              input.getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              input.getNnz());

      // literal 0.25 handed to the range pick below
      Data lit = Data.createLiteralLop(ValueType.DOUBLE, Double.toString(0.25));

      lit.setAllPositions(
          this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn());

      // range pick over the sorted data; its output dimensions are unknown (-1)
      PickByCount pick =
          new PickByCount(
              sort, lit, DataType.MATRIX, getValueType(), PickByCount.OperationTypes.RANGEPICK);

      pick.getOutputParameters().setDimensions(-1, -1, getRowsInBlock(), getColsInBlock(), -1);
      setLineNumbers(pick);

      // full (RowCol) sum over the picked range: partial aggregate, group, final aggregate
      PartialAggregate pagg =
          new PartialAggregate(
              pick,
              HopsAgg2Lops.get(Hop.AggOp.SUM),
              HopsDirection2Lops.get(Hop.Direction.RowCol),
              DataType.MATRIX,
              getValueType());
      setLineNumbers(pagg);

      // Set the dimensions of PartialAggregate LOP based on the
      // direction in which aggregation is performed
      pagg.setDimensionsBasedOnDirection(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock());

      Group group1 = new Group(pagg, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
      group1
          .getOutputParameters()
          .setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
      setLineNumbers(group1);

      Aggregate agg1 =
          new Aggregate(
              group1,
              HopsAgg2Lops.get(Hop.AggOp.SUM),
              DataType.MATRIX,
              getValueType(),
              ExecType.MR);
      agg1.getOutputParameters()
          .setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
      // the final aggregate uses the same correction location as the partial aggregate
      agg1.setupCorrectionLocation(pagg.getCorrectionLocation());
      setLineNumbers(agg1);

      // cast the aggregated matrix to a scalar
      UnaryCP unary1 =
          new UnaryCP(
              agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
      unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
      setLineNumbers(unary1);

      // final IQM lop in CP over the sorted data and the scalar sum
      Unary iqm =
          new Unary(
              sort,
              unary1,
              Unary.OperationTypes.MR_IQM,
              DataType.SCALAR,
              ValueType.DOUBLE,
              ExecType.CP);
      iqm.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
      setLineNumbers(iqm);

      return iqm;
    } else {
      // non-MR: sort the input directly in the chosen execution type
      SortKeys sort =
          SortKeys.constructSortByValueLop(
              input.constructLops(),
              SortKeys.OperationTypes.WithoutWeights,
              DataType.MATRIX,
              ValueType.DOUBLE,
              et);
      sort.getOutputParameters()
          .setDimensions(
              input.getDim1(),
              input.getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              input.getNnz());
      // a single PickByCount lop of type IQM produces the result of this hop
      PickByCount pick =
          new PickByCount(
              sort, null, getDataType(), getValueType(), PickByCount.OperationTypes.IQM, et, true);

      pick.getOutputParameters()
          .setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
      setLineNumbers(pick);

      return pick;
    }
  }
  /**
   * MR Cumsum is currently based on a multipass algorithm of (1) preaggregation and (2) subsequent
   * offsetting. Note that we currently support one robust physical operator but many alternative
   * realizations are possible for specific scenarios (e.g., when the preaggregated intermediate fit
   * into the map task memory budget) or by creating custom job types.
   *
   * <p>The plan is built in three phases: a loop that adds one preaggregation level per iteration
   * and remembers each level's input, an optional in-memory (CP) cumulative operation over the
   * last partial aggregates, and a loop that walks the levels backwards and combines each
   * remembered input with the offsets computed so far.
   *
   * @return the root lop of the constructed plan
   * @throws HopsException declared by the lop construction of the input
   * @throws LopsException declared by the lop constructors
   */
  private Lop constructLopsMRCumulativeUnary() throws HopsException, LopsException {
    Hop input = getInput().get(0);
    long rlen = input.getDim1();
    long clen = input.getDim2();
    long brlen = input.getRowsInBlock();
    long bclen = input.getColsInBlock();
    // force at least one preaggregation level if dims are unknown or MR was forced
    boolean force = !dimsKnown() || _etypeForced == ExecType.MR;
    OperationTypes aggtype = getCumulativeAggType();

    Lop X = input.constructLops();
    Lop TEMP = X; // current top of the plan
    ArrayList<Lop> DATA = new ArrayList<Lop>(); // input of each preaggregation level
    int level = 0; // number of preaggregation levels created

    // recursive preaggregation until aggregates fit into CP memory budget
    while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen)
                    + OptimizerUtils.estimateSize(1, clen))
                > OptimizerUtils.getLocalMemBudget()
            && TEMP.getOutputParameters().getNumRows() > 1)
        || force) {
      DATA.add(TEMP);

      // preaggregation per block
      // (one output row per row block of the current level)
      long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen);
      Lop preagg =
          new CumulativePartialAggregate(
              TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
      preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
      setLineNumbers(preagg);

      Group group = new Group(preagg, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
      group.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
      setLineNumbers(group);

      // NOTE(review): this aggregate always uses SUM, independent of aggtype -- presumably it
      // only merges the per-block partial aggregates into blocks; confirm
      Aggregate agg =
          new Aggregate(
              group, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
      agg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
      agg.setupCorrectionLocation(
          CorrectionLocationType
              .NONE); // aggregation uses kahanSum but the inputs do not have correction values
      setLineNumbers(agg);
      TEMP = agg;
      level++;
      force = false; // in case of unknowns, generate one level
    }

    // in-memory cum sum (of partial aggregates)
    // skipped if the partial aggregates consist of exactly one row
    if (TEMP.getOutputParameters().getNumRows() != 1) {
      int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
      Unary unary1 =
          new Unary(
              TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k);
      unary1
          .getOutputParameters()
          .setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1);
      setLineNumbers(unary1);
      TEMP = unary1;
    }

    // split, group and mr cumsum
    // (levels are processed in reverse order; DATA.get(level) is that level's original input)
    while (level-- > 0) {
      double init = getCumulativeInitValue();
      CumulativeSplitAggregate split =
          new CumulativeSplitAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, init);
      split.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
      setLineNumbers(split);

      Group group1 =
          new Group(DATA.get(level), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
      group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
      setLineNumbers(group1);

      Group group2 = new Group(split, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
      group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
      setLineNumbers(group2);

      // combine the level's data (group1) with the split offsets (group2)
      CumulativeOffsetBinary binary =
          new CumulativeOffsetBinary(
              group1, group2, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
      binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
      setLineNumbers(binary);
      TEMP = binary;
    }

    return TEMP;
  }
  /**
   * Builds a fused TernaryAggregate lop (Kahan sum over an element-wise multiply of three
   * operands) for a sum over one or two nested multiplies. For a single multiply the third
   * operand is the literal 1.
   *
   * @param et execution type of this hop (not used; the execution type of the multiply input is
   *     used instead)
   * @return the fused ternary aggregate lop
   * @throws HopsException declared by the lop construction of the inputs
   * @throws LopsException declared by the lop construction of the inputs
   */
  private Lop constructLopsTernaryAggregateRewrite(ExecType et)
      throws HopsException, LopsException {
    final Hop product = getInput().get(0);
    final Hop left = product.getInput().get(0);
    final Hop right = product.getInput().get(1);

    final Lop first;
    final Lop second;
    final Lop third;

    if (left instanceof BinaryOp && ((BinaryOp) left).getOp() == OpOp2.MULT) {
      // (a * b) * c
      first = left.getInput().get(0).constructLops();
      second = left.getInput().get(1).constructLops();
      third = right.constructLops();
    } else if (right instanceof BinaryOp && ((BinaryOp) right).getOp() == OpOp2.MULT) {
      // a * (b * c)
      first = left.constructLops();
      second = right.getInput().get(0).constructLops();
      third = right.getInput().get(1).constructLops();
    } else {
      // a * b, padded with the literal 1
      first = left.constructLops();
      second = right.constructLops();
      third = new LiteralOp(1).constructLops();
    }

    final int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);

    // The execution type of a unary aggregate instruction should depend on the execution type
    // of its inputs to avoid OOM. Since only matrix-vector and not vector-matrix is supported,
    // checking the execution type of the multiply input suffices.
    final ExecType productExecType = product.optFindExecType();

    return new TernaryAggregate(
        first,
        second,
        third,
        Aggregate.OperationTypes.KahanSum,
        Binary.OperationTypes.MULTIPLY,
        DataType.SCALAR,
        ValueType.DOUBLE,
        productExecType,
        k);
  }
  /**
   * Builds the Spark lop chain for a cumulative unary operation over this hop's first input.
   *
   * <p>The chain has three phases: (1) zero or more {@code CumulativePartialAggregate} levels,
   * added while the size estimate exceeds the local memory budget (or once unconditionally when
   * dimensions are unknown or Spark is forced); (2) an optional CP {@code Unary} over the
   * remaining aggregates; (3) one {@code CumulativeOffsetBinary} per pre-aggregation level,
   * applied in reverse order against the data recorded at that level.
   *
   * @return the last lop of the constructed chain
   * @throws HopsException propagated from the called lop construction routines
   * @throws LopsException propagated from the called lop construction routines
   */
  private Lop constructLopsSparkCumulativeUnary() throws HopsException, LopsException {
    Hop in = getInput().get(0);
    long numRows = in.getDim1();
    long numCols = in.getDim2();
    long rowsPerBlock = in.getRowsInBlock();
    long colsPerBlock = in.getColsInBlock();
    boolean forceLevel = !dimsKnown() || _etypeForced == ExecType.SPARK;
    OperationTypes cumAggType = getCumulativeAggType();

    Lop current = in.constructLops();
    ArrayList<Lop> levelInputs = new ArrayList<Lop>();
    int numLevels = 0;

    // phase 1: repeated pre-aggregation until the aggregates fit into the CP memory budget
    while (((2 * OptimizerUtils.estimateSize(current.getOutputParameters().getNumRows(), numCols)
                    + OptimizerUtils.estimateSize(1, numCols))
                > OptimizerUtils.getLocalMemBudget()
            && current.getOutputParameters().getNumRows() > 1)
        || forceLevel) {
      // remember the input of this level for the offset pass in phase 3
      levelInputs.add(current);

      // per-block pre-aggregation (on Spark, CumulativePartialAggregate covers both the
      // pre-aggregation and the subsequent block aggregation)
      long aggRows =
          (long) Math.ceil((double) current.getOutputParameters().getNumRows() / rowsPerBlock);
      Lop partial =
          new CumulativePartialAggregate(
              current, DataType.MATRIX, ValueType.DOUBLE, cumAggType, ExecType.SPARK);
      partial.getOutputParameters().setDimensions(aggRows, numCols, rowsPerBlock, colsPerBlock, -1);
      setLineNumbers(partial);

      current = partial;
      numLevels++;
      forceLevel = false; // with unknown dimensions, generate exactly one level
    }

    // phase 2: in-memory cumulative op over the partial aggregates (skipped for a single row)
    if (current.getOutputParameters().getNumRows() != 1) {
      int numThreads = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
      Unary inMemory =
          new Unary(
              current,
              HopsOpOp1LopsU.get(_op),
              DataType.MATRIX,
              ValueType.DOUBLE,
              ExecType.CP,
              numThreads);
      inMemory
          .getOutputParameters()
          .setDimensions(
              current.getOutputParameters().getNumRows(), numCols, rowsPerBlock, colsPerBlock, -1);
      setLineNumbers(inMemory);
      current = inMemory;
    }

    // phase 3: walk the levels backwards and apply the offsets (on Spark, the
    // CumulativeOffsetBinary covers both the split aggregate and the subsequent offset
    // binary apply of split aggregates against the original data)
    for (int lvl = numLevels - 1; lvl >= 0; lvl--) {
      double init = getCumulativeInitValue();
      CumulativeOffsetBinary offset =
          new CumulativeOffsetBinary(
              levelInputs.get(lvl),
              current,
              DataType.MATRIX,
              ValueType.DOUBLE,
              init,
              cumAggType,
              ExecType.SPARK);
      offset.getOutputParameters().setDimensions(numRows, numCols, rowsPerBlock, colsPerBlock, -1);
      setLineNumbers(offset);
      current = offset;
    }

    return current;
  }
  /**
   * Constructs the lops for this aggregate-unary hop, or returns the lops created by an earlier
   * call.
   *
   * <p>The execution type from {@code optFindExecType()} selects one of three construction paths
   * (CP, MR, SPARK). Each path first checks for applicable rewrites (ternary aggregate and/or
   * unary-aggregate-outer chain) before falling back to a plain {@code PartialAggregate}. There
   * is no branch for any other execution type; in that case this method itself sets no lops
   * before calling {@code constructAndSetLopsDataFlowProperties()}.
   *
   * @return the lops stored on this hop after construction
   * @throws HopsException wrapping any exception raised during lop construction
   * @throws LopsException declared by the signature; exceptions raised inside the try block are
   *     rethrown as {@code HopsException}
   */
  @Override
  public Lop constructLops() throws HopsException, LopsException {
    // return already created lops
    if (getLops() != null) return getLops();

    try {
      ExecType et = optFindExecType();
      Hop input = getInput().get(0);

      if (et == ExecType.CP) {
        Lop agg1 = null;
        if (isTernaryAggregateRewriteApplicable()) {
          agg1 = constructLopsTernaryAggregateRewrite(et);
        } else if (isUnaryAggregateOuterCPRewriteApplicable()) {
          OperationTypes op = HopsAgg2Lops.get(_op);
          DirectionTypes dir = HopsDirection2Lops.get(_direction);

          // fuse the binary input and this aggregate into a single UAggOuterChain lop
          BinaryOp binput = (BinaryOp) getInput().get(0);
          agg1 =
              new UAggOuterChain(
                  binput.getInput().get(0).constructLops(),
                  binput.getInput().get(1).constructLops(),
                  op,
                  dir,
                  HopsOpOp2LopsB.get(binput.getOp()),
                  DataType.MATRIX,
                  getValueType(),
                  ExecType.CP);
          PartialAggregate.setDimensionsBasedOnDirection(
              agg1, getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), dir);

          // NOTE(review): the setLops(unary1) below is followed by an unconditional
          // setLops(agg1) after this if/else chain, which replaces it -- confirm whether the
          // scalar cast is meant to survive.
          if (getDataType() == DataType.SCALAR) {
            UnaryCP unary1 =
                new UnaryCP(
                    agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
            unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
            setLineNumbers(unary1);
            setLops(unary1);
          }

        } else { // general case
          int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
          // switch to GPU (single-threaded) for SUM when the accelerator is enabled and either
          // forced or the memory estimate is below the GPU memory budget
          if (DMLScript.USE_ACCELERATOR
              && (DMLScript.FORCE_ACCELERATOR
                  || getMemEstimate() < OptimizerUtils.GPU_MEMORY_BUDGET)
              && (_op == AggOp.SUM)) {
            et = ExecType.GPU;
            k = 1;
          }
          agg1 =
              new PartialAggregate(
                  input.constructLops(),
                  HopsAgg2Lops.get(_op),
                  HopsDirection2Lops.get(_direction),
                  getDataType(),
                  getValueType(),
                  et,
                  k);
        }

        // common to all three CP cases: agg1 becomes the lops of this hop
        setOutputDimensions(agg1);
        setLineNumbers(agg1);
        setLops(agg1);

        // scalar output: overwrite the output dimensions of agg1 with 1 x 1
        if (getDataType() == DataType.SCALAR) {
          agg1.getOutputParameters()
              .setDimensions(1, 1, getRowsInBlock(), getColsInBlock(), getNnz());
        }
      } else if (et == ExecType.MR) {
        OperationTypes op = HopsAgg2Lops.get(_op);
        DirectionTypes dir = HopsDirection2Lops.get(_direction);

        // unary aggregate operation
        Lop transform1 = null;
        if (isUnaryAggregateOuterRewriteApplicable()) {
          BinaryOp binput = (BinaryOp) getInput().get(0);
          transform1 =
              new UAggOuterChain(
                  binput.getInput().get(0).constructLops(),
                  binput.getInput().get(1).constructLops(),
                  op,
                  dir,
                  HopsOpOp2LopsB.get(binput.getOp()),
                  DataType.MATRIX,
                  getValueType(),
                  ExecType.MR);
          PartialAggregate.setDimensionsBasedOnDirection(
              transform1,
              getDim1(),
              getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              dir);
        } else // default
        {
          transform1 =
              new PartialAggregate(input.constructLops(), op, dir, DataType.MATRIX, getValueType());
          ((PartialAggregate) transform1)
              .setDimensionsBasedOnDirection(
                  getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock());
        }
        setLineNumbers(transform1);

        // aggregation if required (always added on top of a UAggOuterChain)
        Lop aggregate = null;
        Group group1 = null;
        Aggregate agg1 = null;
        if (requiresAggregation(input, _direction) || transform1 instanceof UAggOuterChain) {
          group1 =
              new Group(transform1, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
          group1
              .getOutputParameters()
              .setDimensions(
                  getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), getNnz());
          setLineNumbers(group1);

          agg1 = new Aggregate(group1, HopsAgg2Lops.get(_op), DataType.MATRIX, getValueType(), et);
          agg1.getOutputParameters()
              .setDimensions(
                  getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), getNnz());
          agg1.setupCorrectionLocation(PartialAggregate.getCorrectionLocation(op, dir));
          setLineNumbers(agg1);

          aggregate = agg1;
        } else {
          // no final aggregate: the partial aggregate is the result and drops its correction
          ((PartialAggregate) transform1).setDropCorrection();
          aggregate = transform1;
        }

        setLops(aggregate);

        // cast if required
        if (getDataType() == DataType.SCALAR) {

          // Set the dimensions of PartialAggregate LOP based on the
          // direction in which aggregation is performed
          // (uses the input's dimensions here, replacing the values set above)
          PartialAggregate.setDimensionsBasedOnDirection(
              transform1,
              input.getDim1(),
              input.getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              dir);

          if (group1 != null && agg1 != null) { // if aggregation required
            group1
                .getOutputParameters()
                .setDimensions(
                    input.getDim1(),
                    input.getDim2(),
                    input.getRowsInBlock(),
                    input.getColsInBlock(),
                    getNnz());
            agg1.getOutputParameters()
                .setDimensions(1, 1, input.getRowsInBlock(), input.getColsInBlock(), getNnz());
          }

          // the scalar cast replaces the aggregate as the lops of this hop
          UnaryCP unary1 =
              new UnaryCP(
                  aggregate,
                  HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR),
                  getDataType(),
                  getValueType());
          unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
          setLineNumbers(unary1);
          setLops(unary1);
        }
      } else if (et == ExecType.SPARK) {
        OperationTypes op = HopsAgg2Lops.get(_op);
        DirectionTypes dir = HopsDirection2Lops.get(_direction);

        // unary aggregate
        if (isTernaryAggregateRewriteApplicable()) {
          Lop aggregate = constructLopsTernaryAggregateRewrite(et);
          setOutputDimensions(aggregate); // 0x0 (scalar)
          setLineNumbers(aggregate);
          setLops(aggregate);
        } else if (isUnaryAggregateOuterSPRewriteApplicable()) {
          BinaryOp binput = (BinaryOp) getInput().get(0);
          Lop transform1 =
              new UAggOuterChain(
                  binput.getInput().get(0).constructLops(),
                  binput.getInput().get(1).constructLops(),
                  op,
                  dir,
                  HopsOpOp2LopsB.get(binput.getOp()),
                  DataType.MATRIX,
                  getValueType(),
                  ExecType.SPARK);
          PartialAggregate.setDimensionsBasedOnDirection(
              transform1,
              getDim1(),
              getDim2(),
              input.getRowsInBlock(),
              input.getColsInBlock(),
              dir);
          setLineNumbers(transform1);
          setLops(transform1);

          // scalar output: wrap the chain in a cast, which then becomes the lops of this hop
          if (getDataType() == DataType.SCALAR) {
            UnaryCP unary1 =
                new UnaryCP(
                    transform1,
                    HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR),
                    getDataType(),
                    getValueType());
            unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
            setLineNumbers(unary1);
            setLops(unary1);
          }

        } else // default
        {
          // the Spark aggregation type is derived from whether a final aggregation is needed
          boolean needAgg = requiresAggregation(input, _direction);
          SparkAggType aggtype = getSparkUnaryAggregationType(needAgg);

          PartialAggregate aggregate =
              new PartialAggregate(
                  input.constructLops(),
                  HopsAgg2Lops.get(_op),
                  HopsDirection2Lops.get(_direction),
                  DataType.MATRIX,
                  getValueType(),
                  aggtype,
                  et);
          aggregate.setDimensionsBasedOnDirection(
              getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock());
          setLineNumbers(aggregate);
          setLops(aggregate);

          // scalar output: wrap the aggregate in a cast, which then becomes the lops of this hop
          if (getDataType() == DataType.SCALAR) {
            UnaryCP unary1 =
                new UnaryCP(
                    aggregate,
                    HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR),
                    getDataType(),
                    getValueType());
            unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
            setLineNumbers(unary1);
            setLops(unary1);
          }
        }
      }
    } catch (Exception e) {
      // any failure above is reported with this hop's error location and the original cause
      throw new HopsException(
          this.printErrorLocation() + "In AggUnary Hop, error constructing Lops ", e);
    }

    // add reblock/checkpoint lops if necessary
    constructAndSetLopsDataFlowProperties();

    // return created lops
    return getLops();
  }
  /**
   * Constructs the lops for this right-indexing hop, or returns the lops created by an earlier
   * call.
   *
   * <p>If the dimensions of this hop and of its first input are known and equal, the input's lops
   * are reused directly. Otherwise a {@code RangeBasedReIndex} lop is built for the execution
   * type returned by {@code optFindExecType()}: on MR it is optionally followed by a group and
   * sum aggregate (indexing method {@code MR_RIX}); on SPARK it carries a Spark aggregation type
   * derived from the indexing method; every other execution type uses the plain reindex lop.
   *
   * @return the lops stored on this hop after construction
   * @throws HopsException wrapping any exception raised while building the reindex lops
   * @throws LopsException propagated from constructing the input's lops on the reuse path
   */
  @Override
  public Lop constructLops() throws HopsException, LopsException {
    // reuse lops built by an earlier call
    if (getLops() != null) {
      return getLops();
    }

    Hop source = getInput().get(0);

    // indexing that selects the entire input is a no-op
    boolean selectsEverything =
        dimsKnown()
            && source.dimsKnown()
            && getDim1() == source.getDim1()
            && getDim2() == source.getDim2();

    if (selectsEverything) {
      setLops(source.constructLops());
    } else {
      // actual lop construction, including operator selection
      try {
        ExecType execType = optFindExecType();

        if (execType == ExecType.MR) {
          IndexingMethod ixMethod =
              optFindIndexingMethod(
                  _rowLowerEqualsUpper,
                  _colLowerEqualsUpper,
                  source._dim1,
                  source._dim2,
                  _dim1,
                  _dim2);

          // placeholder literal passed for the last two lop inputs of the reindex
          Lop minusOne = Data.createLiteralLop(ValueType.INT, "-1");
          Lop srcLop = source.constructLops();
          Lop rowLower = getInput().get(1).constructLops();
          Lop rowUpper = getInput().get(2).constructLops();
          Lop colLower = getInput().get(3).constructLops();
          Lop colUpper = getInput().get(4).constructLops();
          RangeBasedReIndex rix =
              new RangeBasedReIndex(
                  srcLop,
                  rowLower,
                  rowUpper,
                  colLower,
                  colUpper,
                  minusOne,
                  minusOne,
                  getDataType(),
                  getValueType(),
                  execType);
          setOutputDimensions(rix);
          setLineNumbers(rix);

          if (ixMethod != IndexingMethod.MR_RIX) {
            // MR_VRIX: the reindex lop alone is the result
            setLops(rix);
          } else {
            // MR_RIX: group and sum-aggregate the reindex output
            Group grouped =
                new Group(rix, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
            setOutputDimensions(grouped);
            setLineNumbers(grouped);

            Aggregate summed =
                new Aggregate(
                    grouped,
                    Aggregate.OperationTypes.Sum,
                    DataType.MATRIX,
                    getValueType(),
                    execType);
            setOutputDimensions(summed);
            setLineNumbers(summed);

            setLops(summed);
          }
        } else if (execType == ExecType.SPARK) {
          IndexingMethod ixMethod =
              optFindIndexingMethod(
                  _rowLowerEqualsUpper,
                  _colLowerEqualsUpper,
                  source._dim1,
                  source._dim2,
                  _dim1,
                  _dim2);

          SparkAggType sparkAgg;
          if (ixMethod == IndexingMethod.MR_VRIX) {
            sparkAgg = SparkAggType.NONE;
          } else {
            sparkAgg = SparkAggType.MULTI_BLOCK;
          }

          Lop minusOne = Data.createLiteralLop(ValueType.INT, "-1");
          Lop srcLop = source.constructLops();
          Lop rowLower = getInput().get(1).constructLops();
          Lop rowUpper = getInput().get(2).constructLops();
          Lop colLower = getInput().get(3).constructLops();
          Lop colUpper = getInput().get(4).constructLops();
          RangeBasedReIndex rix =
              new RangeBasedReIndex(
                  srcLop,
                  rowLower,
                  rowUpper,
                  colLower,
                  colUpper,
                  minusOne,
                  minusOne,
                  getDataType(),
                  getValueType(),
                  sparkAgg,
                  execType);
          setOutputDimensions(rix);
          setLineNumbers(rix);
          setLops(rix);
        } else {
          // CP
          Lop minusOne = Data.createLiteralLop(ValueType.INT, "-1");
          Lop srcLop = source.constructLops();
          Lop rowLower = getInput().get(1).constructLops();
          Lop rowUpper = getInput().get(2).constructLops();
          Lop colLower = getInput().get(3).constructLops();
          Lop colUpper = getInput().get(4).constructLops();
          RangeBasedReIndex rix =
              new RangeBasedReIndex(
                  srcLop,
                  rowLower,
                  rowUpper,
                  colLower,
                  colUpper,
                  minusOne,
                  minusOne,
                  getDataType(),
                  getValueType(),
                  execType);
          setOutputDimensions(rix);
          setLineNumbers(rix);
          setLops(rix);
        }
      } catch (Exception e) {
        throw new HopsException(
            this.printErrorLocation() + "In IndexingOp Hop, error constructing Lops ", e);
      }
    }

    // add reblock/checkpoint lops if necessary
    constructAndSetLopsDataFlowProperties();

    return getLops();
  }