/** * @return * @throws HopsException * @throws LopsException */ private Lop constructLopsSparkCumulativeUnary() throws HopsException, LopsException { Hop input = getInput().get(0); long rlen = input.getDim1(); long clen = input.getDim2(); long brlen = input.getRowsInBlock(); long bclen = input.getColsInBlock(); boolean force = !dimsKnown() || _etypeForced == ExecType.SPARK; OperationTypes aggtype = getCumulativeAggType(); Lop X = input.constructLops(); Lop TEMP = X; ArrayList<Lop> DATA = new ArrayList<Lop>(); int level = 0; // recursive preaggregation until aggregates fit into CP memory budget while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget() && TEMP.getOutputParameters().getNumRows() > 1) || force) { DATA.add(TEMP); // preaggregation per block (for spark, the CumulativePartialAggregate subsumes both // the preaggregation and subsequent block aggregation) long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen); Lop preagg = new CumulativePartialAggregate( TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.SPARK); preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); setLineNumbers(preagg); TEMP = preagg; level++; force = false; // in case of unknowns, generate one level } // in-memory cum sum (of partial aggregates) if (TEMP.getOutputParameters().getNumRows() != 1) { int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); Unary unary1 = new Unary( TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k); unary1 .getOutputParameters() .setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1); setLineNumbers(unary1); TEMP = unary1; } // split, group and mr cumsum while (level-- > 0) { // (for spark, the CumulativeOffsetBinary subsumes both the split aggregate and // the subsequent offset binary apply of split aggregates against the original data) double initValue = getCumulativeInitValue(); CumulativeOffsetBinary binary = new CumulativeOffsetBinary( DATA.get(level), TEMP, DataType.MATRIX, ValueType.DOUBLE, initValue, aggtype, ExecType.SPARK); binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(binary); TEMP = binary; } return TEMP; }
@Override public Lop constructLops() throws HopsException, LopsException { // return already created lops if (getLops() != null) return getLops(); try { ExecType et = optFindExecType(); Hop input = getInput().get(0); if (et == ExecType.CP) { Lop agg1 = null; if (isTernaryAggregateRewriteApplicable()) { agg1 = constructLopsTernaryAggregateRewrite(et); } else if (isUnaryAggregateOuterCPRewriteApplicable()) { OperationTypes op = HopsAgg2Lops.get(_op); DirectionTypes dir = HopsDirection2Lops.get(_direction); BinaryOp binput = (BinaryOp) getInput().get(0); agg1 = new UAggOuterChain( binput.getInput().get(0).constructLops(), binput.getInput().get(1).constructLops(), op, dir, HopsOpOp2LopsB.get(binput.getOp()), DataType.MATRIX, getValueType(), ExecType.CP); PartialAggregate.setDimensionsBasedOnDirection( agg1, getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), dir); if (getDataType() == DataType.SCALAR) { UnaryCP unary1 = new UnaryCP( agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType()); unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1); setLineNumbers(unary1); setLops(unary1); } } else { // general case int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || getMemEstimate() < OptimizerUtils.GPU_MEMORY_BUDGET) && (_op == AggOp.SUM)) { et = ExecType.GPU; k = 1; } agg1 = new PartialAggregate( input.constructLops(), HopsAgg2Lops.get(_op), HopsDirection2Lops.get(_direction), getDataType(), getValueType(), et, k); } setOutputDimensions(agg1); setLineNumbers(agg1); setLops(agg1); if (getDataType() == DataType.SCALAR) { agg1.getOutputParameters() .setDimensions(1, 1, getRowsInBlock(), getColsInBlock(), getNnz()); } } else if (et == ExecType.MR) { OperationTypes op = HopsAgg2Lops.get(_op); DirectionTypes dir = HopsDirection2Lops.get(_direction); // unary aggregate operation Lop transform1 = null; if (isUnaryAggregateOuterRewriteApplicable()) { BinaryOp binput = (BinaryOp) getInput().get(0); transform1 = new UAggOuterChain( binput.getInput().get(0).constructLops(), binput.getInput().get(1).constructLops(), op, dir, HopsOpOp2LopsB.get(binput.getOp()), DataType.MATRIX, getValueType(), ExecType.MR); PartialAggregate.setDimensionsBasedOnDirection( transform1, getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), dir); } else // default { transform1 = new PartialAggregate(input.constructLops(), op, dir, DataType.MATRIX, getValueType()); ((PartialAggregate) transform1) .setDimensionsBasedOnDirection( getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock()); } setLineNumbers(transform1); // aggregation if required Lop aggregate = null; Group group1 = null; Aggregate agg1 = null; if (requiresAggregation(input, _direction) || transform1 instanceof UAggOuterChain) { group1 = new Group(transform1, Group.OperationTypes.Sort, DataType.MATRIX, getValueType()); group1 .getOutputParameters() .setDimensions( getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), getNnz()); setLineNumbers(group1); agg1 = new Aggregate(group1, HopsAgg2Lops.get(_op), DataType.MATRIX, getValueType(), et); agg1.getOutputParameters() .setDimensions( getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), getNnz()); agg1.setupCorrectionLocation(PartialAggregate.getCorrectionLocation(op, dir)); setLineNumbers(agg1); aggregate = agg1; } else { ((PartialAggregate) transform1).setDropCorrection(); aggregate = transform1; } setLops(aggregate); // cast if required if (getDataType() == DataType.SCALAR) { // Set the dimensions of PartialAggregate LOP based on the // direction in which aggregation is performed PartialAggregate.setDimensionsBasedOnDirection( transform1, input.getDim1(), input.getDim2(), input.getRowsInBlock(), input.getColsInBlock(), dir); if (group1 != null && agg1 != null) { // if aggregation required group1 .getOutputParameters() .setDimensions( input.getDim1(), input.getDim2(), input.getRowsInBlock(), input.getColsInBlock(), getNnz()); agg1.getOutputParameters() .setDimensions(1, 1, input.getRowsInBlock(), input.getColsInBlock(), getNnz()); } UnaryCP unary1 = new UnaryCP( aggregate, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType()); unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1); setLineNumbers(unary1); setLops(unary1); } } else if (et == ExecType.SPARK) { OperationTypes op = HopsAgg2Lops.get(_op); DirectionTypes dir = HopsDirection2Lops.get(_direction); // unary aggregate if (isTernaryAggregateRewriteApplicable()) { Lop aggregate = constructLopsTernaryAggregateRewrite(et); setOutputDimensions(aggregate); // 0x0 (scalar) setLineNumbers(aggregate); setLops(aggregate); } else if (isUnaryAggregateOuterSPRewriteApplicable()) { BinaryOp binput = (BinaryOp) getInput().get(0); Lop transform1 = new UAggOuterChain( binput.getInput().get(0).constructLops(), binput.getInput().get(1).constructLops(), op, dir, HopsOpOp2LopsB.get(binput.getOp()), DataType.MATRIX, getValueType(), ExecType.SPARK); PartialAggregate.setDimensionsBasedOnDirection( transform1, getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), dir); setLineNumbers(transform1); setLops(transform1); if (getDataType() == DataType.SCALAR) { UnaryCP unary1 = new UnaryCP( transform1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType()); unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1); setLineNumbers(unary1); setLops(unary1); } } else // default { boolean needAgg = requiresAggregation(input, _direction); SparkAggType aggtype = getSparkUnaryAggregationType(needAgg); PartialAggregate aggregate = new PartialAggregate( input.constructLops(), HopsAgg2Lops.get(_op), HopsDirection2Lops.get(_direction), DataType.MATRIX, getValueType(), aggtype, et); aggregate.setDimensionsBasedOnDirection( getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock()); setLineNumbers(aggregate); setLops(aggregate); if (getDataType() == DataType.SCALAR) { UnaryCP unary1 = new UnaryCP( aggregate, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType()); unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1); setLineNumbers(unary1); setLops(unary1); } } } } catch (Exception e) { throw new HopsException( this.printErrorLocation() + "In AggUnary Hop, error constructing Lops ", e); } // add reblock/checkpoint lops if necessary constructAndSetLopsDataFlowProperties(); // return created lops return getLops(); }
/** * MR Cumsum is currently based on a multipass algorithm of (1) preaggregation and (2) subsequent * offsetting. Note that we currently support one robust physical operator but many alternative * realizations are possible for specific scenarios (e.g., when the preaggregated intermediate fit * into the map task memory budget) or by creating custom job types. * * @return * @throws HopsException * @throws LopsException */ private Lop constructLopsMRCumulativeUnary() throws HopsException, LopsException { Hop input = getInput().get(0); long rlen = input.getDim1(); long clen = input.getDim2(); long brlen = input.getRowsInBlock(); long bclen = input.getColsInBlock(); boolean force = !dimsKnown() || _etypeForced == ExecType.MR; OperationTypes aggtype = getCumulativeAggType(); Lop X = input.constructLops(); Lop TEMP = X; ArrayList<Lop> DATA = new ArrayList<Lop>(); int level = 0; // recursive preaggregation until aggregates fit into CP memory budget while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget() && TEMP.getOutputParameters().getNumRows() > 1) || force) { DATA.add(TEMP); // preaggregation per block long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen); Lop preagg = new CumulativePartialAggregate( TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR); preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); setLineNumbers(preagg); Group group = new Group(preagg, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE); group.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); setLineNumbers(group); Aggregate agg = new Aggregate( group, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR); agg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); agg.setupCorrectionLocation( CorrectionLocationType .NONE); // aggregation uses kahanSum but the inputs do not have correction values setLineNumbers(agg); TEMP = agg; level++; force = false; // in case of unknowns, generate one level } // in-memory cum sum (of partial aggregates) if (TEMP.getOutputParameters().getNumRows() != 1) { int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); Unary unary1 = new Unary( TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k); unary1 .getOutputParameters() .setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1); setLineNumbers(unary1); TEMP = unary1; } // split, group and mr cumsum while (level-- > 0) { double init = getCumulativeInitValue(); CumulativeSplitAggregate split = new CumulativeSplitAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, init); split.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(split); Group group1 = new Group(DATA.get(level), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE); group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(group1); Group group2 = new Group(split, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE); group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(group2); CumulativeOffsetBinary binary = new CumulativeOffsetBinary( group1, group2, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR); binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(binary); TEMP = binary; } return TEMP; }