/** * MR Cumsum is currently based on a multipass algorithm of (1) preaggregation and (2) subsequent * offsetting. Note that we currently support one robust physical operator but many alternative * realizations are possible for specific scenarios (e.g., when the preaggregated intermediate fit * into the map task memory budget) or by creating custom job types. * * @return * @throws HopsException * @throws LopsException */ private Lop constructLopsMRCumulativeUnary() throws HopsException, LopsException { Hop input = getInput().get(0); long rlen = input.getDim1(); long clen = input.getDim2(); long brlen = input.getRowsInBlock(); long bclen = input.getColsInBlock(); boolean force = !dimsKnown() || _etypeForced == ExecType.MR; OperationTypes aggtype = getCumulativeAggType(); Lop X = input.constructLops(); Lop TEMP = X; ArrayList<Lop> DATA = new ArrayList<Lop>(); int level = 0; // recursive preaggregation until aggregates fit into CP memory budget while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget() && TEMP.getOutputParameters().getNumRows() > 1) || force) { DATA.add(TEMP); // preaggregation per block long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen); Lop preagg = new CumulativePartialAggregate( TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR); preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); setLineNumbers(preagg); Group group = new Group(preagg, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE); group.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); setLineNumbers(group); Aggregate agg = new Aggregate( group, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR); agg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); agg.setupCorrectionLocation( CorrectionLocationType .NONE); // aggregation uses kahanSum but the inputs do not have correction values setLineNumbers(agg); TEMP = agg; level++; force = false; // in case of unknowns, generate one level } // in-memory cum sum (of partial aggregates) if (TEMP.getOutputParameters().getNumRows() != 1) { int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); Unary unary1 = new Unary( TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k); unary1 .getOutputParameters() .setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1); setLineNumbers(unary1); TEMP = unary1; } // split, group and mr cumsum while (level-- > 0) { double init = getCumulativeInitValue(); CumulativeSplitAggregate split = new CumulativeSplitAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, init); split.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(split); Group group1 = new Group(DATA.get(level), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE); group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(group1); Group group2 = new Group(split, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE); group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(group2); CumulativeOffsetBinary binary = new CumulativeOffsetBinary( group1, group2, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR); binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(binary); TEMP = binary; } return TEMP; }
/** * @return * @throws HopsException * @throws LopsException */ private Lop constructLopsSparkCumulativeUnary() throws HopsException, LopsException { Hop input = getInput().get(0); long rlen = input.getDim1(); long clen = input.getDim2(); long brlen = input.getRowsInBlock(); long bclen = input.getColsInBlock(); boolean force = !dimsKnown() || _etypeForced == ExecType.SPARK; OperationTypes aggtype = getCumulativeAggType(); Lop X = input.constructLops(); Lop TEMP = X; ArrayList<Lop> DATA = new ArrayList<Lop>(); int level = 0; // recursive preaggregation until aggregates fit into CP memory budget while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget() && TEMP.getOutputParameters().getNumRows() > 1) || force) { DATA.add(TEMP); // preaggregation per block (for spark, the CumulativePartialAggregate subsumes both // the preaggregation and subsequent block aggregation) long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen); Lop preagg = new CumulativePartialAggregate( TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.SPARK); preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); setLineNumbers(preagg); TEMP = preagg; level++; force = false; // in case of unknowns, generate one level } // in-memory cum sum (of partial aggregates) if (TEMP.getOutputParameters().getNumRows() != 1) { int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); Unary unary1 = new Unary( TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k); unary1 .getOutputParameters() .setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1); setLineNumbers(unary1); TEMP = unary1; } // split, group and mr cumsum while (level-- > 0) { // (for spark, the CumulativeOffsetBinary subsumes both the split aggregate and // the subsequent offset binary apply of split aggregates against the original data) double initValue = getCumulativeInitValue(); CumulativeOffsetBinary binary = new CumulativeOffsetBinary( DATA.get(level), TEMP, DataType.MATRIX, ValueType.DOUBLE, initValue, aggtype, ExecType.SPARK); binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(binary); TEMP = binary; } return TEMP; }
private Lop constructLopsIQM() throws HopsException, LopsException { ExecType et = optFindExecType(); Hop input = getInput().get(0); if (et == ExecType.MR) { CombineUnary combine = CombineUnary.constructCombineLop(input.constructLops(), DataType.MATRIX, getValueType()); combine .getOutputParameters() .setDimensions( input.getDim1(), input.getDim2(), input.getRowsInBlock(), input.getColsInBlock(), input.getNnz()); SortKeys sort = SortKeys.constructSortByValueLop( combine, SortKeys.OperationTypes.WithoutWeights, DataType.MATRIX, ValueType.DOUBLE, ExecType.MR); // Sort dimensions are same as the first input sort.getOutputParameters() .setDimensions( input.getDim1(), input.getDim2(), input.getRowsInBlock(), input.getColsInBlock(), input.getNnz()); Data lit = Data.createLiteralLop(ValueType.DOUBLE, Double.toString(0.25)); lit.setAllPositions( this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn()); PickByCount pick = new PickByCount( sort, lit, DataType.MATRIX, getValueType(), PickByCount.OperationTypes.RANGEPICK); pick.getOutputParameters().setDimensions(-1, -1, getRowsInBlock(), getColsInBlock(), -1); setLineNumbers(pick); PartialAggregate pagg = new PartialAggregate( pick, HopsAgg2Lops.get(Hop.AggOp.SUM), HopsDirection2Lops.get(Hop.Direction.RowCol), DataType.MATRIX, getValueType()); setLineNumbers(pagg); // Set the dimensions of PartialAggregate LOP based on the // direction in which aggregation is performed pagg.setDimensionsBasedOnDirection(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock()); Group group1 = new Group(pagg, Group.OperationTypes.Sort, DataType.MATRIX, getValueType()); group1 .getOutputParameters() .setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz()); setLineNumbers(group1); Aggregate agg1 = new Aggregate( group1, HopsAgg2Lops.get(Hop.AggOp.SUM), DataType.MATRIX, getValueType(), ExecType.MR); agg1.getOutputParameters() .setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz()); agg1.setupCorrectionLocation(pagg.getCorrectionLocation()); setLineNumbers(agg1); UnaryCP unary1 = new UnaryCP( agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType()); unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1); setLineNumbers(unary1); Unary iqm = new Unary( sort, unary1, Unary.OperationTypes.MR_IQM, DataType.SCALAR, ValueType.DOUBLE, ExecType.CP); iqm.getOutputParameters().setDimensions(0, 0, 0, 0, -1); setLineNumbers(iqm); return iqm; } else { SortKeys sort = SortKeys.constructSortByValueLop( input.constructLops(), SortKeys.OperationTypes.WithoutWeights, DataType.MATRIX, ValueType.DOUBLE, et); sort.getOutputParameters() .setDimensions( input.getDim1(), input.getDim2(), input.getRowsInBlock(), input.getColsInBlock(), input.getNnz()); PickByCount pick = new PickByCount( sort, null, getDataType(), getValueType(), PickByCount.OperationTypes.IQM, et, true); pick.getOutputParameters() .setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz()); setLineNumbers(pick); return pick; } }