public void init(Lop input1, Lop input2, DataType dt, ValueType vt, ExecType et) { addInput(input1); input1.addOutput(this); addInput(input2); input2.addOutput(this); boolean breaksAlignment = false; boolean aligner = false; boolean definesMRJob = false; if (et == ExecType.MR) { lps.addCompatibility(JobType.GMR); lps.addCompatibility(JobType.DATAGEN); // currently required for correctness lps.setProperties( inputs, ExecType.MR, ExecLocation.Reduce, breaksAlignment, aligner, definesMRJob); } else // SP { lps.addCompatibility(JobType.INVALID); lps.setProperties( inputs, ExecType.SPARK, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob); } }
public boolean hasNonBlockedInputs() { for (Lop in : getInputs()) if (in.getDataType() == DataType.MATRIX && !in.getOutputParameters().isBlocked()) return true; return false; }
/** * Method to recursively add LOPS to a DAG * * @param dag */ public final void addToDag(Dag<Lop> dag) { if (dag.addNode(this)) for (Lop l : getInputs()) l.addToDag(dag); }
/** * @return * @throws HopsException * @throws LopsException */ private Lop constructLopsSparkCumulativeUnary() throws HopsException, LopsException { Hop input = getInput().get(0); long rlen = input.getDim1(); long clen = input.getDim2(); long brlen = input.getRowsInBlock(); long bclen = input.getColsInBlock(); boolean force = !dimsKnown() || _etypeForced == ExecType.SPARK; OperationTypes aggtype = getCumulativeAggType(); Lop X = input.constructLops(); Lop TEMP = X; ArrayList<Lop> DATA = new ArrayList<Lop>(); int level = 0; // recursive preaggregation until aggregates fit into CP memory budget while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget() && TEMP.getOutputParameters().getNumRows() > 1) || force) { DATA.add(TEMP); // preaggregation per block (for spark, the CumulativePartialAggregate subsumes both // the preaggregation and subsequent block aggregation) long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen); Lop preagg = new CumulativePartialAggregate( TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.SPARK); preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); setLineNumbers(preagg); TEMP = preagg; level++; force = false; // in case of unknowns, generate one level } // in-memory cum sum (of partial aggregates) if (TEMP.getOutputParameters().getNumRows() != 1) { Unary unary1 = new Unary(TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP); unary1 .getOutputParameters() .setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1); setLineNumbers(unary1); TEMP = unary1; } // split, group and mr cumsum while (level-- > 0) { // (for spark, the CumulativeOffsetBinary subsumes both the split aggregate and // the subsequent offset binary apply of split aggregates against the original data) double initValue = getCumulativeInitValue(); CumulativeOffsetBinary binary = new CumulativeOffsetBinary( DATA.get(level), TEMP, DataType.MATRIX, ValueType.DOUBLE, initValue, aggtype, ExecType.SPARK); binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(binary); TEMP = binary; } return TEMP; }
/** * MR Cumsum is currently based on a multipass algorithm of (1) preaggregation and (2) subsequent * offsetting. Note that we currently support one robust physical operator but many alternative * realizations are possible for specific scenarios (e.g., when the preaggregated intermediate fit * into the map task memory budget) or by creating custom job types. * * @return * @throws HopsException * @throws LopsException */ private Lop constructLopsMRCumulativeUnary() throws HopsException, LopsException { Hop input = getInput().get(0); long rlen = input.getDim1(); long clen = input.getDim2(); long brlen = input.getRowsInBlock(); long bclen = input.getColsInBlock(); boolean force = !dimsKnown() || _etypeForced == ExecType.MR; OperationTypes aggtype = getCumulativeAggType(); Lop X = input.constructLops(); Lop TEMP = X; ArrayList<Lop> DATA = new ArrayList<Lop>(); int level = 0; // recursive preaggregation until aggregates fit into CP memory budget while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget() && TEMP.getOutputParameters().getNumRows() > 1) || force) { DATA.add(TEMP); // preaggregation per block long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen); Lop preagg = new CumulativePartialAggregate( TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR); preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); setLineNumbers(preagg); Group group = new Group(preagg, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE); group.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); setLineNumbers(group); Aggregate agg = new Aggregate( group, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR); agg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1); agg.setupCorrectionLocation( CorrectionLocationType .NONE); // aggregation uses kahanSum but the inputs do not have correction values setLineNumbers(agg); TEMP = agg; level++; force = false; // in case of unknowns, generate one level } // in-memory cum sum (of partial aggregates) if (TEMP.getOutputParameters().getNumRows() != 1) { Unary unary1 = new Unary(TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP); unary1 .getOutputParameters() .setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1); setLineNumbers(unary1); TEMP = unary1; } // split, group and mr cumsum while (level-- > 0) { double init = getCumulativeInitValue(); CumulativeSplitAggregate split = new CumulativeSplitAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, init); split.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(split); Group group1 = new Group(DATA.get(level), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE); group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(group1); Group group2 = new Group(split, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE); group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(group2); CumulativeOffsetBinary binary = new CumulativeOffsetBinary( group1, group2, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR); binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1); setLineNumbers(binary); TEMP = binary; } return TEMP; }