/**
   * Executes the pmm instruction on Spark.
   *
   * <p>The cache type decides the roles of the two inputs: for {@code CacheType.LEFT}, {@code
   * input1} is read as the broadcast and {@code input2} as the RDD; for any other cache type the
   * roles are swapped. Each RDD block is passed through {@code RDDPMMFunction} together with the
   * broadcast, and the produced blocks are summed per key via {@code
   * RDDAggregateUtils.sumByKeyStable}. The resulting RDD is registered under the output variable,
   * with lineage entries pointing at both inputs.
   *
   * @param ec execution context; cast to {@link SparkExecutionContext} without a type check
   * @throws DMLRuntimeException declared by this method; propagated from the calls made here
   * @throws DMLUnsupportedOperationException declared by this method; propagated from the calls
   *     made here
   */
  @Override
  public void processInstruction(ExecutionContext ec)
      throws DMLRuntimeException, DMLUnsupportedOperationException {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // resolve which operand is the distributed side and which is the broadcast side
    final boolean leftCached = (_type == CacheType.LEFT);
    final String rddVar = leftCached ? input2.getName() : input1.getName();
    final String bcastVar = leftCached ? input1.getName() : input2.getName();

    // block size is taken from the characteristics of the OUTPUT variable
    final String outVar = output.getName();
    final MatrixCharacteristics outChars = sparkCtx.getMatrixCharacteristics(outVar);

    // row count is supplied as a scalar operand of the instruction
    final long numRows =
        sparkCtx
            .getScalarInput(_nrow.getName(), _nrow.getValueType(), _nrow.isLiteral())
            .getLongValue();

    // fetch both inputs
    final JavaPairRDD<MatrixIndexes, MatrixBlock> rddBlocks =
        sparkCtx.getBinaryBlockRDDHandleForVariable(rddVar);
    final PartitionedBroadcastMatrix bcastBlocks = sparkCtx.getBroadcastForVariable(bcastVar);

    // run pmm per block, then sum the partial results that share a key
    final RDDPMMFunction pmmFunction =
        new RDDPMMFunction(_type, bcastBlocks, numRows, outChars.getRowsPerBlock());
    final JavaPairRDD<MatrixIndexes, MatrixBlock> result =
        RDDAggregateUtils.sumByKeyStable(rddBlocks.flatMapToPair(pmmFunction));

    // publish the output RDD handle and record where it came from
    sparkCtx.setRDDHandleForVariable(outVar, result);
    sparkCtx.addLineageRDD(outVar, rddVar);
    sparkCtx.addLineageBroadcast(outVar, bcastVar);

    // update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sparkCtx, false);
  }
  /**
   * Runs the "ParFor-DPESP" job: partitions the blocks of an input matrix, groups them by key and
   * executes the parfor workers over the grouped partitions on Spark, then collects the results.
   *
   * <p>Two integer accumulators created on the Spark context are handed to the worker and read
   * back after the job to obtain the task and iteration counts.
   *
   * @param pfid not referenced in this method (presumably the parfor ID; confirm against callers)
   * @param itervar passed through to {@code RemoteDPParForSparkWorker}
   * @param matrixvar name of the variable whose binary-block RDD is used as job input; also
   *     passed through to the worker
   * @param program passed through to {@code RemoteDPParForSparkWorker}
   * @param resultFile not referenced in this method
   * @param input matrix object whose metadata supplies the matrix characteristics; its metadata
   *     is cast to {@code MatrixDimensionsMetaData}
   * @param ec execution context; cast to {@link SparkExecutionContext} without a type check
   * @param dpf data partition format given to both the partitioner and the worker
   * @param oi output info given to both the partitioner and the worker
   * @param tSparseCol passed through to the worker (labelled a config parameter in the original)
   * @param enableCPCaching passed through to the worker (labelled a config parameter in the
   *     original)
   * @param numReducers argument to {@code groupByKey} (labelled an opt parameter in the original)
   * @return a {@code RemoteParForJobReturn} constructed with {@code true}, the two accumulator
   *     values and the de-serialized results
   * @throws DMLRuntimeException declared by this method; propagated from the calls made here
   * @throws DMLUnsupportedOperationException declared by this method; propagated from the calls
   *     made here
   */
  public static RemoteParForJobReturn runJob(
      long pfid,
      String itervar,
      String matrixvar,
      String program,
      String resultFile,
      MatrixObject input,
      ExecutionContext ec,
      PDataPartitionFormat dpf,
      OutputInfo oi,
      boolean tSparseCol,
      boolean enableCPCaching,
      int numReducers)
      throws DMLRuntimeException, DMLUnsupportedOperationException {
    final String jobname = "ParFor-DPESP";
    // the start time is only sampled when statistics are enabled
    final long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;

    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;
    final JavaSparkContext sparkContext = sparkCtx.getSparkContext();

    // input parameters: characteristics from the matrix metadata, fixed binary-block input info
    final MatrixCharacteristics inputChars =
        ((MatrixDimensionsMetaData) input.getMetaData()).getMatrixCharacteristics();
    final InputInfo inputInfo = InputInfo.BinaryBlockInputInfo;

    // accumulators the workers use to report tasks and iterations
    final Accumulator<Integer> taskCounter = sparkContext.accumulator(0);
    final Accumulator<Integer> iterCounter = sparkContext.accumulator(0);

    final JavaPairRDD<MatrixIndexes, MatrixBlock> inputBlocks =
        sparkCtx.getBinaryBlockRDDHandleForVariable(matrixvar);
    final DataPartitionerRemoteSparkMapper partitioner =
        new DataPartitionerRemoteSparkMapper(inputChars, inputInfo, oi, dpf);
    final RemoteDPParForSparkWorker worker =
        new RemoteDPParForSparkWorker(
            program,
            matrixvar,
            itervar,
            enableCPCaching,
            inputChars,
            tSparseCol,
            dpf,
            oi,
            taskCounter,
            iterCounter);

    // pipeline: partition input blocks -> group partition blocks -> run parfor tasks -> collect
    final List<Tuple2<Long, String>> handles =
        inputBlocks
            .flatMapToPair(partitioner)
            .groupByKey(numReducers)
            .mapPartitionsToPair(worker)
            .collect();

    // de-serialize results, then read the accumulators
    final LocalVariableMap[] results = RemoteParForUtils.getResults(handles, LOG);
    final int numTasks = taskCounter.value();
    final int numIters = iterCounter.value();

    // create output symbol table entries
    final RemoteParForJobReturn jobReturn =
        new RemoteParForJobReturn(true, numTasks, numIters, results);

    // maintain statistics; the heavy-hitter timing is only recorded when statistics are enabled
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
      Statistics.maintainCPHeavyHitters(jobname, System.nanoTime() - startNanos);
    }

    return jobReturn;
  }
  /**
   * Executes the mapmult instruction on Spark.
   *
   * <p>The cache type decides the roles of the two inputs: for {@code CacheType.LEFT}, {@code
   * input1} is read as the broadcast and {@code input2} as the RDD; for any other cache type the
   * roles are swapped. Unless {@code _outputEmpty} is set, empty blocks are filtered out both
   * before and after the multiplication. One of three Spark functions is applied, chosen by
   * {@code requiresFlatMapFunction} and {@code preservesPartitioning}.
   *
   * <p>For {@code SparkAggType.SINGLE_BLOCK} the result is summed into a single block and stored
   * as matrix output. Otherwise the RDD handle is stored with lineage to both inputs, after a
   * per-key sum when the aggregation type is {@code MULTI_BLOCK}.
   *
   * @param ec execution context; cast to {@link SparkExecutionContext} without a type check
   * @throws DMLRuntimeException declared by this method; propagated from the calls made here
   * @throws DMLUnsupportedOperationException declared by this method; propagated from the calls
   *     made here
   */
  @Override
  public void processInstruction(ExecutionContext ec)
      throws DMLRuntimeException, DMLUnsupportedOperationException {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // resolve which operand is the distributed side and which is the broadcast side
    final boolean leftCached = (_type == CacheType.LEFT);
    final String rddVar = leftCached ? input2.getName() : input1.getName();
    final String bcastVar = leftCached ? input1.getName() : input2.getName();
    final MatrixCharacteristics rddChars = sparkCtx.getMatrixCharacteristics(rddVar);
    final MatrixCharacteristics bcastChars = sparkCtx.getMatrixCharacteristics(bcastVar);

    // fetch both inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks =
        sparkCtx.getBinaryBlockRDDHandleForVariable(rddVar);
    final PartitionedBroadcastMatrix bcastBlocks = sparkCtx.getBroadcastForVariable(bcastVar);

    // empty blocks are dropped on input and output unless the instruction asks to keep them
    final boolean dropEmptyBlocks = !_outputEmpty;
    if (dropEmptyBlocks) {
      blocks = blocks.filter(new FilterNonEmptyBlocksFunction());
    }

    // pick the map-side multiply variant
    JavaPairRDD<MatrixIndexes, MatrixBlock> product;
    if (requiresFlatMapFunction(_type, bcastChars)) {
      product = blocks.flatMapToPair(new RDDFlatMapMMFunction(_type, bcastBlocks));
    } else if (preservesPartitioning(rddChars, _type)) {
      // second argument is Spark's preservesPartitioning flag
      product = blocks.mapPartitionsToPair(new RDDMapMMPartitionFunction(_type, bcastBlocks), true);
    } else {
      product = blocks.mapToPair(new RDDMapMMFunction(_type, bcastBlocks));
    }

    if (dropEmptyBlocks) {
      product = product.filter(new FilterNonEmptyBlocksFunction());
    }

    // single-block result: sum everything into one block and store it directly
    if (_aggtype == SparkAggType.SINGLE_BLOCK) {
      final MatrixBlock singleBlock = RDDAggregateUtils.sumStable(product);

      // no lineage is recorded because the output is a single block; storing the block also
      // maintains the matrix characteristics implicitly
      sparkCtx.setMatrixOutput(output.getName(), singleBlock);
      return;
    }

    // MULTI_BLOCK or NONE: only MULTI_BLOCK needs the per-key sum
    if (_aggtype == SparkAggType.MULTI_BLOCK) {
      product = RDDAggregateUtils.sumByKeyStable(product);
    }

    // publish the output RDD handle and record where it came from
    sparkCtx.setRDDHandleForVariable(output.getName(), product);
    sparkCtx.addLineageRDD(output.getName(), rddVar);
    sparkCtx.addLineageBroadcast(output.getName(), bcastVar);

    // update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sparkCtx, true);
  }