Example #1
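This example configures and submits a standalone combine MapReduce job over a set of input matrices, then returns the output matrix characteristics together with the job's success status.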
  public static JobReturn runJob(
      MRJobInstruction inst,
      String[] inputs,
      InputInfo[] inputInfos,
      long[] rlens,
      long[] clens,
      int[] brlens,
      int[] bclens,
      String combineInstructions,
      int numReducers,
      int replication,
      byte[] resultIndexes,
      String[] outputs,
      OutputInfo[] outputInfos)
      throws Exception {
    JobConf job = new JobConf(CombineMR.class);
    job.setJobName("Standalone-MR");

    boolean inBlockRepresentation = MRJobConfiguration.deriveRepresentation(inputInfos);

    // whether to use block representation or cell representation
    MRJobConfiguration.setMatrixValueClass(job, inBlockRepresentation);

    // assign a consecutive byte index to each input matrix
    byte[] inputIndexes = new byte[inputs.length];
    for (byte b = 0; b < inputs.length; b++) inputIndexes[b] = b;

    // set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(
        job,
        inputIndexes,
        inputs,
        inputInfos,
        brlens,
        bclens,
        true,
        inBlockRepresentation ? ConvertTarget.BLOCK : ConvertTarget.CELL);

    // set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, inputIndexes, rlens, clens);

    // set up the block sizes
    MRJobConfiguration.setBlocksSizes(job, inputIndexes, brlens, bclens);

    // set up the unary instructions to be performed in the mapper (none for this job)
    MRJobConfiguration.setInstructionsInMapper(job, "");

    // set up the aggregate instructions to be applied in the combiner and reducer (none for this job)
    MRJobConfiguration.setAggregateInstructions(job, "");

    // set up the instructions to be run in the reducer after the aggregation instructions (none for this job)
    MRJobConfiguration.setInstructionsInReducer(job, "");

    // set up the combine instructions that this job executes
    MRJobConfiguration.setCombineInstructions(job, combineInstructions);

    // set up the replication factor for the results
    job.setInt("dfs.replication", replication);

    // set up which matrices need to be passed from the mapper to the reducer
    HashSet<Byte> mapoutputIndexes =
        MRJobConfiguration.setUpOutputIndexesForMapper(
            job, inputIndexes, null, null, combineInstructions, resultIndexes);

    // set up the multiple output files and their format information
    MRJobConfiguration.setUpMultipleOutputs(
        job, resultIndexes, null, outputs, outputInfos, inBlockRepresentation);

    // configure the mapper and its output key/value classes
    job.setMapperClass(GMRMapper.class);

    job.setMapOutputKeyClass(MatrixIndexes.class);
    if (inBlockRepresentation) job.setMapOutputValueClass(TaggedMatrixBlock.class);
    else job.setMapOutputValueClass(TaggedMatrixCell.class);

    // configure reducer
    job.setReducerClass(InnerReducer.class);
    // job.setReducerClass(PassThroughReducer.class);

    // compute the characteristics of all output matrices and the number of reducer groups
    MatrixChar_N_ReducerGroups ret =
        MRJobConfiguration.computeMatrixCharacteristics(
            job,
            inputIndexes,
            null,
            null,
            null,
            combineInstructions,
            resultIndexes,
            mapoutputIndexes,
            false);
    MatrixCharacteristics[] stats = ret.stats;

    // set up the number of reducers
    MRJobConfiguration.setNumReducers(job, ret.numReducerGroups, numReducers);

    // print the complete MR job instruction if trace logging is enabled
    if (LOG.isTraceEnabled()) inst.printCompleteMRJobInstruction(stats);

    // By default, the job executes in "cluster" mode.
    // Determine if we can optimize and run it in "local" mode.
    MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
      inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
    }
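    // NOTE: inputStats is assembled for the local-mode check mentioned above,
    // but this snippet never consults it; the actual mode decision appears to
    // have been elided from the example.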

    // set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    // submit the job and block until it completes
    RunningJob runjob = JobClient.runJob(job);

    return new JobReturn(stats, runjob.isSuccessful());
  }
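For context, a minimal invocation sketch. Everything below is an illustrative assumption rather than part of the original example: the input/output paths, dimensions, block sizes, result index, and reducer/replication settings are made up, and `inst` and `combineInstr` are assumed to be produced by the surrounding compiler infrastructure.

  // Hypothetical call site; all literal values are illustrative assumptions.
  public static JobReturn runCombineExample(MRJobInstruction inst, String combineInstr)
      throws Exception {
    return runJob(
        inst,
        new String[] {"hdfs:/tmp/in/A"},                  // input path (assumed)
        new InputInfo[] {InputInfo.BinaryBlockInputInfo}, // binary-block input format
        new long[] {10000},                               // rlens: 10,000 rows
        new long[] {10000},                               // clens: 10,000 columns
        new int[] {1000},                                 // brlens: 1,000-row blocks
        new int[] {1000},                                 // bclens: 1,000-column blocks
        combineInstr,                                     // combine instruction string
        10,                                               // numReducers
        1,                                                // replication factor
        new byte[] {1},                                   // resultIndexes (assumed)
        new String[] {"hdfs:/tmp/out/C"},                 // output path (assumed)
        new OutputInfo[] {OutputInfo.BinaryBlockOutputInfo});
  }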