Example #1
0
  /**
   * Configures and runs the second phase of an enrichment job, blocking until the job finishes.
   *
   * <p>The job is created from {@code conf} and named {@code aej.getJobName() + " phase 2"}. How
   * {@code AminoDriverUtils.ENRICHMENT_OUTPUT} is populated depends on {@code jobType}:
   *
   * <ul>
   *   <li>{@code JOB_TYPE_ENRICHMENT}: it is set to this driver's {@code enrichmentOutput}.
   *   <li>{@code JOB_TYPE_REUSE_ENRICHMENT}: it is set to a comma-separated list of job data
   *       paths, one per directory returned by {@code
   *       AminoReuseEnrichmentJob#getSecondPhaseEnrichmentInputDirectories}, each resolved under
   *       the root read from {@code AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT}.
   *   <li>any other value: it is left untouched.
   * </ul>
   *
   * <p>The directory named by the {@code amino.output} property is deleted before the job is
   * submitted.
   *
   * @param aej the enrichment job to run; must be an {@code AminoReuseEnrichmentJob} when {@code
   *     jobType} is {@code JOB_TYPE_REUSE_ENRICHMENT}
   * @param conf the configuration the Hadoop job is created from
   * @param jobType {@code JOB_TYPE_ENRICHMENT} or {@code JOB_TYPE_REUSE_ENRICHMENT}
   * @return the result of {@code job.waitForCompletion(true)}
   * @throws IllegalStateException if {@code jobType} is {@code JOB_TYPE_REUSE_ENRICHMENT} and
   *     {@code AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT} is not set in {@code conf}
   * @throws Exception if configuring or running the job fails
   */
  private boolean runSecondPhaseEnrichmentJob(
      AminoEnrichmentJob aej, Configuration conf, int jobType) throws Exception {
    System.out.println("Running Amino Job");

    final Job job = new Job(conf, aej.getJobName() + " phase 2");
    job.setJarByClass(aej.getClass());

    final Configuration jobConf = job.getConfiguration();
    AminoDriverUtils.setAminoJob(jobConf, aej.getClass());

    if (jobType == JOB_TYPE_ENRICHMENT) {
      jobConf.set(AminoDriverUtils.ENRICHMENT_OUTPUT, this.enrichmentOutput);
    } else if (jobType == JOB_TYPE_REUSE_ENRICHMENT) {
      String root = conf.get(AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT);
      if (root == null) {
        // Fail with a descriptive message instead of a bare NullPointerException below.
        throw new IllegalStateException(
            "Missing required configuration property: "
                + AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT);
      }
      // Make sure the root ends with a separator so directory names can be appended directly.
      if (!root.endsWith("/")) {
        root += "/";
      }

      final Iterable<String> inputs =
          ((AminoReuseEnrichmentJob) aej).getSecondPhaseEnrichmentInputDirectories(jobConf);
      final StringBuilder inputPaths = new StringBuilder();
      System.out.println("Using enrichment input paths:");
      for (String input : inputs) {
        // Resolve each path once and reuse it for both the list and the log line.
        final String dataPath = PathUtils.getJobDataPath(root + input);
        if (inputPaths.length() > 0) {
          inputPaths.append(',');
        }
        inputPaths.append(dataPath);
        System.out.println(dataPath);
      }

      jobConf.set(AminoDriverUtils.ENRICHMENT_OUTPUT, inputPaths.toString());

      // Need to do this because the first phase data loader is sitting in this slot currently
      AminoInputFormat.setDataLoader(jobConf, aej.getDataLoaderClass().newInstance());
    }

    // The phase-2 specific reducer count wins; otherwise fall back to the general setting and
    // finally to the built-in default.
    final int numReducers =
        jobConf.getInt(
            AMINO_NUM_REDUCERS_ENRICH_PHASE2,
            jobConf.getInt(AMINO_NUM_REDUCERS, DEFAULT_NUM_REDUCERS));
    job.setNumReduceTasks(numReducers);

    job.setMapperClass(FrameworkMapper.class);
    job.setReducerClass(FrameworkReducer.class);

    job.setMapOutputKeyClass(BucketStripped.class);
    job.setMapOutputValueClass(MapWritable.class);

    job.setOutputKeyClass(BucketStripped.class);
    job.setOutputValueClass(AminoWritable.class);

    job.setInputFormatClass(AminoMultiInputFormat.class);
    AminoMultiInputFormat.setDataLoader(jobConf, aej.getDataLoaderClass().newInstance());

    // Call job configuration for special properties
    jobConfiguration(job);

    // Plain list instead of double-brace initialisation: no anonymous subclass and no hidden
    // reference to the enclosing driver instance.
    final ArrayList<Class<? extends DataLoader>> joinSource =
        new ArrayList<Class<? extends DataLoader>>();
    joinSource.add(EnrichmentDataLoader.class);
    AminoMultiInputFormat.setJoinDataLoaders(jobConf, joinSource);

    job.setOutputFormatClass(AminoOutputFormat.class);
    AminoOutputFormat.setAminoConfigPath(
        job, jobConf.get(AminoConfiguration.DEFAULT_CONFIGURATION_PATH_KEY));

    final String output = jobConf.get("amino.output");
    final String outputDataPath = PathUtils.getJobDataPath(output);
    System.out.println("Output will be written to: " + outputDataPath);
    AminoOutputFormat.setOutputPath(job, new Path(outputDataPath));
    // Clear any previous output before the caches are built and the job is submitted.
    JobUtilities.deleteDirectory(jobConf, output);
    CacheBuilder.buildCaches(AminoDataUtils.getDataLoader(jobConf), aej, output, jobConf);

    return job.waitForCompletion(true);
  }