Example #1
0
  /**
   * Configures {@code job} according to the kind of Amino job described by {@code aj} and reports
   * which kind was configured.
   *
   * <p>When {@code aj} is an {@link AminoEnrichmentJob}, the job is wired with the enrichment-join
   * mapper, combiner and reducer, the composite-key partitioner/comparators,
   * {@link AminoMultiInputFormat} as input and {@link SequenceFileOutputFormat} as output. For an
   * {@link AminoReuseEnrichmentJob} the first-phase data loader replaces the regular one and the
   * output location is built from the configured enrichment root plus the job's output
   * sub-directory. Any other job is wired with the standard framework mapper and reducer,
   * {@link AminoInputFormat} as input and {@link AminoOutputFormat} as output, writing to the
   * location stored under {@code "amino.output"}.
   *
   * <p>In both cases the output location is handed to {@code JobUtilities.deleteDirectory} and
   * {@code CacheBuilder.buildCaches} is invoked before returning.
   *
   * @param job the Hadoop job to configure; its {@link Configuration} is read and modified
   * @param aj the Amino job definition that selects the data loader and the job flavour
   * @return {@code JOB_TYPE_REUSE_ENRICHMENT}, {@code JOB_TYPE_ENRICHMENT} or
   *     {@code JOB_TYPE_NORMAL}, depending on the runtime type of {@code aj}
   * @throws IllegalStateException if {@code aj} is a reuse enrichment job and the
   *     {@code AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT} property is not set
   * @throws Exception if a data loader or enrich worker cannot be instantiated, or if any of the
   *     invoked helpers fails
   */
  private int setJobParameters(Job job, AminoJob aj) throws Exception {
    final Configuration conf = job.getConfiguration();
    final Class<? extends DataLoader> dataLoaderClass = aj.getDataLoaderClass();
    AminoInputFormat.setDataLoader(conf, dataLoaderClass.newInstance());

    if (aj instanceof AminoEnrichmentJob) {
      final AminoEnrichmentJob aej = (AminoEnrichmentJob) aj;
      // Stays null unless this is a reuse job, in which case the output path is fixed up front.
      String reuseOutput = null;
      int returnType = JOB_TYPE_ENRICHMENT;

      if (aj instanceof AminoReuseEnrichmentJob) {
        System.out.println("Running REUSE Enrichment Join Job");

        final AminoReuseEnrichmentJob reuseJob = (AminoReuseEnrichmentJob) aj;
        // Overrides the data loader set above with the first-phase loader of the reuse job.
        AminoInputFormat.setDataLoader(
            conf, reuseJob.getFirstPhaseDataLoaderClass().newInstance());

        final String root = conf.get(AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT);
        if (root == null) {
          // Fail with a message naming the missing property instead of a bare NPE below.
          throw new IllegalStateException(
              "Missing required configuration property: "
                  + AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT);
        }
        final String separator = root.endsWith("/") ? "" : "/";
        reuseOutput = root + separator + reuseJob.getOutputSubDirectory(conf);

        returnType = JOB_TYPE_REUSE_ENRICHMENT;
      } else {
        System.out.println("Running Enrichment Join Job");
      }

      // The phase-specific reducer count wins; otherwise fall back to the general setting.
      final int numReducers =
          conf.getInt(
              AMINO_NUM_REDUCERS_ENRICH_PHASE1,
              conf.getInt(AMINO_NUM_REDUCERS, DEFAULT_NUM_REDUCERS));
      job.setNumReduceTasks(numReducers);

      // Our Framework mapper and reducer
      job.setMapperClass(FrameworkEnrichmentJoinMapper.class);
      job.setCombinerClass(FrameworkEnrichmentJoinCombiner.class);
      job.setReducerClass(FrameworkEnrichmentJoinReducer.class);

      // Key/value types that differ from the standard job: the map output key and the final
      // output value.
      job.setMapOutputKeyClass(EnrichmentJoinKey.class);
      job.setMapOutputValueClass(MapWritable.class);

      job.setOutputKeyClass(BucketStripped.class);
      job.setOutputValueClass(MapWritable.class);

      job.setPartitionerClass(NaturalKeyPartitioner.class);
      job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
      job.setSortComparatorClass(CompositeKeyComparator.class);

      job.setInputFormatClass(AminoMultiInputFormat.class);

      AminoMultiInputFormat.setJoinDataLoaders(conf, aej.getEnrichmentDataLoaders());
      AminoMultiInputFormat.setEnrichWorker(conf, aej.getEnrichWorker().newInstance());

      job.setOutputFormatClass(SequenceFileOutputFormat.class);

      // TODO If it already exists, and its age is less than job running frequency, just reuse it
      // instead of doing the above job...
      final String output =
          (reuseOutput != null) ? reuseOutput : getEnrichmentOutputPath(aej, conf);
      System.out.println("Output will be written to: " + PathUtils.getJobDataPath(output));

      SequenceFileOutputFormat.setOutputPath(job, new Path(PathUtils.getJobDataPath(output)));
      JobUtilities.deleteDirectory(conf, output);

      CacheBuilder.buildCaches(AminoDataUtils.getDataLoader(conf), aj, output, conf);

      return returnType;

    } else {
      System.out.println("\n==================== Running Amino Job =================\n");

      // Our Framework mapper and reducer
      job.setMapperClass(FrameworkMapper.class);
      job.setReducerClass(FrameworkReducer.class);

      job.setMapOutputKeyClass(BucketStripped.class);
      job.setMapOutputValueClass(MapWritable.class);

      job.setOutputKeyClass(BucketStripped.class);
      job.setOutputValueClass(AminoWritable.class);

      job.setInputFormatClass(AminoInputFormat.class);

      job.setOutputFormatClass(AminoOutputFormat.class);
      job.setNumReduceTasks(conf.getInt(AMINO_NUM_REDUCERS, DEFAULT_NUM_REDUCERS));

      AminoOutputFormat.setAminoConfigPath(
          job, conf.get(AminoConfiguration.DEFAULT_CONFIGURATION_PATH_KEY));

      // NOTE(review): "amino.output" is not checked for null here; presumably the driver
      // guarantees it is set before this method runs - confirm against the caller.
      final String output = conf.get("amino.output");
      System.out.println("Output will be written to: " + PathUtils.getJobDataPath(output));
      AminoOutputFormat.setOutputPath(job, new Path(PathUtils.getJobDataPath(output)));
      JobUtilities.deleteDirectory(conf, output);

      CacheBuilder.buildCaches(AminoDataUtils.getDataLoader(conf), aj, output, conf);
      return JOB_TYPE_NORMAL;
    }
  }