Ejemplo n.º 1
0
  /**
   * Discovers the Amino jobs available through {@link ServiceLoader} and runs them one after
   * another as Hadoop jobs.
   *
   * <p>Providers are looked up for {@code AminoJob} first, then {@code AminoEnrichmentJob}, then
   * {@code AminoReuseEnrichmentJob}; the first lookup that yields at least one provider is used.
   * Enrichment-type jobs are followed by a second phase unless {@code stopOnFirstPhase} is set
   * (either on this instance or through the {@code stop.on.first.phase} property).
   *
   * <p>The status is set to {@code RUNNING} before the first job and to {@code COMPLETE} or
   * {@code FAILED} afterwards. Processing stops at the first job that does not complete
   * successfully. If an exception escapes while jobs are running, the status is set to
   * {@code FAILED} before the exception is propagated.
   *
   * @param args command-line arguments; not used by this method
   * @return {@code 0} if every job that was run completed successfully (including the case where
   *     no provider was found), {@code 1} otherwise
   * @throws Exception if configuring or running any job fails with an exception
   */
  public int run(String[] args) throws Exception {
    ServiceLoader<? extends AminoJob> jobs = ServiceLoader.load(AminoJob.class);
    if (!jobs.iterator().hasNext()) {
      jobs = ServiceLoader.load(AminoEnrichmentJob.class);
    }
    if (!jobs.iterator().hasNext()) {
      jobs = ServiceLoader.load(AminoReuseEnrichmentJob.class);
    }

    Configuration conf = getConf();
    // NOTE: loading of "AminoDefaults" and the createTables(conf) step were commented out in the
    // original code and are not performed here.
    boolean complete = true;

    updateStatus(JobStatus.RUNNING);

    // Tracks whether the loop ended normally, so an escaping exception does not leave the
    // status stuck at RUNNING.
    boolean finishedNormally = false;
    try {
      for (AminoJob aj : jobs) {
        aj.setConfig(conf);
        logger.info("Running Job -> " + aj.getJobName());

        Job job = new Job(conf, aj.getJobName());
        job.setJarByClass(aj.getClass());

        // Add the class to the conf so it can be grabbed in the Reduce phase
        AminoDriverUtils.setAminoJob(job.getConfiguration(), aj.getClass());

        int jobType = setJobParameters(job, aj);

        // Call job configuration for special properties
        jobConfiguration(job);

        complete = job.waitForCompletion(true);

        if (jobType == JOB_TYPE_ENRICHMENT || jobType == JOB_TYPE_REUSE_ENRICHMENT) {
          // Once stopOnFirstPhase is set it stays set; otherwise consult the configuration.
          if (!stopOnFirstPhase) {
            stopOnFirstPhase = conf.getBoolean("stop.on.first.phase", stopOnFirstPhase);
          }
          if (complete && !stopOnFirstPhase) {
            complete = runSecondPhaseEnrichmentJob((AminoEnrichmentJob) aj, conf, jobType);
            if (jobType == JOB_TYPE_REUSE_ENRICHMENT) {
              ((AminoReuseEnrichmentJob) aj).directoryCleanup(conf);
            }
          } else if (!complete) {
            System.err.println("Job failed, unable to run second enrichment step");
          }
        }

        if (!complete) {
          // A failed job prevents every later job from running, so stop here instead of
          // announcing jobs that will never be submitted.
          break;
        }
      }
      finishedNormally = true;
    } finally {
      if (!finishedNormally) {
        try {
          updateStatus(JobStatus.FAILED);
        } catch (Exception statusFailure) {
          // Keep the original exception as the one that propagates to the caller.
          System.err.println("Unable to record FAILED status: " + statusFailure);
        }
      }
    }

    updateStatus(complete ? JobStatus.COMPLETE : JobStatus.FAILED);

    return complete ? 0 : 1;
  }
Ejemplo n.º 2
0
  /**
   * Configures and runs the second phase ("phase 2") Hadoop job of an enrichment job.
   *
   * <p>For {@code JOB_TYPE_ENRICHMENT} the enrichment output is taken from
   * {@code this.enrichmentOutput}. For {@code JOB_TYPE_REUSE_ENRICHMENT} it is the comma-separated
   * list of job-data paths built from the directories reported by the job, each resolved under
   * the configured enrichment root; the job's data loader is also re-registered on the input
   * format.
   *
   * @param aej the enrichment job whose second phase is run; must be an
   *     {@code AminoReuseEnrichmentJob} when {@code jobType} is {@code JOB_TYPE_REUSE_ENRICHMENT}
   * @param conf configuration the Hadoop job is created from and the enrichment root is read from
   * @param jobType {@code JOB_TYPE_ENRICHMENT} or {@code JOB_TYPE_REUSE_ENRICHMENT}; other values
   *     skip the enrichment-output setup
   * @return the result of {@code job.waitForCompletion(true)}
   * @throws IllegalStateException if {@code jobType} is {@code JOB_TYPE_REUSE_ENRICHMENT} and the
   *     enrichment root property is not set in {@code conf}
   * @throws Exception if configuring or running the job fails
   */
  private boolean runSecondPhaseEnrichmentJob(
      AminoEnrichmentJob aej, Configuration conf, int jobType) throws Exception {
    System.out.println("Running Amino Job");

    final Job job = new Job(conf, aej.getJobName() + " phase 2");
    job.setJarByClass(aej.getClass());

    final Configuration jobConf = job.getConfiguration();
    AminoDriverUtils.setAminoJob(jobConf, aej.getClass());

    if (jobType == JOB_TYPE_ENRICHMENT) {
      jobConf.set(AminoDriverUtils.ENRICHMENT_OUTPUT, this.enrichmentOutput);
    } else if (jobType == JOB_TYPE_REUSE_ENRICHMENT) {
      String root = conf.get(AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT);
      if (root == null) {
        // Fail with a message naming the missing key instead of a bare NullPointerException.
        throw new IllegalStateException(
            "Missing required configuration property: "
                + AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT);
      }
      if (!root.endsWith("/")) {
        root += "/";
      }

      final Iterable<String> inputs =
          ((AminoReuseEnrichmentJob) aej).getSecondPhaseEnrichmentInputDirectories(jobConf);
      final StringBuilder inputPaths = new StringBuilder();
      System.out.println("Using enrichment input paths:");
      for (String input : inputs) {
        // Resolve each path once and reuse it for both the joined value and the console output.
        final String path = PathUtils.getJobDataPath(root + input);
        if (inputPaths.length() > 0) {
          inputPaths.append(',');
        }
        inputPaths.append(path);
        System.out.println(path);
      }

      jobConf.set(AminoDriverUtils.ENRICHMENT_OUTPUT, inputPaths.toString());

      // Need to do this because the first phase data loader is sitting in this slot currently
      AminoInputFormat.setDataLoader(jobConf, aej.getDataLoaderClass().newInstance());
    }

    // The phase-2 specific reducer count wins; otherwise fall back to the general setting and
    // finally to the built-in default.
    final int numReducers =
        jobConf.getInt(
            AMINO_NUM_REDUCERS_ENRICH_PHASE2,
            jobConf.getInt(AMINO_NUM_REDUCERS, DEFAULT_NUM_REDUCERS));
    job.setNumReduceTasks(numReducers);

    job.setMapperClass(FrameworkMapper.class);
    job.setReducerClass(FrameworkReducer.class);

    job.setMapOutputKeyClass(BucketStripped.class);
    job.setMapOutputValueClass(MapWritable.class);

    job.setOutputKeyClass(BucketStripped.class);
    job.setOutputValueClass(AminoWritable.class);

    job.setInputFormatClass(AminoMultiInputFormat.class);
    AminoMultiInputFormat.setDataLoader(jobConf, aej.getDataLoaderClass().newInstance());

    // Call job configuration for special properties
    jobConfiguration(job);

    // Plain list instead of double-brace initialisation, which would create an anonymous
    // ArrayList subclass holding a reference to this driver instance.
    final ArrayList<Class<? extends DataLoader>> joinSource =
        new ArrayList<Class<? extends DataLoader>>();
    joinSource.add(EnrichmentDataLoader.class);
    AminoMultiInputFormat.setJoinDataLoaders(jobConf, joinSource);

    job.setOutputFormatClass(AminoOutputFormat.class);
    AminoOutputFormat.setAminoConfigPath(
        job, jobConf.get(AminoConfiguration.DEFAULT_CONFIGURATION_PATH_KEY));

    final String output = jobConf.get("amino.output");
    System.out.println("Output will be written to: " + PathUtils.getJobDataPath(output));
    AminoOutputFormat.setOutputPath(job, new Path(PathUtils.getJobDataPath(output)));
    // NOTE(review): the raw "amino.output" value is passed here while the output path above uses
    // PathUtils.getJobDataPath(output) - confirm deleteDirectory resolves the path itself.
    JobUtilities.deleteDirectory(jobConf, output);
    CacheBuilder.buildCaches(AminoDataUtils.getDataLoader(jobConf), aej, output, jobConf);

    return job.waitForCompletion(true);
  }