public int run(String[] args) throws Exception { ServiceLoader<? extends AminoJob> jobs = ServiceLoader.load(AminoJob.class); if (!jobs.iterator().hasNext()) { jobs = ServiceLoader.load(AminoEnrichmentJob.class); } if (!jobs.iterator().hasNext()) { jobs = ServiceLoader.load(AminoReuseEnrichmentJob.class); } Configuration conf = getConf(); // AminoConfiguration.loadDefault(conf, "AminoDefaults", true); // boolean complete = createTables(conf); boolean complete = true; updateStatus(JobStatus.RUNNING); // boolean complete = true; for (AminoJob aj : jobs) { aj.setConfig(conf); logger.info("Running Job -> " + aj.getJobName()); if (complete) { Job job = new Job(conf, aj.getJobName()); job.setJarByClass(aj.getClass()); // Add the class to the conf it can be grabbed in the Reduce phase AminoDriverUtils.setAminoJob(job.getConfiguration(), aj.getClass()); int jobType = setJobParameters(job, aj); // Call job configuration for special properties jobConfiguration(job); complete = job.waitForCompletion(true); if (jobType == JOB_TYPE_ENRICHMENT || jobType == JOB_TYPE_REUSE_ENRICHMENT) { if (!stopOnFirstPhase) { stopOnFirstPhase = conf.getBoolean("stop.on.first.phase", stopOnFirstPhase); } if (complete && !stopOnFirstPhase) { complete = runSecondPhaseEnrichmentJob((AminoEnrichmentJob) aj, conf, jobType); if (jobType == JOB_TYPE_REUSE_ENRICHMENT) ((AminoReuseEnrichmentJob) aj).directoryCleanup(conf); } else if (!complete) { System.err.println("Job failed, unable to run second enrichment step"); } } } } updateStatus(complete ? JobStatus.COMPLETE : JobStatus.FAILED); return complete ? 0 : 1; }
private boolean runSecondPhaseEnrichmentJob( AminoEnrichmentJob aej, Configuration conf, int jobType) throws Exception { System.out.println("Running Amino Job"); final Job job = new Job(conf, aej.getJobName() + " phase 2"); job.setJarByClass(aej.getClass()); AminoDriverUtils.setAminoJob(job.getConfiguration(), aej.getClass()); if (jobType == JOB_TYPE_ENRICHMENT) { job.getConfiguration().set(AminoDriverUtils.ENRICHMENT_OUTPUT, this.enrichmentOutput); } else if (jobType == JOB_TYPE_REUSE_ENRICHMENT) { String root = conf.get(AminoDriverUtils.ENRICHMENT_ROOT_OUTPUT); String front = ""; if (!root.endsWith("/")) { front = "/"; } root += front; final Iterable<String> inputs = ((AminoReuseEnrichmentJob) aej) .getSecondPhaseEnrichmentInputDirectories(job.getConfiguration()); String inputStr = ""; System.out.println("Using enrichment input paths:"); for (String input : inputs) { if (inputStr.length() > 0) { inputStr += "," + PathUtils.getJobDataPath(root + input); } else { inputStr += PathUtils.getJobDataPath(root + input); } System.out.println(PathUtils.getJobDataPath(root + input)); } job.getConfiguration().set(AminoDriverUtils.ENRICHMENT_OUTPUT, inputStr); // Need to do this because the first phase data loader is sitting in this slot currently AminoInputFormat.setDataLoader( job.getConfiguration(), aej.getDataLoaderClass().newInstance()); } int numReducers = job.getConfiguration() .getInt( AMINO_NUM_REDUCERS_ENRICH_PHASE2, job.getConfiguration().getInt(AMINO_NUM_REDUCERS, DEFAULT_NUM_REDUCERS)); job.setNumReduceTasks(numReducers); job.setMapperClass(FrameworkMapper.class); job.setReducerClass(FrameworkReducer.class); job.setMapOutputKeyClass(BucketStripped.class); job.setMapOutputValueClass(MapWritable.class); job.setOutputKeyClass(BucketStripped.class); job.setOutputValueClass(AminoWritable.class); job.setInputFormatClass(AminoMultiInputFormat.class); AminoMultiInputFormat.setDataLoader( job.getConfiguration(), aej.getDataLoaderClass().newInstance()); // Call job configuration for special properties jobConfiguration(job); @SuppressWarnings("serial") ArrayList<Class<? extends DataLoader>> joinSource = new ArrayList<Class<? extends DataLoader>>() { { add(EnrichmentDataLoader.class); } }; AminoMultiInputFormat.setJoinDataLoaders(job.getConfiguration(), joinSource); job.setOutputFormatClass(AminoOutputFormat.class); AminoOutputFormat.setAminoConfigPath( job, job.getConfiguration().get(AminoConfiguration.DEFAULT_CONFIGURATION_PATH_KEY)); String output = job.getConfiguration().get("amino.output"); System.out.println("Output will be written to: " + PathUtils.getJobDataPath(output)); AminoOutputFormat.setOutputPath(job, new Path(PathUtils.getJobDataPath(output))); JobUtilities.deleteDirectory(job.getConfiguration(), output); CacheBuilder.buildCaches( AminoDataUtils.getDataLoader(job.getConfiguration()), aej, output, job.getConfiguration()); return job.waitForCompletion(true); }