@Override
public ExecutionResult execute(ExecutionContext executionContext)
        throws ProcessExecutionException, InterruptedException {

    try {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        RampartProcessArgs args = this.getRampartArgs();

        // Force run parallel to false if not using a scheduler
        if (!executionContext.usingScheduler()) {
            if (args.isRunParallel()) {
                log.warn("Forcing linear execution due to lack of job scheduler");
            }
            args.setRunParallel(false);
        }

        log.info("Starting " + this.getName() + " Process");

        this.results = new ArrayList<>();

        // Loop through all samples to process
        for (Mecq.Sample sample : args.samples) {

            if (sample.failedAtStage == -1) {

                File stageDir = args.getStageDir(sample);

                // Ensure sample output directory exists
                if (!stageDir.exists()) {
                    stageDir.mkdirs();
                }

                // Do sample-specific work
                TaskResult sampleResults = this.executeSample(sample, executionContext);

                // Collect results
                results.addAll(sampleResults.getProcessResults());
            }
        }

        // Ensure wait log directory exists
        File logDir = new File(args.outputDir, "wait_logs");
        if (!logDir.exists()) {
            logDir.mkdirs();
        }

        // If we're using a scheduler and have been asked to run jobs in parallel,
        // then wait for them all to complete before finishing this stage.
        if (executionContext.usingScheduler() && args.runParallel) {

            log.info("Running all " + this.getName() + " jobs in parallel, waiting for completion");

            this.conanExecutorService.executeScheduledWait(
                    results,
                    args.jobPrefix + "-*",
                    ExitStatus.Type.COMPLETED_ANY,
                    args.jobPrefix + "-wait",
                    logDir);
        }

        // Check all the required output files are in place (delegated to child class)
        // Loop through all samples to process
        for (int i = 0; i < args.samples.size(); i++) {

            Mecq.Sample sample = args.samples.get(i);

            if (sample.failedAtStage == -1) {

                boolean valid = this.validateOutput(sample);

                if (!valid) {
                    sample.failedAtStage = args.getStage().ordinal();

                    if (args.samples.size() == 1) {
                        throw new IOException(
                                "Stage " + args.getStage().name() + " failed to produce valid output.");
                    } else {
                        log.error(
                                "Sample " + sample.name + " failed to produce valid output for stage "
                                + args.getStage().name() + "; discontinuing pipeline for this sample.");
                    }
                }
            }
        }

        this.finalise();

        log.info("Finished " + this.getName() + " Process");

        stopWatch.stop();

        this.taskResult = new DefaultTaskResult(
                "rampart-" + this.getName(), true, results, stopWatch.getTime() / 1000L);

        // Output the resource usage to file
        FileUtils.writeLines(
                new File(logDir, args.jobPrefix + ".summary"),
                this.taskResult.getOutput());

        return new DefaultExecutionResult(
                this.taskResult.getTaskName(),
                0,
                new String[] {},
                null,
                -1,
                new ResourceUsage(
                        this.taskResult.getMaxMemUsage(),
                        this.taskResult.getActualTotalRuntime(),
                        this.taskResult.getTotalExternalCputime()));
    } catch (IOException e) {
        throw new ProcessExecutionException(2, e);
    }
}
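/*
 * Illustrative only: the output check above is delegated to the concrete stage
 * via validateOutput(sample). A minimal sketch of what a subclass override
 * might look like is shown below; the "assembly.fasta" artifact name is a
 * hypothetical stand-in, since the real per-stage checks are not part of this
 * excerpt.
 */
@Override
protected boolean validateOutput(Mecq.Sample sample) {
    // Hypothetical check: treat the stage as successful if its main output
    // file exists in the sample's stage directory and is non-empty.
    File expected = new File(this.getRampartArgs().getStageDir(sample), "assembly.fasta");
    return expected.exists() && expected.length() > 0;
}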
@Override
public ExecutionResult execute(ExecutionContext executionContext)
        throws ProcessExecutionException, InterruptedException {

    try {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();

        log.info("Starting Kmer Counting on all Reads");

        // Create shortcut to args for convenience
        Args args = this.getArgs();

        // Force run parallel to false if not using a scheduler
        if (!executionContext.usingScheduler() && args.isRunParallel()) {
            log.warn("Forcing linear execution due to lack of job scheduler");
            args.setRunParallel(false);
        }

        // Create the output directory
        args.getOutputDir().mkdirs();

        JobOutputMap jfCountOutputs = new JobOutputMap();
        List<ExecutionResult> jobResults = new ArrayList<>();
        List<ExecutionResult> allJobResults = new ArrayList<>();

        // Create the output directory for the RAW datasets
        File rawOutputDir = new File(args.getOutputDir(), "raw");
        if (!rawOutputDir.exists()) {
            rawOutputDir.mkdirs();
        }

        // Start jellyfish on all RAW datasets
        for (Library lib : args.getAllLibraries()) {

            // Execute jellyfish and add id to list of job ids
            JobOutput jfOut = this.executeJellyfishCount(args, "raw", args.getOutputDir(), lib);
            jobResults.add(jfOut.getResult());
            allJobResults.add(jfOut.getResult());

            jfCountOutputs.updateTracker("raw", jfOut.getOutputFile());
        }

        // Also start jellyfish on all the pre-processed libraries from MECQ
        if (args.getAllMecqs() != null) {
            for (Mecq.EcqArgs ecqArgs : args.getAllMecqs()) {

                // Create the output directory for this ECQ dataset
                File ecqOutputDir = new File(args.getOutputDir(), ecqArgs.getName());
                if (!ecqOutputDir.exists()) {
                    ecqOutputDir.mkdirs();
                }

                for (Library lib : ecqArgs.getOutputLibraries()) {

                    // Execute jellyfish and add id to list of job ids
                    JobOutput jfOut = this.executeJellyfishCount(args, ecqArgs.getName(), args.getOutputDir(), lib);
                    jobResults.add(jfOut.getResult());
                    allJobResults.add(jfOut.getResult());

                    jfCountOutputs.updateTracker(ecqArgs.getName(), jfOut.getOutputFile());
                }
            }
        }

        // If we're using a scheduler and have been asked to run each job in
        // parallel, then wait for all counting jobs to complete before continuing.
        if (executionContext.usingScheduler() && args.isRunParallel()) {

            log.info("Kmer counting all ECQ groups in parallel, waiting for completion");

            this.conanExecutorService.executeScheduledWait(
                    jobResults,
                    args.getJobPrefix() + "-count-*",
                    ExitStatus.Type.COMPLETED_ANY,
                    args.getJobPrefix() + "-kmer-count-wait",
                    args.getOutputDir());
        }

        // Waiting point... clear job ids.
        jobResults.clear();

        JobOutputMap mergedOutputs = new JobOutputMap();

        // Now execute merge jobs if required
        for (Map.Entry<String, Set<File>> entry : jfCountOutputs.entrySet()) {

            String ecqName = entry.getKey();
            Set<File> fileSet = entry.getValue();

            // Only merge if there's more than one library
            if (fileSet.size() > 1) {
                JobOutput jfOut = this.executeJellyfishMerger(
                        args, ecqName, fileSet, new File(args.getOutputDir(), ecqName));

                jobResults.add(jfOut.getResult());
                allJobResults.add(jfOut.getResult());

                mergedOutputs.updateTracker(ecqName, jfOut.getOutputFile());
            }
        }

        // If we're using a scheduler and have been asked to run each job in
        // parallel, then wait for all merge jobs to complete before continuing.
        if (executionContext.usingScheduler() && args.isRunParallel()) {

            log.info("Creating merged kmer counts for all ECQ groups in parallel, waiting for completion");

            this.conanExecutorService.executeScheduledWait(
                    jobResults,
                    args.getJobPrefix() + "-merge-*",
                    ExitStatus.Type.COMPLETED_ANY,
                    args.getJobPrefix() + "-kmer-merge-wait",
                    args.getOutputDir());
        }

        // Waiting point... clear job ids.
        jobResults.clear();

        // Combine all jellyfish out maps
        jfCountOutputs.combine(mergedOutputs);

        String katGcpJobPrefix = args.getJobPrefix() + "-kat-gcp";

        // Run KAT GCP on everything
        List<ExecutionResult> katGcpResults = this.executeKatGcp(
                jfCountOutputs,
                katGcpJobPrefix,
                args.getThreadsPerProcess(),
                args.getMemoryPerProcess(),
                args.isRunParallel());

        for (ExecutionResult result : katGcpResults) {
            result.setName(result.getName().substring(args.getJobPrefix().length() + 1));
            jobResults.add(result);
            allJobResults.add(result);
        }

        // If we're using a scheduler and have been asked to run each job in
        // parallel, then wait for all "kat gcp" jobs to complete before continuing.
        if (executionContext.usingScheduler() && args.isRunParallel()) {

            log.info("Running \"kat gcp\" for all ECQ groups in parallel, waiting for completion");

            this.conanExecutorService.executeScheduledWait(
                    jobResults,
                    katGcpJobPrefix + "*",
                    ExitStatus.Type.COMPLETED_ANY,
                    args.getJobPrefix() + "-kat-gcp-wait",
                    args.getOutputDir());
        }

        // Waiting point... clear job ids.
        jobResults.clear();

        log.info("Kmer counting of all reads finished.");

        stopWatch.stop();

        TaskResult taskResult = new DefaultTaskResult(
                "rampart-read_analysis-kmer", true, allJobResults, stopWatch.getTime() / 1000L);

        // Output the resource usage to file
        FileUtils.writeLines(
                new File(args.getOutputDir(), args.getJobPrefix() + ".summary"),
                taskResult.getOutput());

        return new DefaultExecutionResult(
                taskResult.getTaskName(),
                0,
                new String[] {},
                null,
                -1,
                new ResourceUsage(
                        taskResult.getMaxMemUsage(),
                        taskResult.getActualTotalRuntime(),
                        taskResult.getTotalExternalCputime()));

    } catch (ConanParameterException | IOException e) {
        throw new ProcessExecutionException(-1, e);
    }
}
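/*
 * A minimal sketch of JobOutputMap, inferred from its usage above
 * (updateTracker, combine, and iteration over Map.Entry<String, Set<File>>).
 * The real class ships with RAMPART and may differ; this version only
 * illustrates the idea of grouping jellyfish output files by ECQ group name.
 * Assumes java.io.File and java.util.* imports.
 */
public class JobOutputMap extends HashMap<String, Set<File>> {

    // Record an output file under the given group, creating the set on first use.
    public void updateTracker(String group, File outputFile) {
        Set<File> files = this.get(group);
        if (files == null) {
            files = new LinkedHashSet<>();
            this.put(group, files);
        }
        files.add(outputFile);
    }

    // Fold another map's contents into this one, merging file sets per group.
    public void combine(JobOutputMap other) {
        for (Map.Entry<String, Set<File>> entry : other.entrySet()) {
            for (File file : entry.getValue()) {
                this.updateTracker(entry.getKey(), file);
            }
        }
    }
}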