Example #1
  @Override
  public ExecutionResult execute(ExecutionContext executionContext)
      throws ProcessExecutionException, InterruptedException {

    try {
      StopWatch stopWatch = new StopWatch();
      stopWatch.start();

      RampartProcessArgs args = this.getRampartArgs();

      // Force run parallel to false if not using a scheduler
      if (!executionContext.usingScheduler()) {
        if (args.isRunParallel()) {
          log.warn("Forcing linear execution due to lack of job scheduler");
        }
        args.setRunParallel(false);
      }

      log.info("Starting " + this.getName() + " Process");
      this.results = new ArrayList<>();

      // Loop through all samples to process
      for (Mecq.Sample sample : args.samples) {

        if (sample.failedAtStage == -1) {

          File stageDir = args.getStageDir(sample);

          // Ensure sample output directory exists
          if (!stageDir.exists()) {
            stageDir.mkdirs();
          }

          // Do sample-specific work
          TaskResult sampleResults = this.executeSample(sample, executionContext);

          // Collect results
          results.addAll(sampleResults.getProcessResults());
        }
      }

      // Ensure wait log directory exists
      File logDir = new File(args.outputDir, "wait_logs");
      if (!logDir.exists()) {
        logDir.mkdirs();
      }

      // If we're using a scheduler and we have been asked to run jobs
      // in parallel, then we should wait for all those to complete before finishing this stage.
      if (executionContext.usingScheduler() && args.runParallel) {

        log.info("Running all " + this.getName() + " jobs in parallel, waiting for completion");
        this.conanExecutorService.executeScheduledWait(
            results,
            args.jobPrefix + "-*",
            ExitStatus.Type.COMPLETED_ANY,
            args.jobPrefix + "-wait",
            logDir);
      }

      // Check all the required output files are in place for each sample
      // (delegated to child class)
      for (Mecq.Sample sample : args.samples) {

        if (sample.failedAtStage == -1) {
          boolean valid = this.validateOutput(sample);

          if (!valid) {
            sample.failedAtStage = args.getStage().ordinal();
            if (args.samples.size() == 1) {
              throw new IOException(
                  "Stage " + args.getStage().name() + " failed to produce valid output.");
            } else {
            log.error(
                "Sample "
                    + sample.name
                    + " failed to produce valid output for stage "
                    + args.getStage().name()
                    + "; discontinuing pipeline for this sample.");
            }
          }
        }
      }

      this.finalise();

      log.info("Finished " + this.getName() + " Process");

      stopWatch.stop();

      this.taskResult =
          new DefaultTaskResult(
              "rampart-" + this.getName(), true, results, stopWatch.getTime() / 1000L);

      // Output the resource usage to file
      FileUtils.writeLines(
          new File(logDir, args.jobPrefix + ".summary"), this.taskResult.getOutput());

      return new DefaultExecutionResult(
          this.taskResult.getTaskName(),
          0,
          new String[] {},
          null,
          -1,
          new ResourceUsage(
              this.taskResult.getMaxMemUsage(),
              this.taskResult.getActualTotalRuntime(),
              this.taskResult.getTotalExternalCputime()));

    } catch (IOException e) {
      throw new ProcessExecutionException(2, e);
    }
  }
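
The key pattern in this example is scheduler-aware dispatch: one job is submitted per sample up front, and a single waiting point (executeScheduledWait) blocks until every job matching the prefix completes. As a rough analogue outside of Conan, the same dispatch-then-wait barrier can be sketched with plain java.util.concurrent; processSample below is a hypothetical stand-in for executeSample and is not part of RAMPART.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class DispatchThenWaitSketch {

  // Hypothetical stand-in for a per-sample stage job (assumption, not RAMPART code).
  static String processSample(String sample) {
    return sample + ": OK";
  }

  public static void main(String[] args) throws Exception {
    List<String> samples = List.of("sampleA", "sampleB", "sampleC");
    ExecutorService pool = Executors.newFixedThreadPool(4);

    // Dispatch phase: submit one job per sample, keeping a handle to each.
    List<Future<String>> jobs = new ArrayList<>();
    for (String sample : samples) {
      jobs.add(pool.submit(() -> processSample(sample)));
    }

    // Waiting point: block until every job completes, analogous to
    // conanExecutorService.executeScheduledWait(...) in the example above.
    for (Future<String> job : jobs) {
      System.out.println(job.get());
    }
    pool.shutdown();
  }
}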
Example #2
  @Override
  public ExecutionResult execute(ExecutionContext executionContext)
      throws ProcessExecutionException, InterruptedException {

    try {

      StopWatch stopWatch = new StopWatch();
      stopWatch.start();

      log.info("Starting Kmer Counting on all Reads");

      // Create shortcut to args for convenience
      Args args = this.getArgs();

      // Force run parallel to false if not using a scheduler
      if (!executionContext.usingScheduler() && args.isRunParallel()) {
        log.warn("Forcing linear execution due to lack of job scheduler");
        args.setRunParallel(false);
      }

      // Create the output directory
      args.getOutputDir().mkdirs();

      JobOutputMap jfCountOutputs = new JobOutputMap();
      List<ExecutionResult> jobResults = new ArrayList<>();
      List<ExecutionResult> allJobResults = new ArrayList<>();

      // Create the output directory for the RAW datasets
      File rawOutputDir = new File(args.getOutputDir(), "raw");

      if (!rawOutputDir.exists()) {
        rawOutputDir.mkdirs();
      }

      // Start jellyfish on all RAW datasets
      for (Library lib : args.getAllLibraries()) {

        // Execute jellyfish count and record the job result and output file
        JobOutput jfOut = this.executeJellyfishCount(args, "raw", args.getOutputDir(), lib);
        jobResults.add(jfOut.getResult());
        allJobResults.add(jfOut.getResult());
        jfCountOutputs.updateTracker("raw", jfOut.getOutputFile());
      }

      // Also start jellyfish on all the pre-processed libraries from MECQ
      if (args.getAllMecqs() != null) {
        for (Mecq.EcqArgs ecqArgs : args.getAllMecqs()) {

          // Create the output directory for this ECQ group's datasets
          File ecqOutputDir = new File(args.getOutputDir(), ecqArgs.getName());

          if (!ecqOutputDir.exists()) {
            ecqOutputDir.mkdirs();
          }

          for (Library lib : ecqArgs.getOutputLibraries()) {

            // Execute jellyfish count and record the job result and output file
            JobOutput jfOut =
                this.executeJellyfishCount(args, ecqArgs.getName(), args.getOutputDir(), lib);

            jobResults.add(jfOut.getResult());
            allJobResults.add(jfOut.getResult());
            jfCountOutputs.updateTracker(ecqArgs.getName(), jfOut.getOutputFile());
          }
        }
      }

      // If we're using a scheduler and we have been asked to run each job
      // in parallel, then we should wait for all those to complete before continuing.
      if (executionContext.usingScheduler() && args.isRunParallel()) {
        log.info("Kmer counting all ECQ groups in parallel, waiting for completion");
        this.conanExecutorService.executeScheduledWait(
            jobResults,
            args.getJobPrefix() + "-count-*",
            ExitStatus.Type.COMPLETED_ANY,
            args.getJobPrefix() + "-kmer-count-wait",
            args.getOutputDir());
      }

      // Waiting point... clear tracked job results.
      jobResults.clear();

      JobOutputMap mergedOutputs = new JobOutputMap();

      // Now execute merge jobs if required
      for (Map.Entry<String, Set<File>> entry : jfCountOutputs.entrySet()) {

        String ecqName = entry.getKey();
        Set<File> fileSet = entry.getValue();

        // Only merge if there's more than one library
        if (fileSet.size() > 1) {
          JobOutput jfOut =
              this.executeJellyfishMerger(
                  args, ecqName, fileSet, new File(args.getOutputDir(), ecqName));

          jobResults.add(jfOut.getResult());
          allJobResults.add(jfOut.getResult());
          mergedOutputs.updateTracker(ecqName, jfOut.getOutputFile());
        }
      }

      // If we're using a scheduler and we have been asked to run each job
      // in parallel, then we should wait for all those to complete before continuing.
      if (executionContext.usingScheduler() && args.isRunParallel()) {
        log.info(
            "Creating merged kmer counts for all ECQ groups in parallel, waiting for completion");
        this.conanExecutorService.executeScheduledWait(
            jobResults,
            args.getJobPrefix() + "-merge-*",
            ExitStatus.Type.COMPLETED_ANY,
            args.getJobPrefix() + "-kmer-merge-wait",
            args.getOutputDir());
      }

      // Waiting point... clear tracked job results.
      jobResults.clear();

      // Combine all jellyfish output maps
      jfCountOutputs.combine(mergedOutputs);

      String katGcpJobPrefix = args.getJobPrefix() + "-kat-gcp";

      // Run KAT GCP on everything
      List<ExecutionResult> katGcpResults =
          this.executeKatGcp(
              jfCountOutputs,
              katGcpJobPrefix,
              args.getThreadsPerProcess(),
              args.getMemoryPerProcess(),
              args.isRunParallel());

      for (ExecutionResult result : katGcpResults) {
        result.setName(result.getName().substring(args.getJobPrefix().length() + 1));
        jobResults.add(result);
        allJobResults.add(result);
      }

      // If we're using a scheduler and we have been asked to run each job
      // in parallel, then we should wait for all those to complete before continuing.
      if (executionContext.usingScheduler() && args.isRunParallel()) {
        log.info("Running \"kat gcp\" for all ECQ groups in parallel, waiting for completion");
        this.conanExecutorService.executeScheduledWait(
            jobResults,
            katGcpJobPrefix + "*",
            ExitStatus.Type.COMPLETED_ANY,
            args.getJobPrefix() + "-kat-gcp-wait",
            args.getOutputDir());
      }

      // Waiting point... clear tracked job results.
      jobResults.clear();

      log.info("Kmer counting of all reads finished.");

      stopWatch.stop();

      TaskResult taskResult =
          new DefaultTaskResult(
              "rampart-read_analysis-kmer", true, allJobResults, stopWatch.getTime() / 1000L);

      // Output the resource usage to file
      FileUtils.writeLines(
          new File(args.getOutputDir(), args.getJobPrefix() + ".summary"), taskResult.getOutput());

      return new DefaultExecutionResult(
          taskResult.getTaskName(),
          0,
          new String[] {},
          null,
          -1,
          new ResourceUsage(
              taskResult.getMaxMemUsage(),
              taskResult.getActualTotalRuntime(),
              taskResult.getTotalExternalCputime()));
    } catch (ConanParameterException | IOException e) {
      throw new ProcessExecutionException(-1, e);
    }
  }
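
Example #2 leans on JobOutputMap to group jellyfish output files by ECQ group name and later fold the merged counts back in via combine(). Below is a minimal sketch of that behaviour, assuming the class is essentially a multimap from group name to output files; it is an illustrative stand-in, not the actual RAMPART class.

import java.io.File;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

// Illustrative stand-in for JobOutputMap: a map from ECQ group name to the
// set of output files produced for that group.
public class JobOutputMapSketch extends HashMap<String, Set<File>> {

  // Record an output file under the given group, creating the set on first use,
  // as jfCountOutputs.updateTracker(...) is used in the example above.
  public void updateTracker(String group, File outputFile) {
    this.computeIfAbsent(group, k -> new LinkedHashSet<>()).add(outputFile);
  }

  // Fold another map's groups into this one, mirroring
  // jfCountOutputs.combine(mergedOutputs) in the example above.
  public void combine(JobOutputMapSketch other) {
    for (Map.Entry<String, Set<File>> entry : other.entrySet()) {
      for (File f : entry.getValue()) {
        updateTracker(entry.getKey(), f);
      }
    }
  }
}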