Exemplo n.º 1
0
  private List<ExecutionResult> executeKatGcp(
      JobOutputMap jfCountOutputs, String jobPrefix, int threads, int memory, boolean runInParallel)
      throws InterruptedException, ProcessExecutionException, ConanParameterException {

    List<ExecutionResult> output = new ArrayList<>();

    for (Map.Entry<String, Set<File>> entry : jfCountOutputs.entrySet()) {

      for (File inputFile : entry.getValue()) {

        File outputPrefix = new File(inputFile.getAbsolutePath() + ".kat-gcp");
        File matrixFile = new File(outputPrefix + ".mx");
        File plotFile = new File(matrixFile.getAbsolutePath() + ".png");

        KatGcpV1 katGcpProc = this.makeKatGcpProc(inputFile, outputPrefix, threads);
        KatPlotDensityV1 katPlotDensityProc = this.makeKatPlotDensityProc(matrixFile, plotFile);
        katGcpProc.addPostCommand(katPlotDensityProc.getCommand());

        ExecutionResult result =
            this.conanExecutorService.executeProcess(
                katGcpProc,
                inputFile.getParentFile(),
                jobPrefix + "-" + inputFile.getName(),
                threads,
                memory,
                runInParallel);

        output.add(result);
      }
    }

    return output;
  }
Exemplo n.º 2
0
  public void combine(JobOutputMap other) {

    for (Map.Entry<String, Set<File>> entry : other.entrySet()) {

      if (this.containsKey(entry.getKey())) {
        Set<File> fileSet = this.get(entry.getKey());

        for (File f : entry.getValue()) {
          if (!fileSet.contains(f)) {
            fileSet.add(f);
          }
        }
      } else {
        this.put(entry.getKey(), entry.getValue());
      }
    }
  }
Exemplo n.º 3
0
  @Override
  public ExecutionResult execute(ExecutionContext executionContext)
      throws ProcessExecutionException, InterruptedException {

    try {

      StopWatch stopWatch = new StopWatch();
      stopWatch.start();

      log.info("Starting Kmer Counting on all Reads");

      // Create shortcut to args for convienience
      Args args = this.getArgs();

      // Force run parallel to false if not using a scheduler
      if (!executionContext.usingScheduler() && args.isRunParallel()) {
        log.warn("Forcing linear execution due to lack of job scheduler");
        args.setRunParallel(false);
      }

      // Create the output directory
      args.getOutputDir().mkdirs();

      JobOutputMap jfCountOutputs = new JobOutputMap();
      List<ExecutionResult> jobResults = new ArrayList<>();
      List<ExecutionResult> allJobResults = new ArrayList<>();

      // Create the output directory for the RAW datasets
      File rawOutputDir = new File(args.getOutputDir(), "raw");

      if (!rawOutputDir.exists()) {
        rawOutputDir.mkdirs();
      }

      // Start jellyfish on all RAW datasets
      for (Library lib : args.getAllLibraries()) {

        // Execute jellyfish and add id to list of job ids
        JobOutput jfOut = this.executeJellyfishCount(args, "raw", args.getOutputDir(), lib);
        jobResults.add(jfOut.getResult());
        allJobResults.add(jfOut.getResult());
        jfCountOutputs.updateTracker("raw", jfOut.getOutputFile());
      }

      // Also start jellyfish on all the prep-processed libraries from MECQ
      if (args.getAllMecqs() != null) {
        for (Mecq.EcqArgs ecqArgs : args.getAllMecqs()) {

          // Create the output directory for the RAW datasets
          File ecqOutputDir = new File(args.getOutputDir(), ecqArgs.getName());

          if (!ecqOutputDir.exists()) {
            ecqOutputDir.mkdirs();
          }

          for (Library lib : ecqArgs.getOutputLibraries()) {

            // Add jellyfish id to list of job ids
            JobOutput jfOut =
                this.executeJellyfishCount(args, ecqArgs.getName(), args.getOutputDir(), lib);

            jobResults.add(jfOut.getResult());
            allJobResults.add(jfOut.getResult());
            jfCountOutputs.updateTracker(ecqArgs.getName(), jfOut.getOutputFile());
          }
        }
      }

      // If we're using a scheduler and we have been asked to run each job
      // in parallel, then we should wait for all those to complete before continueing.
      if (executionContext.usingScheduler() && args.isRunParallel()) {
        log.info("Kmer counting all ECQ groups in parallel, waiting for completion");
        this.conanExecutorService.executeScheduledWait(
            jobResults,
            args.getJobPrefix() + "-count-*",
            ExitStatus.Type.COMPLETED_ANY,
            args.getJobPrefix() + "-kmer-count-wait",
            args.getOutputDir());
      }

      // Waiting point... clear job ids.
      jobResults.clear();

      JobOutputMap mergedOutputs = new JobOutputMap();

      // Now execute merge jobs if required
      for (Map.Entry<String, Set<File>> entry : jfCountOutputs.entrySet()) {

        String ecqName = entry.getKey();
        Set<File> fileSet = entry.getValue();

        // Only merge if there's more than one library
        if (fileSet.size() > 1) {
          JobOutput jfOut =
              this.executeJellyfishMerger(
                  args, ecqName, fileSet, new File(args.getOutputDir(), ecqName));

          jobResults.add(jfOut.getResult());
          allJobResults.add(jfOut.getResult());
          mergedOutputs.updateTracker(ecqName, jfOut.getOutputFile());
        }
      }

      // If we're using a scheduler and we have been asked to run each job
      // in parallel, then we should wait for all those to complete before continueing.
      if (executionContext.usingScheduler() && args.isRunParallel()) {
        log.info(
            "Creating merged kmer counts for all ECQ groups in parallel, waiting for completion");
        this.conanExecutorService.executeScheduledWait(
            jobResults,
            args.getJobPrefix() + "-merge-*",
            ExitStatus.Type.COMPLETED_ANY,
            args.getJobPrefix() + "-kmer-merge-wait",
            args.getOutputDir());
      }

      // Waiting point... clear job ids.
      jobResults.clear();

      // Combine all jellyfish out maps
      jfCountOutputs.combine(mergedOutputs);

      String katGcpJobPrefix = args.getJobPrefix() + "-kat-gcp";

      // Run KAT GCP on everything
      List<ExecutionResult> katGcpResults =
          this.executeKatGcp(
              jfCountOutputs,
              katGcpJobPrefix,
              args.getThreadsPerProcess(),
              args.getMemoryPerProcess(),
              args.isRunParallel());

      for (ExecutionResult result : katGcpResults) {
        result.setName(result.getName().substring(args.getJobPrefix().length() + 1));
        jobResults.add(result);
        allJobResults.add(result);
      }

      // If we're using a scheduler and we have been asked to run each job
      // in parallel, then we should wait for all those to complete before continueing.
      if (executionContext.usingScheduler() && args.isRunParallel()) {
        log.info("Running \"kat gcp\" for all ECQ groups in parallel, waiting for completion");
        this.conanExecutorService.executeScheduledWait(
            jobResults,
            katGcpJobPrefix + "*",
            ExitStatus.Type.COMPLETED_ANY,
            args.getJobPrefix() + "-kat-gcp-wait",
            args.getOutputDir());
      }

      // Waiting point... clear job ids.
      jobResults.clear();

      log.info("Kmer counting of all reads finished.");

      stopWatch.stop();

      TaskResult taskResult =
          new DefaultTaskResult(
              "rampart-read_analysis-kmer", true, allJobResults, stopWatch.getTime() / 1000L);

      // Output the resource usage to file
      FileUtils.writeLines(
          new File(args.getOutputDir(), args.getJobPrefix() + ".summary"), taskResult.getOutput());

      return new DefaultExecutionResult(
          taskResult.getTaskName(),
          0,
          new String[] {},
          null,
          -1,
          new ResourceUsage(
              taskResult.getMaxMemUsage(),
              taskResult.getActualTotalRuntime(),
              taskResult.getTotalExternalCputime()));
    } catch (ConanParameterException | IOException e) {
      throw new ProcessExecutionException(-1, e);
    }
  }