예제 #1
0
  protected void elPrepPreprocess(
      Context context, PreprocessingTools tools, SAMRecordIterator input, String output)
      throws InterruptedException, IOException, QualityException, URISyntaxException {
    String dictF = ref.substring(0, ref.lastIndexOf('.')) + ".dict";
    String rg = createReadGroupRecordString(RGID, RGLB, RGPL, RGPU, RGSM);
    String preSamOut = tmpFileBase + "-p1.sam";
    String samOut = tmpFileBase + "-p2.sam";
    String fCounts = tmpFileBase + "-features.count";

    outHeader = header.clone();
    outHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);

    Logger.DEBUG("call elPrep");
    context.setStatus("call elPrep");
    int reads;
    if (keep) {
      reads =
          tools.callElPrep(
              preSamOut, samOut, inputIsBam ? null : rg, threads, input, outHeader, dictF);
    } else {
      reads =
          tools.streamElPrep(
              context, samOut, inputIsBam ? null : rg, threads, input, outHeader, dictF);
    }

    Logger.DEBUG(reads + " reads processed in elPrep");
    context.getCounter(HalvadeCounters.IN_PREP_READS).increment(reads);

    if (gff != null) {
      Logger.DEBUG("featureCounts");
      context.setStatus("featureCounts");
      tools.runFeatureCounts(gff, samOut, fCounts, threads);
      HalvadeFileUtils.uploadFileToHDFS(
          context,
          FileSystem.get(new URI(outputdir), context.getConfiguration()),
          fCounts,
          outputdir + context.getTaskAttemptID().toString() + ".count");
    }
    context.setStatus("convert SAM to BAM");
    Logger.DEBUG("convert SAM to BAM");
    tools.callSAMToBAM(samOut, output, threads);
    context.setStatus("build bam index");
    Logger.DEBUG("build bam index");
    tools.runBuildBamIndex(output);
    // remove temporary files
    HalvadeFileUtils.removeLocalFile(keep, preSamOut, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, samOut, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, fCounts);
  }
예제 #2
0
  protected void PicardPreprocess(
      Context context, PreprocessingTools tools, SAMRecordIterator input, String output)
      throws InterruptedException, QualityException, IOException, URISyntaxException {
    outHeader = header.clone();
    outHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
    // tmp files
    String tmpOut1 = tmpFileBase + "-p1.bam";
    String tmpOut2 = tmpFileBase + "-p2.bam";
    String tmpOut3 = tmpFileBase + "-p3.sam";
    String fCounts = tmpFileBase + "-features.count";
    String tmpMetrics = tmpFileBase + "-p3-metrics.txt";
    SAMFileWriterFactory factory = new SAMFileWriterFactory();
    if (!inputIsBam) {
      outHeader.addReadGroup(bamrg);
    }
    SAMFileWriter writer = factory.makeBAMWriter(outHeader, true, new File(tmpOut1));

    long startTime = System.currentTimeMillis();

    int count = 0;
    SAMRecord sam;
    while (input.hasNext()) {
      sam = input.next();
      writer.addAlignment(sam);
      count++;
    }
    int reads = input.getCount();
    writer.close();

    context.getCounter(HalvadeCounters.IN_PREP_READS).increment(reads);
    long estimatedTime = System.currentTimeMillis() - startTime;
    context.getCounter(HalvadeCounters.TIME_HADOOP_SAMTOBAM).increment(estimatedTime);
    Logger.DEBUG("time writing " + count + " records to disk: " + estimatedTime / 1000);

    Logger.DEBUG("clean sam");
    context.setStatus("clean sam");
    tools.runCleanSam(tmpOut1, tmpOut2);
    Logger.DEBUG("mark duplicates");
    context.setStatus("mark duplicates");
    tools.runMarkDuplicates(tmpOut2, tmpOut3, tmpMetrics);

    if (gff != null) {
      // tmpOut3 is sam for htseq count!
      Logger.DEBUG("featureCounts");
      context.setStatus("featureCounts");
      tools.runFeatureCounts(gff, tmpOut3, fCounts, threads);
      HalvadeFileUtils.uploadFileToHDFS(
          context,
          FileSystem.get(new URI(outputdir), context.getConfiguration()),
          fCounts,
          outputdir + context.getTaskAttemptID().toString() + ".count");
    }

    if (!inputIsBam) {
      Logger.DEBUG("add read-group");
      context.setStatus("add read-group");
      tools.runAddOrReplaceReadGroups(tmpOut3, output, RGID, RGLB, RGPL, RGPU, RGSM);
    } else {
      context.setStatus("convert SAM to BAM");
      Logger.DEBUG("convert SAM to BAM");
      tools.callSAMToBAM(tmpOut3, output, threads);
    }

    Logger.DEBUG("build bam index");
    context.setStatus("build bam index");
    tools.runBuildBamIndex(output);

    estimatedTime = System.currentTimeMillis() - startTime;
    Logger.DEBUG("estimated time: " + estimatedTime / 1000);

    // remove all temporary files now!
    HalvadeFileUtils.removeLocalFile(keep, tmpMetrics, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, tmpOut1, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, tmpOut2, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, tmpOut3, context, HalvadeCounters.FOUT_GATK_TMP);
    HalvadeFileUtils.removeLocalFile(keep, fCounts);
  }