protected void PicardPreprocess( Context context, PreprocessingTools tools, SAMRecordIterator input, String output) throws InterruptedException, QualityException, IOException, URISyntaxException { outHeader = header.clone(); outHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate); // tmp files String tmpOut1 = tmpFileBase + "-p1.bam"; String tmpOut2 = tmpFileBase + "-p2.bam"; String tmpOut3 = tmpFileBase + "-p3.sam"; String fCounts = tmpFileBase + "-features.count"; String tmpMetrics = tmpFileBase + "-p3-metrics.txt"; SAMFileWriterFactory factory = new SAMFileWriterFactory(); if (!inputIsBam) { outHeader.addReadGroup(bamrg); } SAMFileWriter writer = factory.makeBAMWriter(outHeader, true, new File(tmpOut1)); long startTime = System.currentTimeMillis(); int count = 0; SAMRecord sam; while (input.hasNext()) { sam = input.next(); writer.addAlignment(sam); count++; } int reads = input.getCount(); writer.close(); context.getCounter(HalvadeCounters.IN_PREP_READS).increment(reads); long estimatedTime = System.currentTimeMillis() - startTime; context.getCounter(HalvadeCounters.TIME_HADOOP_SAMTOBAM).increment(estimatedTime); Logger.DEBUG("time writing " + count + " records to disk: " + estimatedTime / 1000); Logger.DEBUG("clean sam"); context.setStatus("clean sam"); tools.runCleanSam(tmpOut1, tmpOut2); Logger.DEBUG("mark duplicates"); context.setStatus("mark duplicates"); tools.runMarkDuplicates(tmpOut2, tmpOut3, tmpMetrics); if (gff != null) { // tmpOut3 is sam for htseq count! Logger.DEBUG("featureCounts"); context.setStatus("featureCounts"); tools.runFeatureCounts(gff, tmpOut3, fCounts, threads); HalvadeFileUtils.uploadFileToHDFS( context, FileSystem.get(new URI(outputdir), context.getConfiguration()), fCounts, outputdir + context.getTaskAttemptID().toString() + ".count"); } if (!inputIsBam) { Logger.DEBUG("add read-group"); context.setStatus("add read-group"); tools.runAddOrReplaceReadGroups(tmpOut3, output, RGID, RGLB, RGPL, RGPU, RGSM); } else { context.setStatus("convert SAM to BAM"); Logger.DEBUG("convert SAM to BAM"); tools.callSAMToBAM(tmpOut3, output, threads); } Logger.DEBUG("build bam index"); context.setStatus("build bam index"); tools.runBuildBamIndex(output); estimatedTime = System.currentTimeMillis() - startTime; Logger.DEBUG("estimated time: " + estimatedTime / 1000); // remove all temporary files now! HalvadeFileUtils.removeLocalFile(keep, tmpMetrics, context, HalvadeCounters.FOUT_GATK_TMP); HalvadeFileUtils.removeLocalFile(keep, tmpOut1, context, HalvadeCounters.FOUT_GATK_TMP); HalvadeFileUtils.removeLocalFile(keep, tmpOut2, context, HalvadeCounters.FOUT_GATK_TMP); HalvadeFileUtils.removeLocalFile(keep, tmpOut3, context, HalvadeCounters.FOUT_GATK_TMP); HalvadeFileUtils.removeLocalFile(keep, fCounts); }