protected void elPrepPreprocess( Context context, PreprocessingTools tools, SAMRecordIterator input, String output) throws InterruptedException, IOException, QualityException, URISyntaxException { String dictF = ref.substring(0, ref.lastIndexOf('.')) + ".dict"; String rg = createReadGroupRecordString(RGID, RGLB, RGPL, RGPU, RGSM); String preSamOut = tmpFileBase + "-p1.sam"; String samOut = tmpFileBase + "-p2.sam"; String fCounts = tmpFileBase + "-features.count"; outHeader = header.clone(); outHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate); Logger.DEBUG("call elPrep"); context.setStatus("call elPrep"); int reads; if (keep) { reads = tools.callElPrep( preSamOut, samOut, inputIsBam ? null : rg, threads, input, outHeader, dictF); } else { reads = tools.streamElPrep( context, samOut, inputIsBam ? null : rg, threads, input, outHeader, dictF); } Logger.DEBUG(reads + " reads processed in elPrep"); context.getCounter(HalvadeCounters.IN_PREP_READS).increment(reads); if (gff != null) { Logger.DEBUG("featureCounts"); context.setStatus("featureCounts"); tools.runFeatureCounts(gff, samOut, fCounts, threads); HalvadeFileUtils.uploadFileToHDFS( context, FileSystem.get(new URI(outputdir), context.getConfiguration()), fCounts, outputdir + context.getTaskAttemptID().toString() + ".count"); } context.setStatus("convert SAM to BAM"); Logger.DEBUG("convert SAM to BAM"); tools.callSAMToBAM(samOut, output, threads); context.setStatus("build bam index"); Logger.DEBUG("build bam index"); tools.runBuildBamIndex(output); // remove temporary files HalvadeFileUtils.removeLocalFile(keep, preSamOut, context, HalvadeCounters.FOUT_GATK_TMP); HalvadeFileUtils.removeLocalFile(keep, samOut, context, HalvadeCounters.FOUT_GATK_TMP); HalvadeFileUtils.removeLocalFile(keep, fCounts); }
protected void PicardPreprocess( Context context, PreprocessingTools tools, SAMRecordIterator input, String output) throws InterruptedException, QualityException, IOException, URISyntaxException { outHeader = header.clone(); outHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate); // tmp files String tmpOut1 = tmpFileBase + "-p1.bam"; String tmpOut2 = tmpFileBase + "-p2.bam"; String tmpOut3 = tmpFileBase + "-p3.sam"; String fCounts = tmpFileBase + "-features.count"; String tmpMetrics = tmpFileBase + "-p3-metrics.txt"; SAMFileWriterFactory factory = new SAMFileWriterFactory(); if (!inputIsBam) { outHeader.addReadGroup(bamrg); } SAMFileWriter writer = factory.makeBAMWriter(outHeader, true, new File(tmpOut1)); long startTime = System.currentTimeMillis(); int count = 0; SAMRecord sam; while (input.hasNext()) { sam = input.next(); writer.addAlignment(sam); count++; } int reads = input.getCount(); writer.close(); context.getCounter(HalvadeCounters.IN_PREP_READS).increment(reads); long estimatedTime = System.currentTimeMillis() - startTime; context.getCounter(HalvadeCounters.TIME_HADOOP_SAMTOBAM).increment(estimatedTime); Logger.DEBUG("time writing " + count + " records to disk: " + estimatedTime / 1000); Logger.DEBUG("clean sam"); context.setStatus("clean sam"); tools.runCleanSam(tmpOut1, tmpOut2); Logger.DEBUG("mark duplicates"); context.setStatus("mark duplicates"); tools.runMarkDuplicates(tmpOut2, tmpOut3, tmpMetrics); if (gff != null) { // tmpOut3 is sam for htseq count! Logger.DEBUG("featureCounts"); context.setStatus("featureCounts"); tools.runFeatureCounts(gff, tmpOut3, fCounts, threads); HalvadeFileUtils.uploadFileToHDFS( context, FileSystem.get(new URI(outputdir), context.getConfiguration()), fCounts, outputdir + context.getTaskAttemptID().toString() + ".count"); } if (!inputIsBam) { Logger.DEBUG("add read-group"); context.setStatus("add read-group"); tools.runAddOrReplaceReadGroups(tmpOut3, output, RGID, RGLB, RGPL, RGPU, RGSM); } else { context.setStatus("convert SAM to BAM"); Logger.DEBUG("convert SAM to BAM"); tools.callSAMToBAM(tmpOut3, output, threads); } Logger.DEBUG("build bam index"); context.setStatus("build bam index"); tools.runBuildBamIndex(output); estimatedTime = System.currentTimeMillis() - startTime; Logger.DEBUG("estimated time: " + estimatedTime / 1000); // remove all temporary files now! HalvadeFileUtils.removeLocalFile(keep, tmpMetrics, context, HalvadeCounters.FOUT_GATK_TMP); HalvadeFileUtils.removeLocalFile(keep, tmpOut1, context, HalvadeCounters.FOUT_GATK_TMP); HalvadeFileUtils.removeLocalFile(keep, tmpOut2, context, HalvadeCounters.FOUT_GATK_TMP); HalvadeFileUtils.removeLocalFile(keep, tmpOut3, context, HalvadeCounters.FOUT_GATK_TMP); HalvadeFileUtils.removeLocalFile(keep, fCounts); }