/** Converts the supplied adapter sequences to byte arrays in both fwd and rc. */ private byte[][] prepareAdapterSequences() { final Set<String> kmers = new HashSet<String>(); // Make a set of all kmers of adapterMatchLength for (final String seq : adapterSequence) { for (int i = 0; i <= seq.length() - ADAPTER_MATCH_LENGTH; ++i) { final String kmer = seq.substring(i, i + ADAPTER_MATCH_LENGTH).toUpperCase(); int ns = 0; for (final char ch : kmer.toCharArray()) if (ch == 'N') ++ns; if (ns <= MAX_ADAPTER_ERRORS) { kmers.add(kmer); kmers.add(SequenceUtil.reverseComplement(kmer)); } } } // Make an array of byte[] for the kmers final byte[][] adapterKmers = new byte[kmers.size()][]; int i = 0; for (final String kmer : kmers) { adapterKmers[i++] = StringUtil.stringToBytes(kmer); } return adapterKmers; }
/**
 * Asserts that the input files are readable and the output files are writable, checks that
 * the interval lists (and the reference, when supplied) share the sequence dictionary of
 * INPUT, and then feeds every record of INPUT to a TargetMetricsCollector whose results are
 * written to OUTPUT.
 *
 * The reader opened on INPUT is always closed before this method returns, including when
 * validation or metric collection throws.
 *
 * @return 0 once the metrics file has been written
 */
protected int doWork() {
    // Resolve the probe interval file once and reuse it for every check below.
    final File probeIntervals = getProbeIntervals();

    IoUtil.assertFileIsReadable(probeIntervals);
    IoUtil.assertFileIsReadable(TARGET_INTERVALS);
    IoUtil.assertFileIsReadable(INPUT);
    IoUtil.assertFileIsWritable(OUTPUT);
    if (PER_TARGET_COVERAGE != null) {
        IoUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
    }

    final SAMFileReader samReader = new SAMFileReader(INPUT);
    try {
        // Validate that the targets and baits have the same references as the reads file
        SequenceUtil.assertSequenceDictionariesEqual(
                samReader.getFileHeader().getSequenceDictionary(),
                IntervalList.fromFile(TARGET_INTERVALS).getHeader().getSequenceDictionary(),
                INPUT,
                TARGET_INTERVALS);
        SequenceUtil.assertSequenceDictionariesEqual(
                samReader.getFileHeader().getSequenceDictionary(),
                IntervalList.fromFile(probeIntervals).getHeader().getSequenceDictionary(),
                INPUT,
                probeIntervals);

        // The reference is optional; when present it must match the reads file as well.
        ReferenceSequenceFile ref = null;
        if (REFERENCE_SEQUENCE != null) {
            IoUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
            ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
            SequenceUtil.assertSequenceDictionariesEqual(
                    samReader.getFileHeader().getSequenceDictionary(),
                    ref.getSequenceDictionary(),
                    INPUT,
                    REFERENCE_SEQUENCE);
        }

        final TargetMetricsCollector collector = makeCollector(
                METRIC_ACCUMULATION_LEVEL,
                samReader.getFileHeader().getReadGroups(),
                ref,
                PER_TARGET_COVERAGE,
                TARGET_INTERVALS,
                probeIntervals,
                getProbeSetName());

        // Add each record to the requested collectors
        final Iterator<SAMRecord> records = samReader.iterator();
        final ProgressLogger progress = new ProgressLogger(log);
        while (records.hasNext()) {
            final SAMRecord sam = records.next();
            collector.acceptRecord(sam, null);
            progress.record(sam);
        }

        // Write the output file
        final MetricsFile<HsMetrics, Integer> metrics = getMetricsFile();
        collector.finish();
        collector.addAllLevelsToFile(metrics);
        metrics.write(OUTPUT);
    } finally {
        // Release the file handle held by the reader even if something above failed.
        samReader.close();
    }

    return 0;
}
private void collectQualityData(final SAMRecord record, final ReferenceSequence reference) { // If the read isnt an aligned PF read then look at the read for no-calls if (record.getReadUnmappedFlag() || record.getReadFailsVendorQualityCheckFlag() || !doRefMetrics) { final byte[] readBases = record.getReadBases(); for (int i = 0; i < readBases.length; i++) { if (SequenceUtil.isNoCall(readBases[i])) { badCycleHistogram.increment( CoordMath.getCycle(record.getReadNegativeStrandFlag(), readBases.length, i)); } } } else if (!record.getReadFailsVendorQualityCheckFlag()) { final boolean highQualityMapping = isHighQualityMapping(record); if (highQualityMapping) metrics.PF_HQ_ALIGNED_READS++; final byte[] readBases = record.getReadBases(); final byte[] refBases = reference.getBases(); final byte[] qualities = record.getBaseQualities(); final int refLength = refBases.length; long mismatchCount = 0; long hqMismatchCount = 0; for (final AlignmentBlock alignmentBlock : record.getAlignmentBlocks()) { final int readIndex = alignmentBlock.getReadStart() - 1; final int refIndex = alignmentBlock.getReferenceStart() - 1; final int length = alignmentBlock.getLength(); for (int i = 0; i < length && refIndex + i < refLength; ++i) { final int readBaseIndex = readIndex + i; boolean mismatch = !SequenceUtil.basesEqual(readBases[readBaseIndex], refBases[refIndex + i]); boolean bisulfiteBase = false; if (mismatch && isBisulfiteSequenced) { if ((record.getReadNegativeStrandFlag() && (refBases[refIndex + i] == 'G' || refBases[refIndex + i] == 'g') && (readBases[readBaseIndex] == 'A' || readBases[readBaseIndex] == 'a')) || ((!record.getReadNegativeStrandFlag()) && (refBases[refIndex + i] == 'C' || refBases[refIndex + i] == 'c') && (readBases[readBaseIndex] == 'T') || readBases[readBaseIndex] == 't')) { bisulfiteBase = true; mismatch = false; } } if (mismatch) mismatchCount++; metrics.PF_ALIGNED_BASES++; if (!bisulfiteBase) nonBisulfiteAlignedBases++; if (highQualityMapping) { 
metrics.PF_HQ_ALIGNED_BASES++; if (!bisulfiteBase) hqNonBisulfiteAlignedBases++; if (qualities[readBaseIndex] >= BASE_QUALITY_THRESHOLD) metrics.PF_HQ_ALIGNED_Q20_BASES++; if (mismatch) hqMismatchCount++; } if (mismatch || SequenceUtil.isNoCall(readBases[readBaseIndex])) { badCycleHistogram.increment( CoordMath.getCycle(record.getReadNegativeStrandFlag(), readBases.length, i)); } } } mismatchHistogram.increment(mismatchCount); hqMismatchHistogram.increment(hqMismatchCount); // Add any insertions and/or deletions to the global count for (final CigarElement elem : record.getCigar().getCigarElements()) { final CigarOperator op = elem.getOperator(); if (op == CigarOperator.INSERTION || op == CigarOperator.DELETION) ++this.indels; } } }