/** * Decode a single line in a SAM text file. * * @param line line to decode. * @return A SAMReadFeature modeling that line. */ @Override public SAMReadFeature decode(String line) { // we may be asked to process a header line; ignore it if (line.startsWith("@")) return null; String[] tokens = new String[expectedTokenCount]; // split the line int count = ParsingUtils.splitWhitespace(line, tokens); // check to see if we've parsed the string into the right number of tokens (expectedTokenCount) if (count != expectedTokenCount) throw new CodecLineParsingException( "the SAM read line didn't have the expected number of tokens " + "(expected = " + expectedTokenCount + ", saw = " + count + " on " + "line = " + line + ")"); final String readName = tokens[0]; final int flags = Integer.parseInt(tokens[1]); final String contigName = tokens[2]; final int alignmentStart = Integer.parseInt(tokens[3]); final int mapQ = Integer.parseInt(tokens[4]); final String cigarString = tokens[5]; final String mateContigName = tokens[6]; final int mateAlignmentStart = Integer.parseInt(tokens[7]); final int inferredInsertSize = Integer.parseInt(tokens[8]); final byte[] bases = StringUtil.stringToBytes(tokens[9]); final byte[] qualities = StringUtil.stringToBytes(tokens[10]); // Infer the alignment end. Cigar cigar = TextCigarCodec.decode(cigarString); int alignmentEnd = alignmentStart + cigar.getReferenceLength() - 1; // Remove printable character conversion from the qualities. for (byte quality : qualities) quality -= 33; return new SAMReadFeature( readName, flags, contigName, alignmentStart, alignmentEnd, mapQ, cigarString, mateContigName, mateAlignmentStart, inferredInsertSize, bases, qualities); }
@Test public void testPerReadAlleleLikelihoodMap() { final PerReadAlleleLikelihoodMap map = new PerReadAlleleLikelihoodMap(); final Allele alleleA = Allele.create("A"); final double lik = -1.0; // ignored final int[] MQs = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, QualityUtils.MAPPING_QUALITY_UNAVAILABLE}; final List<Integer> MQsList = Arrays.asList(ArrayUtils.toObject(MQs)); // MQ 255 are excluded from the calculations, we test it here. final List<Integer> MQsListOK = new ArrayList<>(MQsList); // NOTE: if we just call remove(i), Java thinks i is an index. // A workaround for this overloading bogosity to to call removeAll and pass a collection // (casting i to (Object) would work too but it's more error prone) MQsListOK.removeAll(Collections.singleton(QualityUtils.MAPPING_QUALITY_UNAVAILABLE)); final int n1A = MQs.length; for (int i = 0; i < n1A; i++) { final GATKRead read = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode("10M")); read.setMappingQuality(MQs[i]); map.add(read, alleleA, lik); } final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap = Collections.singletonMap("sample1", map); final VariantContext vc = makeVC(); final ReferenceContext referenceContext = null; final Map<String, Object> annotate = new RMSMappingQuality().annotate(referenceContext, vc, perReadAlleleLikelihoodMap); Assert.assertEquals(annotate.size(), 1, "size"); Assert.assertEquals( annotate.keySet(), Collections.singleton(VCFConstants.RMS_MAPPING_QUALITY_KEY), "annots"); final double rms = MathUtils.rms(MQsListOK); // only those are MQ0 Assert.assertEquals( annotate.get(VCFConstants.RMS_MAPPING_QUALITY_KEY), String.format("%.2f", rms)); }