Esempio n. 1
0
  /**
   * Decode a single line in a SAM text file.
   *
   * <p>The line is split on whitespace into {@code expectedTokenCount} tokens. The first eleven
   * are read as read name, flags, contig, alignment start, mapping quality, CIGAR, mate contig,
   * mate alignment start, inferred insert size, bases and base qualities. The alignment end is
   * not stored in the line; it is derived from the alignment start and the reference length of
   * the CIGAR.
   *
   * @param line line to decode.
   * @return A SAMReadFeature modeling that line, or {@code null} if the line starts with
   *     {@code '@'} (a header line).
   * @throws CodecLineParsingException if the line does not split into exactly
   *     {@code expectedTokenCount} tokens.
   * @throws NumberFormatException if one of the integer columns cannot be parsed.
   */
  @Override
  public SAMReadFeature decode(String line) {
    // we may be asked to process a header line; ignore it
    if (line.startsWith("@")) {
      return null;
    }

    String[] tokens = new String[expectedTokenCount];

    // split the line
    int count = ParsingUtils.splitWhitespace(line, tokens);

    // check to see if we've parsed the string into the right number of tokens (expectedTokenCount)
    if (count != expectedTokenCount) {
      throw new CodecLineParsingException(
          "the SAM read line didn't have the expected number of tokens "
              + "(expected = "
              + expectedTokenCount
              + ", saw = "
              + count
              + " on "
              + "line = "
              + line
              + ")");
    }

    final String readName = tokens[0];
    final int flags = Integer.parseInt(tokens[1]);
    final String contigName = tokens[2];
    final int alignmentStart = Integer.parseInt(tokens[3]);
    final int mapQ = Integer.parseInt(tokens[4]);
    final String cigarString = tokens[5];
    final String mateContigName = tokens[6];
    final int mateAlignmentStart = Integer.parseInt(tokens[7]);
    final int inferredInsertSize = Integer.parseInt(tokens[8]);
    final byte[] bases = StringUtil.stringToBytes(tokens[9]);
    final byte[] qualities = StringUtil.stringToBytes(tokens[10]);

    // Infer the alignment end: start plus the number of reference bases the CIGAR covers,
    // minus one because both ends are inclusive.
    Cigar cigar = TextCigarCodec.decode(cigarString);
    int alignmentEnd = alignmentStart + cigar.getReferenceLength() - 1;

    // Remove printable character conversion from the qualities.
    // This must write back through the index: an enhanced-for variable is only a copy of each
    // element, so subtracting from it would leave the array untouched.
    for (int i = 0; i < qualities.length; i++) {
      qualities[i] -= 33;
    }

    return new SAMReadFeature(
        readName,
        flags,
        contigName,
        alignmentStart,
        alignmentEnd,
        mapQ,
        cigarString,
        mateContigName,
        mateAlignmentStart,
        inferredInsertSize,
        bases,
        qualities);
  }
  /**
   * Feeds reads with a range of mapping qualities, including the "unavailable" sentinel, through
   * {@code RMSMappingQuality} and checks that the single annotation produced equals the RMS of
   * the mapping qualities with the sentinel left out.
   */
  @Test
  public void testPerReadAlleleLikelihoodMap() {
    final PerReadAlleleLikelihoodMap likelihoods = new PerReadAlleleLikelihoodMap();

    final Allele alleleA = Allele.create("A");
    final double ignoredLikelihood = -1.0; // the value is irrelevant to this annotation

    final int[] mappingQualities = {
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, QualityUtils.MAPPING_QUALITY_UNAVAILABLE
    };

    // Expected input to the RMS: every mapping quality except the "unavailable" sentinel,
    // which the annotation is expected to skip.
    // removeAll with a singleton is deliberate: List.remove(int) would treat the value as an
    // index rather than as the element to drop.
    final List<Integer> expectedQualities =
        new ArrayList<>(Arrays.asList(ArrayUtils.toObject(mappingQualities)));
    expectedQualities.removeAll(Collections.singleton(QualityUtils.MAPPING_QUALITY_UNAVAILABLE));

    // One artificial read per mapping quality, all supporting the same allele.
    for (final int mappingQuality : mappingQualities) {
      final GATKRead read = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode("10M"));
      read.setMappingQuality(mappingQuality);
      likelihoods.add(read, alleleA, ignoredLikelihood);
    }

    final Map<String, PerReadAlleleLikelihoodMap> likelihoodsBySample =
        Collections.singletonMap("sample1", likelihoods);
    final VariantContext vc = makeVC();
    // Kept as a typed local so the annotate(...) call resolves exactly as before.
    final ReferenceContext referenceContext = null;

    final Map<String, Object> annotations =
        new RMSMappingQuality().annotate(referenceContext, vc, likelihoodsBySample);

    // Exactly one annotation, keyed by the RMS mapping quality key.
    Assert.assertEquals(annotations.size(), 1, "size");
    Assert.assertEquals(
        annotations.keySet(),
        Collections.singleton(VCFConstants.RMS_MAPPING_QUALITY_KEY),
        "annots");

    // The value is compared against the RMS formatted to two decimals.
    final double expectedRms = MathUtils.rms(expectedQualities);
    Assert.assertEquals(
        annotations.get(VCFConstants.RMS_MAPPING_QUALITY_KEY),
        String.format("%.2f", expectedRms));
  }