Ejemplo n.º 1
0
 /**
  * Builds an alignment from a reverse-complemented read, transfers its cigar and bases onto a SAM
  * record, and asserts that {@code SamValidator.isAtExpectedRef} returns a score equal to twice
  * the default substitution penalty.
  */
 public void testOverlapCigarConsistencyRev() {
   // Spaces in the literals only mark visual groups; they are stripped before encoding.
   final String forwardRead = "TAGGCGGGTTGCCAA TTAACTTGTA      GTCCTTGACA".replaceAll(" ", "");
   final String templateBases =
       "TAGGG   TGGCCAA TTAACTTGTAGTGTGCGTCCTTGACA".replaceAll(" ", "");
   final byte[] read = DnaUtils.encodeString(DnaUtils.reverseComplement(forwardRead));
   final byte[] tmpl = DnaUtils.encodeString(templateBases);
   final int[] actions = ActionsHelper.build("==========NNNNNN===============X====BBBX====", 0, 3);

   // Scoring parameters: default gap/substitution penalties, unknowns cost nothing.
   final NgsParamsBuilder paramsBuilder = new NgsParamsBuilder();
   paramsBuilder
       .gapOpenPenalty(EditDistanceFactory.DEFAULT_GAP_OPEN_PENALTY)
       .gapExtendPenalty(EditDistanceFactory.DEFAULT_GAP_EXTEND_PENALTY)
       .substitutionPenalty(EditDistanceFactory.DEFAULT_SUBSTITUTION_PENALTY)
       .unknownsPenalty(0);
   final NgsParams params = paramsBuilder.create();

   final AlignmentResult ar = new AlignmentResult(read, actions, tmpl);
   ar.setIdentifyingInfo(false, true);

   // The SAM record stores the read reverse-complemented relative to the alignment's read string.
   final SAMRecord samrec = new SAMRecord(null);
   samrec.setCigarString(ar.getCigarString(true, false));
   samrec.setReadString(DnaUtils.reverseComplement(ar.readString()));
   samrec.setAttribute(SamUtils.ATTRIBUTE_NUM_MISMATCHES, 2);
   samrec.setAlignmentStart(1);
   samrec.setFlags(179);

   try (MemoryPrintStream mps = new MemoryPrintStream()) {
     final SamValidator sv =
         new SamValidator(
             mps.printStream(), mps.printStream(), true, false, false, false, params, false);
     assertEquals(
         2 * EditDistanceFactory.DEFAULT_SUBSTITUTION_PENALTY,
         sv.isAtExpectedRef(tmpl, samrec, null));
   }
 }
Ejemplo n.º 2
0
  /**
   * Validates the same record twice with a validator constructed with argument {@code 1}
   * (presumably the unknowns penalty -- confirm against SuperCigarValidator): first with an AS
   * attribute of 1, which must be rejected with a score-mismatch message, then with AS 3, which
   * must be accepted.
   */
  public void testUnknownPenalty() throws Exception {
    final String sdfBases = "AGCCCCCNTACGTAAATAAGACATC     ACGATGATCA".replaceAll(" ", "");
    final String qualities = "4316%%68883-56+141663,2.3----45/.,2";
    final String templateBases = "AGCCCCCACNCGTAAATAAGACATCTTTTTACGATGATCA";

    final SAMRecord samrec = new SAMRecord(null);
    final SuperCigarValidator validator = new SuperCigarValidator(1);

    samrec.setAlignmentStart(1);
    samrec.setCigarString("8=1X14=5N10=");
    samrec.setReadString("AGCCCCCNTACGTAAATAAGACATCACGATGATCA");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "7=1R1X1T15=5N10=");
    samrec.setBaseQualityString(qualities);
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "TA");
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 1);
    samrec.setFlags(67);

    // Round 1: the declared alignment score (1) disagrees with the super cigar.
    validator.setData(
        samrec, DnaUtils.encodeString(sdfBases), FastaUtils.asciiToRawQuality(qualities));
    validator.setTemplate(DnaUtils.encodeString(templateBases));
    validator.parse();
    assertFalse(validator.getInvalidReason(), validator.isValid());
    final String reason = validator.getInvalidReason();
    assertEquals(reason, "Super cigar alignment score was 3, but AS attribute was 1", reason);

    // Round 2: with the declared score corrected to 3 the record validates.
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 3);
    validator.setData(
        samrec, DnaUtils.encodeString(sdfBases), FastaUtils.asciiToRawQuality(qualities));
    validator.setTemplate(DnaUtils.encodeString(templateBases));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());
  }
Ejemplo n.º 3
0
  /**
   * Validates a record (flags 179, alignment start 3) whose super cigar contains a {@code 4B}
   * back-step near the start of the alignment, and expects the validator to accept it.
   */
  public void testOverlapPastStartPosition() throws Exception {
    // Spaces in these literals are visual separators only and are stripped before use.
    final String sdfBases = "TGAGCTGAGG     GAAGTGCCACTCCTTCACTCCTTAT".replaceAll(" ", "");
    final String sdfQualities = "2632661571     ,37314/0/8,+-1./1,.)10002".replaceAll(" ", "");
    final String templateBases =
        "TCAT  GAAGGAGTGGCACTTCCACCTGCCTCAGCTCATGCGTGATATCCAGG".replaceAll(" ", "");

    final SAMRecord samrec = new SAMRecord(null);
    final SuperCigarValidator validator = new SuperCigarValidator(0);

    samrec.setAlignmentStart(3);
    samrec.setCigarString("2=2I16=6N10=");
    samrec.setReadString("ATAAGAAGGAGTGGCACTTCCCTCAGCTCA");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "2=2I1=4B1X1=1I17=6N10=");
    samrec.setBaseQualityString("20001.1-+,8/0/41373,1751662362");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "AAGG");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, ").,1/");
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 6);
    samrec.setFlags(179);

    validator.setData(
        samrec, DnaUtils.encodeString(sdfBases), FastaUtils.asciiToRawQuality(sdfQualities));
    validator.setTemplate(DnaUtils.encodeString(templateBases));

    // Hand-drawn sketch of the read against the template:
    //   ATAAGAAGGAGTGGCACTTCCCTCAGCTCA
    //   ATAAG
    // TCA TGAAGGAGTGGCACTTCCACCTGCCTCAGCTCATGCGTGATATCCAGG
    //  GAGTGAAGGAGTGGCACTTC      CCTCAGCTCA

    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());
  }
Ejemplo n.º 4
0
  /**
   * Feeds the validator an SDF read made up entirely of space characters and expects the record
   * to be reported invalid, first with flags 73 and then with flags 115.
   */
  public void testDegenerate() throws Exception {
    final String blankRead = "                                   ";
    final String qualities = "4316%%68883-56+141663,2.3----45/.,2";

    final SAMRecord samrec = new SAMRecord(null);
    final SuperCigarValidator validator = new SuperCigarValidator(0);

    samrec.setAlignmentStart(1);
    samrec.setCigarString("2=1X13=5N20=");
    samrec.setReadString("GACGCCGAGGAAAAACAGGCGGATCGTCAGGAGTT");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "2=1X13=5N20=");
    samrec.setBaseQualityString(qualities);

    // First pass: flags 73.
    samrec.setFlags(73);
    validator.setData(
        samrec, DnaUtils.encodeString(blankRead), FastaUtils.asciiToRawQuality(qualities));
    validator.setTemplate(DnaUtils.encodeString("GAGGCCGAGGGGGGGAAAAACAGGCGGATCGTCAGGAGTT"));
    validator.parse();
    assertFalse(validator.isValid());

    // Second pass: flags 115, template left as set above.
    samrec.setFlags(115);
    validator.setData(
        samrec, DnaUtils.encodeString(blankRead), FastaUtils.asciiToRawQuality(qualities));
    validator.parse();
    assertFalse(validator.isValid());
  }
Ejemplo n.º 5
0
  /**
   * Validates two records whose super cigar contains a {@code 2B} overlap: one matching the
   * template exactly, and one with a single mismatch described by a read delta. Both must be
   * accepted.
   */
  public void testCgOverlap() throws Exception {
    final String samQualities = "4316%8883-56+141663,2.3----45/.,2";
    final String sdfQualities = "4316%%68883-56+141663,2.3----45/.,2";

    final SAMRecord samrec = new SAMRecord(null);
    final SuperCigarValidator validator = new SuperCigarValidator(0);

    // Case 1: overlap only, no mismatches.
    samrec.setAlignmentStart(1);
    samrec.setCigarString("25=5N10=");
    samrec.setReadString("tttgtaggtcggataaggcgttcatccgacacg");
    samrec.setBaseQualityString(samQualities);
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B20=5N10=");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "%6");
    samrec.setReadNegativeStrandFlag(false);
    samrec.setFlags(67);
    final String exactRead = "tttgtgtaggtcggataaggcgttc     atccgacacg".replaceAll(" ", "");
    validator.setData(
        samrec, DnaUtils.encodeString(exactRead), FastaUtils.asciiToRawQuality(sdfQualities));
    validator.setTemplate(DnaUtils.encodeString("tttgtaggtcggataaggcgttcgggggatccgacacg"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Case 2: same template, but the read carries one mismatch after the overlap.
    samrec.setCigarString("3=1X21=5N10=");
    samrec.setReadString("tttataggtcggataaggcgttcatccgacacg");
    samrec.setBaseQualityString(samQualities);
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B1X19=5N10=");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "A");
    final String mismatchRead = "tttgtataggtcggataaggcgttc     atccgacacg".replaceAll(" ", "");
    validator.setData(
        samrec, DnaUtils.encodeString(mismatchRead), FastaUtils.asciiToRawQuality(sdfQualities));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());
  }
Ejemplo n.º 6
0
  /**
   * Test-only hack: creates a {@code GATKSAMRecord} by copying the state of an existing
   * {@link SAMRecord}.
   *
   * <p>The header is handed to the superclass constructor; reference index, alignment start, read
   * name, mapping quality, cigar, flags, mate information, inferred insert size, attributes, read
   * group, file source, bases and base qualities are then copied across. The indexing bin is
   * copied last, because {@code setCigarString} clears it.
   *
   * @param read the record to copy from
   */
  public GATKSAMRecord(final SAMRecord read) {
    super(read.getHeader());
    super.setReferenceIndex(read.getReferenceIndex());
    super.setAlignmentStart(read.getAlignmentStart());
    super.setReadName(read.getReadName());
    super.setMappingQuality(read.getMappingQuality());
    // The indexing bin is deliberately not copied here; it is set at the end of the constructor.
    super.setCigar(read.getCigar());
    super.setFlags(read.getFlags());
    super.setMateReferenceIndex(read.getMateReferenceIndex());
    super.setMateAlignmentStart(read.getMateAlignmentStart());
    super.setInferredInsertSize(read.getInferredInsertSize());
    SAMReadGroupRecord samRG = read.getReadGroup();
    // Attributes are obtained via GATKBin; a null result means there is nothing to copy, so any
    // attributes on this record are cleared instead.
    SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read);
    if (samAttr == null) {
      clearAttributes();
    } else {
      setAttributes(samAttr);
    }
    // Wrap the source read group (when present) in the GATK-specific read group type.
    if (samRG != null) {
      GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
      setReadGroup(rg);
    }

    super.setFileSource(read.getFileSource());
    // NOTE(review): the read name was already copied above; this second call looks redundant --
    // confirm before removing.
    super.setReadName(read.getReadName());
    // NOTE(review): the cigar was already copied via setCigar above; setting it again from the
    // string form is what forces the indexing bin to be restored below.
    super.setCigarString(read.getCigarString());
    super.setReadBases(read.getReadBases());
    super.setBaseQualities(read.getBaseQualities());
    // From SAMRecord constructor: Do this after the above because setCigarString will clear it.
    GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read));
  }
Ejemplo n.º 7
0
  /**
   * Validates a record whose super cigar has a {@code 1B} overlap and two inserts. With an AS
   * attribute of 7 the record must be accepted; with AS 6 it must be rejected with a
   * score-mismatch message.
   */
  public void test4Gap() throws Exception {
    // Source record this test was derived from (template shown first):
    //
    // CCATTCAGTTGGAGACGTTGTGGACCTGACGCCTCTGCTCTTGCAAGTCAGGACAT
    // 24106       67      paolo-bac       420     255     16=4I1=1I2=4N10=        paolo-bac
    // 735     315     CAGTTGGAGACGTTGTGNATGTGNACGCCTCTGC      213.3/22..103350/!2,2+/!14/-+-4//5
    //   AS:i:6  NM:i:5  MQ:i:255        XU:Z:5=1B12=4I1=1I2=4N10=       XQ:Z:1  XR:Z:GNATT
    // XA:i:10 IH:i:1
    final String sdfBases = "CAGTTTGGAGACGTTGTGNATGTGN     ACGCCTCTGC".replaceAll(" ", "");
    final String sdfQualities = "213.31/22..103350/!2,2+/!14/-+-4//5";
    final String templateBases = "CCATTCAGTTGGAGACGTTGTGGACCTGACGCCTCTGCTCTTGCAAGTCAGGACAT";

    final SAMRecord samrec = new SAMRecord(null);
    final SuperCigarValidator validator = new SuperCigarValidator(0);

    samrec.setAlignmentStart(6);
    samrec.setCigarString("16=4I1=1I2=4N10=");
    samrec.setReadString("CAGTTGGAGACGTTGTGNATGTGNACGCCTCTGC");
    samrec.setBaseQualityString("213.3/22..103350/!2,2+/!14/-+-4//5");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=1B12=4I1=1I2=4N10=");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "1");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "GNATT");
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 7);
    samrec.setFlags(67);

    // Read layout: CAGTT GGAGACGTTGTGNATG    T GN   ACGCCTCTGC
    validator.setData(
        samrec, DnaUtils.encodeString(sdfBases), FastaUtils.asciiToRawQuality(sdfQualities));
    validator.setTemplate(DnaUtils.encodeString(templateBases));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Lower the declared score by one: validation must now fail.
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 6);
    validator.setData(
        samrec, DnaUtils.encodeString(sdfBases), FastaUtils.asciiToRawQuality(sdfQualities));
    validator.setTemplate(DnaUtils.encodeString(templateBases));
    validator.parse();
    assertFalse(validator.getInvalidReason(), validator.isValid());
    final String reason = validator.getInvalidReason();
    assertEquals("Super cigar alignment score was 7, but AS attribute was 6", reason);
  }
Ejemplo n.º 8
0
  /**
   * Checks that a record is accepted without an XQ (overlap quality) attribute when the overlap
   * falls inside bases deleted from the template.
   */
  public void testCGOverlapWithDeletion2() throws Exception {
    final String readBases = "GGGCCTGCACTGGCCAAGGAGCTGTGTGA";
    final String qualities = "/////////////////////////////";

    final SAMRecord samrec = new SAMRecord(null);
    final SuperCigarValidator validator = new SuperCigarValidator(0);

    // Alignment being described:
    //    GGGCCTGCAC
    //              DDD
    //               BB
    //               TGGCCAAGGAGCTGTGTGA
    //    GGGCCTGCACCTGGCCAAGGAGCTGTGTGA
    samrec.setAlignmentStart(1);
    samrec.setCigarString("10=1D19=");
    samrec.setReadString(readBases);
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=3D2B19=");
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 4);
    samrec.setBaseQualityString(qualities);
    samrec.setFlags(131);

    final byte[] encodedRead = DnaUtils.encodeString(readBases.replaceAll(" ", ""));
    final byte[] rawQualities = FastaUtils.asciiToRawQuality(qualities);
    validator.setData(samrec, encodedRead, rawQualities);
    validator.setTemplate(DnaUtils.encodeString("GGGCCTGCACCTGGCCAAGGAGCTGTGTGA"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // A theoretical alignment that is probably not handled:
    //    ACG
    //       DDD
    //          G
    //         BB
    //         C
    //          D
    //           TACGTACGTACGT
    //    ACGTACGTACGTACGTACGT
    // Here the overlap has a match on either side of it, yet no template position is repeated in
    // a match or mismatch, so flattening the read would not require an XQ field.
  }
Ejemplo n.º 9
0
  /**
   * Validates four records that combine soft clipping with a CG overlap: clipping at the start,
   * an overlap that straddles the start clip, clipping at the end (flags 139), and the same
   * end-clipped record flagged 115 with reversed SDF data. All four must be accepted.
   */
  public void testSoftClip() throws Exception {
    final String clippedRead = "AGCCCACACGTAAATAAGACATCACGATGATCA";

    final SAMRecord samrec = new SAMRecord(null);
    final SuperCigarValidator validator = new SuperCigarValidator(0);

    // Scenario 1: three soft-clipped bases at the start of the read.
    samrec.setAlignmentStart(1);
    samrec.setCigarString("3S2=18=5N9=1X");
    samrec.setReadString(clippedRead);
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "3S2=2B20=5N9=1X");
    samrec.setBaseQualityString("4316%8883-56+141663,2.3----45/.,2");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "%6");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "AGCA");
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 4);
    samrec.setFlags(67);
    final String firstSdf = "AGCCCCCACACGTAAATAAGACATC     ACGATGATCA".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(firstSdf),
        FastaUtils.asciiToRawQuality("4316%%68883-56+141663,2.3----45/.,2"));
    validator.setTemplate(DnaUtils.encodeString("CCACACGTAAATAAGACATCGGGGGACGATGATCG"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Scenario 2: derived from a record that was once reported as an alignment mismatch:
    // 3617884      153     chr18   1       255     4S19=7N2=1X7=   *       *
    //   *       AAAACCCTAACCCTAACCCTAACCCCAACCCTA       998140-,7::;26;;.39'(2347-88989+7
    // AS:i:2  NM:i:1  XU:Z:5=2B20=7N2=1X7=    XR:Z:C  XQ:Z:42 IH:i:1
    samrec.setAlignmentStart(1);
    samrec.setCigarString("4S19=7N2=1X7=");
    samrec.setReadString("AAAACCCTAACCCTAACCCTAACCCCAACCCTA");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "4S1=2B1S19=7N2=1X7=");
    samrec.setBaseQualityString("998140-,7::;26;;.39'(2347-88989+7");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "42");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "AAAAAC");
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 6);
    samrec.setFlags(67);
    final String secondSdf = "AAAACACCCTAACCCTAACCCTAAC     CCCAACCCTA".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(secondSdf),
        FastaUtils.asciiToRawQuality("99814420-,7::;26;;.39'(2347-88989+7"));
    validator.setTemplate(DnaUtils.encodeString("CCCTAACCCTAACCCTAACCCTTACCCCTAACCCTA"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Leftover notes for a flags-153 variant of the record above:
    // TAGGGTTGGG     GTTAGGGTTAGGGTTAGGGTGTTTT"),
    // DnaUtils.fastqToPhred("7+98988-7432('93.;;62;::7,-02441899"));

    // Scenario 3: four soft-clipped bases at the end of the read, overlap next to the clip.
    samrec.setAlignmentStart(1);
    samrec.setCigarString("10=7N19=4S");
    samrec.setReadString(clippedRead);
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=7N19=1S2B1=4S");
    samrec.setBaseQualityString("8:::::79:775986<=<<96576767679808");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "88");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "AATCA");
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 5);
    samrec.setFlags(139);
    final String thirdSdf = "AGCCCACACG     TAAATAAGACATCACGATGAGATCA".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(thirdSdf),
        FastaUtils.asciiToRawQuality("8:::::79:775986<=<<9657676768879808"));
    validator.setTemplate(DnaUtils.encodeString("AGCCCACACGTTCCCCTTAAATAAGACATCACGATG"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Scenario 4: same SAM record with flags 115; the SDF bases and qualities are supplied in
    // the opposite orientation and the template is left unchanged.
    samrec.setFlags(115);
    final String fourthSdf = "TGATCTCATCGTGATGTCTTATTTA     CGTGTGGGCT".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(fourthSdf),
        FastaUtils.asciiToRawQuality("8089788676767569<<=<689577:97:::::8"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());
  }
Ejemplo n.º 10
0
  /**
   * Validates four records that carry XQ overlap qualities, using flags 67, 131, 115 and 179 in
   * turn. Each record must be accepted by the validator.
   */
  public void testQualities() throws Exception {
    final SAMRecord samrec = new SAMRecord(null);
    final SuperCigarValidator validator = new SuperCigarValidator(0);

    // Case 1 (flags 67). Source record:
    // 237726  67  simulatedSequence1  3  255  22=6N10=  =  170  167
    //   TGCCCCCCTGAGAATGAATGTTGGACGAAATA  )*N\S\7@*`[4DRA8VKE-JF:KP0<D:/"K
    //   AS:i:0  NM:i:0  MQ:i:255  XU:Z:5=3B20=6N10=  XQ:Z:V55  XA:i:1  IH:i:1
    // SDF read and qualities:
    //   TGCCCCCCCCCTGAGAATGAATGTTGGACGAAATA
    //   )*N\SV55\7@*`[4DRA8VKE-JF:KP0<D:/"K
    samrec.setAlignmentStart(3);
    samrec.setCigarString("22=6N10=");
    samrec.setReadString("tgcccccctgagaatgaatgttggacgaaata");
    samrec.setBaseQualityString(")*N!S!7@*`[4DRA8VKE-JF:KP0<D:/!K");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=3B20=6N10=");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "V55");
    samrec.setFlags(67);
    final String firstSdf = "tgccccccccctgagaatgaatgtt     ggacgaaata".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(firstSdf),
        FastaUtils.asciiToRawQuality(")*N!SV55!7@*`[4DRA8VKE-JF:KP0<D:/!K"));
    validator.setTemplate(
        DnaUtils.encodeString("attgcccccctgagaatgaatgttatgtacggacgaaatatgtaaccata"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Case 2 (flags 131). Source record:
    // 184100  131  simulatedSequence1  191  255  10=6N4=1X18=  =  11  -180
    //   AGCTTCTATAGCGGAATTGAGCGGAACCGCACG  YTD$B1L!`_<"L'V8W=72T#YU]K@,#KUA>
    //   AS:i:1  NM:i:1  MQ:i:255  XU:Z:10=6N4=1X15=2B5=  XQ:Z:%*  XR:Z:A  XA:i:1  IH:i:1
    // SDF read and qualities:
    //   AGCTTCTATAGCGGAATTGAGCGGAACCGCGCACG
    //   YTD$B1L!`_<"L'V8W=72T#YU]K@,%*#KUA>
    final String sharedTemplate = "TAGCTTCTATAGGGGGCGCGGTATTGAGCGGAACCGCACGTGCTATTTTCC";
    samrec.setAlignmentStart(2);
    samrec.setCigarString("10=6N4=1X18=");
    samrec.setReadString("AGCTTCTATAGCGGAATTGAGCGGAACCGCACG");
    samrec.setBaseQualityString("YTD$B1L!`_<!L'V8W=72T#YU]K@,#KUA>");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=6N4=1X15=2B5=");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "%*");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "A");
    samrec.setFlags(131);
    final String secondSdf = "AGCTTCTATA     GCGGAATTGAGCGGAACCGCGCACG".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(secondSdf),
        FastaUtils.asciiToRawQuality("YTD$B1L!`_<!L'V8W=72T#YU]K@,%*#KUA>"));
    validator.setTemplate(DnaUtils.encodeString(sharedTemplate));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Case 3 (flags 115). Source record:
    // 137629  115  simulatedSequence1  195  255  10=6N9=1I13=  =  13  -181
    //   TCTATAGGGGTATTGAGCGAGAACCGCACGTGC  ^#R"E\`,),UQANL6J/J"G/P'^;<RIX4O$
    //   AS:i:2  NM:i:1  MQ:i:255  XU:Z:10=6N9=1I10=2B5=  XQ:Z:<O  XR:Z:A  XA:i:3  IH:i:1
    // SDF read and qualities:
    //   GCACGCGTGCGGTTCTCGCTCAATACCCCTATAGA
    //   $O4XIO<R<;^'P/G"J/J6LNAQU,),`\E"R#^
    final String reversedSdfQualities = "$O4XIO<R<;^'P/G!J/J6LNAQU,),`!E!R#^";
    samrec.setAlignmentStart(6);
    samrec.setCigarString("10=6N9=1I13=");
    samrec.setReadString("TCTATAGGGGTATTGAGCGAGAACCGCACGTGC");
    samrec.setBaseQualityString("^#R!E!`,),UQANL6J/J!G/P'^;<RIX4O$");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=6N9=1I10=2B5=");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "<O");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "A");
    samrec.setFlags(115);
    final String thirdSdf = "GCACGCGTGCGGTTCTCGCTCAATA     CCCCTATAGA".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(thirdSdf),
        FastaUtils.asciiToRawQuality(reversedSdfQualities));
    validator.setTemplate(DnaUtils.encodeString(sharedTemplate));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Case 4 (flags 179). Source record:
    // 137629  179  simulatedSequence1  13  255  9=1X13=6N10=  =  195  181
    //   AGAATGAATCTTATGTACGGACGGTAACCATAA  ^#R"E,),UQANL6J/J"G/P'^;<R<OIX4O$
    //   AS:i:1  NM:i:1  MQ:i:255  XU:Z:5=2B6=1X13=6N10=  XQ:Z:\`  XR:Z:C  XA:i:3  IH:i:1
    // SDF read and qualities:
    //   TTATGGTTACCGTCCGTACATAAGATTCATATTCT
    //   $O4XIO<R<;^'P/G"J/J6LNAQU,),`\E"R#^
    samrec.setAlignmentStart(2);
    samrec.setCigarString("9=1X13=6N10=");
    samrec.setReadString("AGAATGAATCTTATGTACGGACGGTAACCATAA");
    samrec.setBaseQualityString("^#R!E,),UQANL6J/J!G/P'^;<R<OIX4O$");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B6=1X13=6N10=");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "!`");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "C");
    samrec.setFlags(179);
    final String fourthSdf = "TTATGGTTAC     CGTCCGTACATAAGATTCATATTCT".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(fourthSdf),
        FastaUtils.asciiToRawQuality(reversedSdfQualities));
    validator.setTemplate(DnaUtils.encodeString("GAGAATGAATGTTATGTACGGACGAAATATGTAACCATAACACC"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());
  }
Ejemplo n.º 11
0
  /**
   * Walks the validator through a series of deliberately inconsistent records and checks the
   * reported reason for each: missing or wrong read delta, a mismatch that is really a match, a
   * wrong insert base, a match that is really a mismatch, differing qualities, an XQ value of the
   * wrong length, and a missing XQ attribute.
   */
  public void testMismatchFailures() throws Exception {
    final String sdfQualities = "4316%%68883-56+141663,2.3----45/.,2";
    final String shortSamQualities = "4316%8883-56+141663,2.3----45/.,2";
    final String overlapSamRead = "tttgtaggtcggataaggcgttcatccgacacg";
    final String overlapSdfRead = "tttgtgtaggtcggataaggcgttc     atccgacacg".replaceAll(" ", "");
    final String mismatchSdfRead = "GACGCCGAGG     AAAAACAGGCGGATCGTCAGGAGTT".replaceAll(" ", "");

    final SAMRecord samrec = new SAMRecord(null);
    final SuperCigarValidator validator = new SuperCigarValidator(0);
    String reason;

    // 1. Super cigar declares a mismatch but no read delta attribute is present.
    samrec.setAlignmentStart(1);
    samrec.setCigarString("2=1X13=5N20=");
    samrec.setReadString("GACGCCGAGGAAAAACAGGCGGATCGTCAGGAGTT");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "2=1X13=5N20=");
    samrec.setBaseQualityString(sdfQualities);
    samrec.setFlags(73);
    validator.setData(
        samrec,
        DnaUtils.encodeString(mismatchSdfRead),
        FastaUtils.asciiToRawQuality(sdfQualities));
    validator.setTemplate(DnaUtils.encodeString("GAGGCCGAGGGGGGGAAAAACAGGCGGATCGTCAGGAGTT"));
    try {
      validator.parse();
      assertFalse(validator.isValid());
      reason = validator.getInvalidReason();
      assertTrue(
          reason, reason.contains("Read delta (" + SamUtils.CG_READ_DELTA + ") too short, "));
    } catch (final AssertionError e) {
      // An AssertionError with exactly this message is tolerated in place of an invalid result.
      assertEquals("readDelta.len=0 but should be 1", e.getMessage());
    }

    // 2. Read delta is present but names the wrong base.
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "T");
    validator.setData(
        samrec,
        DnaUtils.encodeString(mismatchSdfRead),
        FastaUtils.asciiToRawQuality(sdfQualities));
    validator.parse();
    assertFalse(validator.isValid());
    reason = validator.getInvalidReason();
    assertTrue(
        reason,
        reason.contains(SamUtils.CG_READ_DELTA + " value: T does not match read value: C"));

    // 3. SDF read matches the template where the super cigar claims a mismatch.
    final String matchingSdfRead = "GAGGCCGAGG     AAAAACAGGCGGATCGTCAGGAGTT".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(matchingSdfRead),
        FastaUtils.asciiToRawQuality(sdfQualities));
    validator.parse();
    assertFalse(validator.isValid());
    reason = validator.getInvalidReason();
    assertTrue(reason, reason.contains("Expected mismatch"));

    // 4. Inserted base in the SDF read differs from the read delta.
    samrec.setAlignmentStart(5);
    samrec.setCigarString("4=1I5=7N25=");
    samrec.setReadString("CTGTGGCATCGGGGGACCTGGGGCCCTCNCTGAGT");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "4=1I5=7N25=");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "A");
    final String insertSdfRead = "CTGTGGCATC     GGGGGACCTGGGGCCCTCNCTGAGT".replaceAll(" ", "");
    validator.setData(
        samrec, DnaUtils.encodeString(insertSdfRead), FastaUtils.asciiToRawQuality(sdfQualities));
    validator.setTemplate(
        DnaUtils.encodeString(
            "TGTTCTGTG CATCTTCCCTTGGGGGACCTGNGGCCCTCACTGAGTGGGTCCTCCATGGGTGACTGGTGA"));
    validator.parse();
    assertFalse(validator.getInvalidReason(), validator.isValid());
    reason = validator.getInvalidReason();
    assertTrue(
        reason,
        reason.contains("SDF read insert: G does not match SAM " + SamUtils.CG_READ_DELTA + ": A,"));

    // 5. Flags 179 record where the SDF read disagrees with the template at a declared match.
    samrec.setAlignmentStart(2);
    samrec.setCigarString("23=6N10=");
    samrec.setReadString("CTTCAGCGATGGAGAAACTCGGGTGTCTACGTA");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B20=6N10=");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, null);
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "%6");
    samrec.setBaseQualityString(shortSamQualities);
    samrec.setFlags(179);
    final String reverseSdfRead = "TACGTAGACA     CCCGAGTGTCTCCATCGCTGTGAAG".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(reverseSdfRead),
        FastaUtils.asciiToRawQuality("2,./54----3.2,366141+65-38886%%6134"));
    validator.setTemplate(
        DnaUtils.encodeString("GCTTCAGCGATGGAGAAACTCGGGAAGTCGTGTCTACGTAGAACGTAGTT"));
    validator.parse();
    assertFalse(validator.getInvalidReason(), validator.isValid());
    reason = validator.getInvalidReason();
    assertTrue(reason, reason.contains("Expected match, SDF read=C, template=A,"));

    // 6. SAM base qualities differ from the SDF qualities.
    samrec.setAlignmentStart(1);
    samrec.setCigarString("10M5N25M");
    samrec.setReadString("GAGGCCGAGGCAGGCGGATCGTCAGGAGTTAAAAA");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=5N25=");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, null);
    samrec.setBaseQualityString("4316%668883-56+141663,2.3----45/.,2");
    samrec.setFlags(73);
    final String plainSdfRead = "GAGGCCGAGG     CAGGCGGATCGTCAGGAGTTAAAAA".replaceAll(" ", "");
    validator.setData(
        samrec, DnaUtils.encodeString(plainSdfRead), FastaUtils.asciiToRawQuality(sdfQualities));
    validator.setTemplate(DnaUtils.encodeString("GAGGCCGAGGGGGGGCAGGCGGATCGTCAGGAGTTAAAAA"));
    validator.parse();
    assertFalse(validator.getInvalidReason(), validator.isValid());
    reason = validator.getInvalidReason();
    assertTrue(reason, reason.contains("SDF and SAM qualities don't match,"));

    // 7. XQ supplies one quality for a two-base overlap: setData itself must throw.
    samrec.setAlignmentStart(1);
    samrec.setCigarString("25=5N10=");
    samrec.setReadString(overlapSamRead);
    samrec.setBaseQualityString(shortSamQualities);
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B20=5N10=");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "6");
    samrec.setReadNegativeStrandFlag(false);
    samrec.setFlags(67);
    try {
      validator.setData(
          samrec,
          DnaUtils.encodeString(overlapSdfRead),
          FastaUtils.asciiToRawQuality(sdfQualities));
      fail();
    } catch (final BadSuperCigarException iae) {
      final String message = iae.getMessage();
      assertTrue(
          message,
          message.contains("SAM record qualities plus XQ not expected length. Was: 34 expected: 35"));
    }

    // 8. Overlap declared in the super cigar but the XQ attribute is absent.
    samrec.setAlignmentStart(1);
    samrec.setCigarString("25=5N10=");
    samrec.setReadString(overlapSamRead);
    samrec.setBaseQualityString(sdfQualities);
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B20=5N10=");
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, null);
    samrec.setReadNegativeStrandFlag(false);
    samrec.setFlags(67);
    validator.setTemplate(DnaUtils.encodeString("tttgtaggtcggataaggcgttcgggggatccgacacg"));
    validator.setData(
        samrec, DnaUtils.encodeString(overlapSdfRead), FastaUtils.asciiToRawQuality(sdfQualities));
    validator.parse();

    assertFalse(validator.getInvalidReason(), validator.isValid());
    reason = validator.getInvalidReason();
    assertTrue(
        reason,
        reason.contains(
            "Overlap described but no " + SamUtils.CG_OVERLAP_QUALITY + " field present"));
  }
Ejemplo n.º 12
0
  /**
   * Runs the validator over a sequence of simple records (exact match, single mismatch, one- and
   * two-base deletions, an insert, and a flags-179 record with an overlap) and checks which are
   * accepted. The only rejection expected is the mismatch record while its AS attribute is still
   * 0.
   */
  public void testSimpleMatches() throws Exception {
    final String sdfQualities = "4316%%68883-56+141663,2.3----45/.,2";
    final String deletionSamRead = "CTGTCATCTTACCTGGGGCCCTCNCTGAGTGGGTC";
    final String deletionSdfRead = "CTGTCATCTT     ACCTGGGGCCCTCNCTGAGTGGGTC".replaceAll(" ", "");

    final SAMRecord samrec = new SAMRecord(null);
    final SuperCigarValidator validator = new SuperCigarValidator(0);

    // Exact match with a declared score of 0.
    samrec.setAlignmentStart(1);
    samrec.setCigarString("10M5N25M");
    samrec.setReadString("GAGGCCGAGGCAGGCGGATCGTCAGGAGTTAAAAA");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=5N25=");
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 0);
    samrec.setBaseQualityString(sdfQualities);
    samrec.setFlags(73);
    final String exactSdfRead = "GAGGCCGAGG     CAGGCGGATCGTCAGGAGTTAAAAA".replaceAll(" ", "");
    validator.setData(
        samrec, DnaUtils.encodeString(exactSdfRead), FastaUtils.asciiToRawQuality(sdfQualities));
    validator.setTemplate(DnaUtils.encodeString("GAGGCCGAGGGGGGGCAGGCGGATCGTCAGGAGTTAAAAA"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // One mismatch while AS is still 0: must be rejected.
    samrec.setCigarString("2=1X7=5N25=");
    samrec.setReadString("GACGCCGAGGCAGGCGGATCGTCAGGAGTTAAAAA");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "2=1X7=5N25=");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "C");
    final String mismatchSdfRead = "GACGCCGAGG     CAGGCGGATCGTCAGGAGTTAAAAA".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(mismatchSdfRead),
        FastaUtils.asciiToRawQuality(sdfQualities));
    validator.parse();
    assertFalse(validator.getInvalidReason(), validator.isValid());

    // Same record with the AS attribute removed: accepted.
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, null);
    validator.setData(
        samrec,
        DnaUtils.encodeString(mismatchSdfRead),
        FastaUtils.asciiToRawQuality(sdfQualities));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Single-base deletion, declared score 2.
    samrec.setAlignmentStart(5);
    samrec.setCigarString("4=1D6=5N25=");
    samrec.setReadString(deletionSamRead);
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "4=1D6=5N25=");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, null);
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 2);
    validator.setData(
        samrec,
        DnaUtils.encodeString(deletionSdfRead),
        FastaUtils.asciiToRawQuality(sdfQualities));
    validator.setTemplate(
        DnaUtils.encodeString("TGTTCTGTGCATCTTCCCTTACCTGNGGCCCTCACTGAGTGGGTCCTCCATGGGTGACTGGTGA"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Single-base insert, no declared score; template unchanged from the previous step.
    samrec.setAlignmentStart(5);
    samrec.setCigarString("4=1I5=7N25=");
    samrec.setReadString("CTGTAGCATCACCTGGGGCCCTCNCTGAGTGGGTC");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "4=1I5=7N25=");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, "A");
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, null);
    final String insertSdfRead = "CTGTAGCATC     ACCTGGGGCCCTCNCTGAGTGGGTC".replaceAll(" ", "");
    validator.setData(
        samrec, DnaUtils.encodeString(insertSdfRead), FastaUtils.asciiToRawQuality(sdfQualities));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Two-base deletion, declared score 3.
    samrec.setAlignmentStart(5);
    samrec.setCigarString("4=2D6=5N25=");
    samrec.setReadString(deletionSamRead);
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "4=2D6=5N25=");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, null);
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 3);
    validator.setData(
        samrec,
        DnaUtils.encodeString(deletionSdfRead),
        FastaUtils.asciiToRawQuality(sdfQualities));
    validator.setTemplate(
        DnaUtils.encodeString("TGTTCTGTGGCATCTTCCCTTACCTGNGGCCCTCACTGAGTGGGTCCTCCATGGGTGACTGGTGA"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());

    // Reverse complement: flags 179 with a two-base overlap and XQ qualities.
    samrec.setAlignmentStart(2);
    samrec.setCigarString("23=6N10=");
    samrec.setReadString("CTTCAGCGATGGAGAAACTCGGGTGTCTACGTA");
    samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B20=6N10=");
    samrec.setAttribute(SamUtils.CG_READ_DELTA, null);
    samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "%6");
    samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 0);
    samrec.setBaseQualityString("4316%8883-56+141663,2.3----45/.,2");
    samrec.setFlags(179);
    final String reverseSdfRead = "TACGTAGACA     CCCGAGTTTCTCCATCGCTGTGAAG".replaceAll(" ", "");
    validator.setData(
        samrec,
        DnaUtils.encodeString(reverseSdfRead),
        FastaUtils.asciiToRawQuality("2,./54----3.2,366141+65-38886%%6134"));
    validator.setTemplate(
        DnaUtils.encodeString("GCTTCAGCGATGGAGAAACTCGGGAAGTCGTGTCTACGTAGAACGTAGTT"));
    validator.parse();
    assertTrue(validator.getInvalidReason(), validator.isValid());
  }
Ejemplo n.º 13
0
  /**
   * Exercises {@code SuperCigarValidator} on records whose super cigar describes an overlap
   * ({@code 2B}) while the SAM record carries no XQ attribute.
   *
   * <p>The first three cases expect the record to be accepted; the last two expect it to be
   * rejected with the "no XQ field" reason.
   */
  public void testCGOverlapWithDeletion() throws Exception {
    final String flatQualities = "/////////////////////////////";
    final String fullLengthBases = "AGGCAGGTAGATCATGAGGTGAAGAGATC";
    final String missingXqReason = "Overlap described but no XQ field present in SAM record";

    final byte[] sdfBases = DnaUtils.encodeString("GATCTCTTCACCTCATGATCTACCTGCCT");
    final byte[] sdfQuals = FastaUtils.asciiToRawQuality(flatQualities);

    final SuperCigarValidator checker = new SuperCigarValidator(0);
    final SAMRecord rec = new SAMRecord(null);

    // It is OK to omit XQ when the overlap is deleted from the template.
    rec.setAlignmentStart(1);
    rec.setCigarString("29=");
    rec.setReadString(fullLengthBases);
    rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "19=2B2D10=");
    rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 3);
    rec.setBaseQualityString(flatQualities);
    rec.setFlags(179);
    checker.setData(rec, sdfBases, sdfQuals);
    checker.setTemplate(DnaUtils.encodeString(fullLengthBases));
    checker.parse();
    assertTrue(checker.getInvalidReason(), checker.isValid());

    // Same record with N, then H, in place of the D operation; score drops to zero.
    for (final String superCigar : new String[] {"19=2B2N10=", "19=2B2H10="}) {
      rec.setAttribute(SamUtils.CG_SUPER_CIGAR, superCigar);
      rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 0);
      checker.setData(rec, sdfBases, sdfQuals);
      checker.parse();
      assertTrue(checker.getInvalidReason(), checker.isValid());
    }

    // Overlap followed by a mismatch and a later deletion: XQ is now required.
    rec.setCigarString("19=2D8=");
    // The read string set here does not appear to be checked by anything.
    rec.setReadString("TGGCAGGTAGATCATGAGGAAGAGATC");
    rec.setBaseQualityString(flatQualities);
    rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "19=2B1X1=2D8=");
    rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 4);
    rec.setAttribute(SamUtils.CG_READ_DELTA, "T");
    checker.setData(rec, sdfBases, sdfQuals);
    checker.parse();
    assertFalse(checker.isValid());
    final String deletionReason = checker.getInvalidReason();
    assertTrue(deletionReason, deletionReason.contains(missingXqReason));

    // Overlap followed only by matches and mismatches: XQ is still required.
    rec.setReadString("AGGCAGGTAGATCATGAGGAAGAGATC");
    rec.setBaseQualityString(flatQualities);
    rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "19=2B1X1=3X3=2X");
    rec.setAttribute(SamUtils.CG_READ_DELTA, "TAAGTC");
    rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 6);
    checker.setData(rec, sdfBases, sdfQuals);
    checker.parse();
    assertFalse(checker.isValid());
    final String mismatchReason = checker.getInvalidReason();
    assertTrue(mismatchReason, mismatchReason.contains(missingXqReason));
  }
  /**
   * Labels every kmer of each annotated contig with its inheritance code and writes the results.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Build the code-to-index and code-to-name tables; names can be overridden through
   *       {@code KMER_CODE_NAMES}.
   *   <li>Load the annotation table {@code ANN} (when non-empty), derive the kmer size from its
   *       first row, and print two locus lines per annotated contig to {@code cout}.
   *   <li>Add one read group per code name to the header of {@code CONTIGS} and open an indexed
   *       BAM writer on {@code bout}.
   *   <li>For each contig that passes the {@code CONTIG_NAMES} filter and has an annotation: write
   *       one single-kmer alignment per visited read offset, tally kmers per code, collect IGV
   *       entries, print an interval line to {@code beout} (records that are neither secondary
   *       nor supplementary) and add a statistics row to the table written to {@code sout}.
   *   <li>Print the collected IGV entries to {@code out}.
   * </ol>
   *
   * <p>The BAM writer is closed even when processing fails part-way. Kmers whose code is missing
   * or not one of the known codes are still written to the BAM (tagged with the contig's sample
   * read group) but are left out of the statistics and the IGV output instead of raising a
   * {@link NullPointerException}.
   */
  @Override
  public void execute() {
    log.info("Initializing kmer code map...");
    Map<Character, Integer> kmerCodeIndices = new HashMap<Character, Integer>();
    kmerCodeIndices.put('0', 1);
    kmerCodeIndices.put('A', 3);
    kmerCodeIndices.put('B', 4);
    kmerCodeIndices.put('C', 5);
    kmerCodeIndices.put('_', 6);
    kmerCodeIndices.put('.', 7);
    kmerCodeIndices.put('1', 9);

    // Insertion-ordered so that log lines, read groups and stats columns keep a stable order.
    Map<Character, String> kmerCodeNames = new LinkedHashMap<Character, String>();
    kmerCodeNames.put('0', "ref0");
    kmerCodeNames.put('A', "repetitive");
    kmerCodeNames.put('B', "both");
    kmerCodeNames.put('C', "lowcoverage");
    kmerCodeNames.put('_', "lowconfidence");
    kmerCodeNames.put('.', "novel");
    kmerCodeNames.put('1', "ref1");

    if (KMER_CODE_NAMES != null) {
      // Replacing the value of an existing key is not a structural change, so this is safe
      // while iterating over the key set.
      for (Character c : kmerCodeNames.keySet()) {
        String cStr = String.valueOf(c);
        if (KMER_CODE_NAMES.containsKey(cStr)) {
          kmerCodeNames.put(c, KMER_CODE_NAMES.get(cStr));
        }
      }
    }

    for (Character c : kmerCodeNames.keySet()) {
      log.info("  {} {}: {}", c, kmerCodeIndices.get(c), kmerCodeNames.get(c));
    }

    log.info("Loading annotated contigs...");
    Map<String, Map<String, String>> annotatedContigs = new HashMap<String, Map<String, String>>();
    int kmerSize = 0;

    if (ANN.length() > 0) {
      TableReader tr = new TableReader(ANN);
      for (Map<String, String> te : tr) {
        String contigName = te.get("contigName");

        if (kmerSize == 0) {
          // kmerOrigin holds one code per kmer, so a sequence of length n has n - k + 1 codes.
          kmerSize = te.get("seq").length() - te.get("kmerOrigin").length() + 1;
        }

        annotatedContigs.put(contigName, te);

        String[] ref0ToCanonicalExact = splitCanonicalLocus(te.get("ref0ToCanonicalExact"));
        String[] ref1ToCanonicalExact = splitCanonicalLocus(te.get("ref1ToCanonicalExact"));

        String linkId = te.get("sampleName") + "_" + te.get("accession") + "_" + contigName;

        cout.println(
            linkId
                + " "
                + ref0ToCanonicalExact[0]
                + " "
                + ref0ToCanonicalExact[1]
                + " "
                + ref0ToCanonicalExact[2]
                + " radius1=0.8r");
        cout.println(
            linkId
                + " "
                + ref1ToCanonicalExact[0]
                + " "
                + ref1ToCanonicalExact[1]
                + " "
                + ref1ToCanonicalExact[2]
                + " radius2=0.6r");
      }
    }

    log.info("    contigs: {}", annotatedContigs.size());
    log.info("  kmer size: {}", kmerSize);

    log.info("Computing kmer inheritance information...");

    SAMFileHeader sfh = CONTIGS.getFileHeader();
    for (Character c : kmerCodeNames.keySet()) {
      SAMReadGroupRecord rgr = new SAMReadGroupRecord(kmerCodeNames.get(c));
      rgr.setSample(kmerCodeNames.get(c));
      sfh.addReadGroup(rgr);
    }

    // The header's read groups do not change from here on, so the read group for each code is
    // resolved once instead of rescanning the header for every kmer.
    Map<Character, String> codeReadGroupIds = new HashMap<Character, String>();
    for (Character c : kmerCodeNames.keySet()) {
      codeReadGroupIds.put(c, lastReadGroupIdForSample(sfh, kmerCodeNames.get(c)));
    }

    SAMFileWriterFactory sfwf = new SAMFileWriterFactory();
    sfwf.setCreateIndex(true);
    SAMFileWriter sfw = sfwf.makeBAMWriter(sfh, false, bout);

    try {
      TableWriter tw = new TableWriter(sout);

      Set<IGVEntry> igvEntries = new TreeSet<IGVEntry>();
      int numContigs = 0;
      int uncodedKmers = 0;
      for (SAMRecord contig : CONTIGS) {
        String contigName = contig.getReadName();

        boolean selected =
            CONTIG_NAMES == null || CONTIG_NAMES.isEmpty() || CONTIG_NAMES.contains(contigName);
        if (!selected || !annotatedContigs.containsKey(contigName)) {
          continue;
        }

        Map<String, String> te = annotatedContigs.get(contigName);
        String seq = contig.getReadString();
        String annSeq = te.get("seq");
        String kmerOrigin = te.get("kmerOrigin");

        String sampleName = contig.getReadGroup().getSample();
        // Fallback read group for kmers that carry no recognized code.
        String sampleReadGroupId = lastReadGroupIdForSample(sfh, sampleName);

        Map<CortexKmer, Character> kmerCodes = new HashMap<CortexKmer, Character>();
        for (int i = 0; i < kmerOrigin.length(); i++) {
          CortexKmer kmer = new CortexKmer(annSeq.substring(i, i + kmerSize));
          Character code = kmerOrigin.charAt(i);

          kmerCodes.put(kmer, code);
        }

        Map<Character, Integer> kmerStats = new HashMap<Character, Integer>();
        for (Character c : kmerCodeNames.keySet()) {
          kmerStats.put(c, 0);
        }

        boolean changed = false;

        // We want to be able to examine soft-clipped regions as well, so soft clips are turned
        // into matches.
        List<CigarElement> ces = new ArrayList<CigarElement>();
        for (CigarElement ce : contig.getCigar().getCigarElements()) {
          if (ce.getOperator().equals(CigarOperator.S)) {
            ces.add(new CigarElement(ce.getLength(), CigarOperator.M));
            changed = true;
          } else {
            ces.add(ce);
          }
        }

        if (changed) {
          CigarElement firstCe = contig.getCigar().getCigarElements().get(0);

          // A leading soft clip now consumes reference, so the start moves left by its length.
          if (firstCe.getOperator().equals(CigarOperator.S)) {
            contig.setAlignmentStart(contig.getAlignmentStart() - firstCe.getLength());
          }

          contig.setCigar(new Cigar(ces));
        }

        for (AlignmentBlock ab : contig.getAlignmentBlocks()) {
          // NOTE(review): the upper bound visits getLength() + 1 offsets per block, and the
          // strict '<' below excludes the kmer that ends exactly at the end of the read. Both
          // look like possible off-by-ones but are preserved here - confirm the intended bounds.
          for (int i = ab.getReadStart() - 1; i < ab.getReadStart() + ab.getLength(); i++) {
            if (i + kmerSize < seq.length()) {
              String kmerSeq = seq.substring(i, i + kmerSize);
              CortexKmer kmer = new CortexKmer(kmerSeq);

              SAMRecord skmer = new SAMRecord(CONTIGS.getFileHeader());
              skmer.setReadBases(kmerSeq.getBytes());

              List<CigarElement> cigarElements = new ArrayList<CigarElement>();
              cigarElements.add(new CigarElement(kmerSize, CigarOperator.M));
              Cigar cigar = new Cigar(cigarElements);

              skmer.setReadName(contigName + "." + kmer.getKmerAsString());
              skmer.setReferenceName(contig.getReferenceName());
              skmer.setCigar(cigar);
              skmer.setReadPairedFlag(false);
              skmer.setDuplicateReadFlag(false);
              skmer.setMateNegativeStrandFlag(false);
              skmer.setAlignmentStart(ab.getReferenceStart() - ab.getReadStart() + 1 + i);

              // c is null when the kmer is absent from the annotated sequence.
              Character c = kmerCodes.get(kmer);
              String parentReadGroupId = codeReadGroupIds.get(c);

              skmer.setAttribute(
                  "RG", parentReadGroupId != null ? parentReadGroupId : sampleReadGroupId);
              skmer.setMappingQuality(99);

              sfw.addAlignment(skmer);

              Integer seenSoFar = kmerStats.get(c);
              if (seenSoFar == null) {
                // No recognized code: nothing to tally and no parentage index for IGV.
                uncodedKmers++;
              } else {
                kmerStats.put(c, seenSoFar + 1);

                IGVEntry igvEntry = new IGVEntry();
                igvEntry.chromosome = contig.getReferenceName();
                igvEntry.start = ab.getReferenceStart() - ab.getReadStart() + i;
                igvEntry.parentageName = kmerCodeNames.get(c);
                igvEntry.parentage = kmerCodeIndices.get(c);
                igvEntries.add(igvEntry);
              }
            }
          }
        }

        if (!contig.isSecondaryOrSupplementary()) {
          beout.println(
              contig.getReferenceName()
                  + "\t"
                  + contig.getAlignmentStart()
                  + "\t"
                  + contig.getAlignmentEnd()
                  + "\t"
                  + contigName
                  + "."
                  + sampleName);

          // Report progress roughly every tenth of the annotated contigs.
          if (annotatedContigs.size() > 10 && numContigs % (annotatedContigs.size() / 10) == 0) {
            log.info("  processed {}/{} contigs", numContigs, annotatedContigs.size());
          }
          numContigs++;
        }

        Map<String, String> stats = new LinkedHashMap<String, String>();
        stats.put("contigName", contigName);
        stats.put("sampleName", sampleName);
        for (Character c : kmerCodeNames.keySet()) {
          stats.put(kmerCodeNames.get(c), String.valueOf(kmerStats.get(c)));
        }
        tw.addEntry(stats);
      }

      if (uncodedKmers > 0) {
        log.info("  kmers without a recognized code (not tallied): {}", uncodedKmers);
      }

      log.info("Writing kmer inheritance information...");
      out.printf("%s\t%s\t%s\t%s\t%s\n", "Chromosome", "Start", "End", "Feature", "Parentage");
      for (IGVEntry igvEntry : igvEntries) {
        out.printf(
            "%s\t%d\t%d\t%s\t%d\n",
            igvEntry.chromosome,
            igvEntry.start,
            igvEntry.start + 1,
            igvEntry.parentageName,
            igvEntry.parentage);
      }
    } finally {
      // Always release the BAM writer, including when a contig fails mid-run.
      sfw.close();
    }
  }

  /**
   * Splits a {@code name:start-end} locus into its three parts, mapping the placeholders
   * {@code NA} and {@code *:0-0} to {@code NA:0-0} first.
   */
  private String[] splitCanonicalLocus(String locus) {
    boolean placeholder = locus.equals("NA") || locus.equals("*:0-0");
    return (placeholder ? "NA:0-0" : locus).split("[:-]");
  }

  /**
   * Returns the id of the last read group in {@code header} whose sample equals {@code sample},
   * or {@code null} when {@code sample} is {@code null} or no read group matches.
   */
  private String lastReadGroupIdForSample(SAMFileHeader header, String sample) {
    String readGroupId = null;
    if (sample != null) {
      for (SAMReadGroupRecord rgr : header.getReadGroups()) {
        if (sample.equals(rgr.getSample())) {
          readGroupId = rgr.getReadGroupId();
        }
      }
    }
    return readGroupId;
  }