public void testOverlapCigarConsistencyRev() { final byte[] read = DnaUtils.encodeString( DnaUtils.reverseComplement( "TAGGCGGGTTGCCAA TTAACTTGTA GTCCTTGACA".replaceAll(" ", ""))); final byte[] tmpl = DnaUtils.encodeString("TAGGG TGGCCAA TTAACTTGTAGTGTGCGTCCTTGACA".replaceAll(" ", "")); final int[] actions = ActionsHelper.build("==========NNNNNN===============X====BBBX====", 0, 3); final AlignmentResult ar = new AlignmentResult(read, actions, tmpl); ar.setIdentifyingInfo(false, true); // System.out.println("cigar:" + ar.getCigarString(1, true) + " readString:" + ar.readString()); final SAMRecord samrec = new SAMRecord(null); samrec.setCigarString(ar.getCigarString(true, false)); samrec.setReadString(DnaUtils.reverseComplement(ar.readString())); samrec.setAttribute(SamUtils.ATTRIBUTE_NUM_MISMATCHES, 2); samrec.setAlignmentStart(1); samrec.setFlags(179); final NgsParams params = new NgsParamsBuilder() .gapOpenPenalty(EditDistanceFactory.DEFAULT_GAP_OPEN_PENALTY) .gapExtendPenalty(EditDistanceFactory.DEFAULT_GAP_EXTEND_PENALTY) .substitutionPenalty(EditDistanceFactory.DEFAULT_SUBSTITUTION_PENALTY) .unknownsPenalty(0) .create(); try (MemoryPrintStream mps = new MemoryPrintStream()) { final SamValidator sv = new SamValidator( mps.printStream(), mps.printStream(), true, false, false, false, params, false); assertEquals( 2 * EditDistanceFactory.DEFAULT_SUBSTITUTION_PENALTY, sv.isAtExpectedRef(tmpl, samrec, null)); } }
/**
 * Uses a validator constructed with argument {@code 1} (presumably the unknowns penalty) and
 * checks that a record whose AS attribute is 1 is rejected with a message reporting a computed
 * score of 3, and that the same record is accepted once AS is set to 3.
 *
 * @throws Exception if the validator fails unexpectedly
 */
public void testUnknownPenalty() throws Exception {
  final String sdfBases = "AGCCCCCNTACGTAAATAAGACATC ACGATGATCA";
  final String qualities = "4316%%68883-56+141663,2.3----45/.,2";
  final String templateBases = "AGCCCCCACNCGTAAATAAGACATCTTTTTACGATGATCA";

  final SuperCigarValidator scv = new SuperCigarValidator(1);
  final SAMRecord rec = new SAMRecord(null);
  rec.setAlignmentStart(1);
  rec.setCigarString("8=1X14=5N10=");
  rec.setReadString("AGCCCCCNTACGTAAATAAGACATCACGATGATCA");
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "7=1R1X1T15=5N10=");
  rec.setBaseQualityString(qualities);
  rec.setAttribute(SamUtils.CG_READ_DELTA, "TA");
  rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 1);
  rec.setFlags(67);

  // AS:i:1 disagrees with the score the validator computes.
  scv.setData(
      rec,
      DnaUtils.encodeString(sdfBases.replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(qualities));
  scv.setTemplate(DnaUtils.encodeString(templateBases));
  scv.parse();
  assertFalse(scv.getInvalidReason(), scv.isValid());
  assertEquals(
      scv.getInvalidReason(),
      "Super cigar alignment score was 3, but AS attribute was 1",
      scv.getInvalidReason());

  // With the matching score the same record validates.
  rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 3);
  scv.setData(
      rec,
      DnaUtils.encodeString(sdfBases.replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(qualities));
  scv.setTemplate(DnaUtils.encodeString(templateBases));
  scv.parse();
  assertTrue(scv.getInvalidReason(), scv.isValid());
}
public void testOverlapPastStartPosition() throws Exception { final SuperCigarValidator validator = new SuperCigarValidator(0); final SAMRecord samrec = new SAMRecord(null); samrec.setAlignmentStart(3); samrec.setCigarString("2=2I16=6N10="); samrec.setReadString("ATAAGAAGGAGTGGCACTTCCCTCAGCTCA"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "2=2I1=4B1X1=1I17=6N10="); samrec.setBaseQualityString("20001.1-+,8/0/41373,1751662362"); samrec.setAttribute(SamUtils.CG_READ_DELTA, "AAGG"); samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, ").,1/"); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 6); samrec.setFlags(179); final String rawread = "TGAGCTGAGG GAAGTGCCACTCCTTCACTCCTTAT".replaceAll(" ", ""); final String rawqual = "2632661571 ,37314/0/8,+-1./1,.)10002".replaceAll(" ", ""); validator.setData( samrec, DnaUtils.encodeString(rawread), FastaUtils.asciiToRawQuality(rawqual)); validator.setTemplate( DnaUtils.encodeString( "TCAT GAAGGAGTGGCACTTCCACCTGCCTCAGCTCATGCGTGATATCCAGG".replaceAll(" ", ""))); // ATAAGAAGGAGTGGCACTTCCCTCAGCTCA // ATAAG // TCA TGAAGGAGTGGCACTTCCACCTGCCTCAGCTCATGCGTGATATCCAGG // GAGTGAAGGAGTGGCACTTC CCTCAGCTCA validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); }
/**
 * Feeds the validator a degenerate SDF read (a string containing only whitespace) and expects
 * the record to be reported invalid for both flag values 73 and 115.
 *
 * @throws Exception if the validator fails unexpectedly
 */
public void testDegenerate() throws Exception {
  final String blankRead = " ";
  final String qualities = "4316%%68883-56+141663,2.3----45/.,2";
  final String superCigar = "2=1X13=5N20=";

  final SuperCigarValidator scv = new SuperCigarValidator(0);
  final SAMRecord rec = new SAMRecord(null);
  rec.setAlignmentStart(1);
  rec.setCigarString(superCigar);
  rec.setReadString("GACGCCGAGGAAAAACAGGCGGATCGTCAGGAGTT");
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, superCigar);
  rec.setBaseQualityString(qualities);

  // First pass: flags 73.
  rec.setFlags(73);
  scv.setData(
      rec, DnaUtils.encodeString(blankRead), FastaUtils.asciiToRawQuality(qualities));
  scv.setTemplate(DnaUtils.encodeString("GAGGCCGAGGGGGGGAAAAACAGGCGGATCGTCAGGAGTT"));
  scv.parse();
  assertFalse(scv.isValid());

  // Second pass: flags 115, template left as previously set.
  rec.setFlags(115);
  scv.setData(
      rec, DnaUtils.encodeString(blankRead), FastaUtils.asciiToRawQuality(qualities));
  scv.parse();
  assertFalse(scv.isValid());
}
/**
 * Validates two forward-strand records (flags 67) whose super cigars contain a two-base
 * overlap ({@code 2B}): first an exact match, then one with a mismatch immediately after the
 * overlap. Both are expected to be valid.
 *
 * @throws Exception if the validator fails unexpectedly
 */
public void testCgOverlap() throws Exception {
  final String samQualities = "4316%8883-56+141663,2.3----45/.,2";
  final String sdfQualities = "4316%%68883-56+141663,2.3----45/.,2";

  final SuperCigarValidator scv = new SuperCigarValidator(0);
  final SAMRecord rec = new SAMRecord(null);

  // Case 1: overlap with no mismatches.
  rec.setAlignmentStart(1);
  rec.setCigarString("25=5N10=");
  rec.setReadString("tttgtaggtcggataaggcgttcatccgacacg");
  rec.setBaseQualityString(samQualities);
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B20=5N10=");
  rec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "%6");
  rec.setReadNegativeStrandFlag(false);
  rec.setFlags(67);
  scv.setData(
      rec,
      DnaUtils.encodeString("tttgtgtaggtcggataaggcgttc atccgacacg".replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(sdfQualities));
  scv.setTemplate(DnaUtils.encodeString("tttgtaggtcggataaggcgttcgggggatccgacacg"));
  scv.parse();
  assertTrue(scv.getInvalidReason(), scv.isValid());

  // Case 2: a mismatch directly after the overlap; template is left as previously set.
  rec.setCigarString("3=1X21=5N10=");
  rec.setReadString("tttataggtcggataaggcgttcatccgacacg");
  rec.setBaseQualityString(samQualities);
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B1X19=5N10=");
  rec.setAttribute(SamUtils.CG_READ_DELTA, "A");
  scv.setData(
      rec,
      DnaUtils.encodeString("tttgtataggtcggataaggcgttc atccgacacg".replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(sdfQualities));
  scv.parse();
  assertTrue(scv.getInvalidReason(), scv.isValid());
}
/** * HACK TO CREATE GATKSAMRECORD BASED ONLY A SAMRECORD FOR TESTING PURPOSES ONLY * * @param read */ public GATKSAMRecord(final SAMRecord read) { super(read.getHeader()); super.setReferenceIndex(read.getReferenceIndex()); super.setAlignmentStart(read.getAlignmentStart()); super.setReadName(read.getReadName()); super.setMappingQuality(read.getMappingQuality()); // indexing bin done below super.setCigar(read.getCigar()); super.setFlags(read.getFlags()); super.setMateReferenceIndex(read.getMateReferenceIndex()); super.setMateAlignmentStart(read.getMateAlignmentStart()); super.setInferredInsertSize(read.getInferredInsertSize()); SAMReadGroupRecord samRG = read.getReadGroup(); SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read); if (samAttr == null) { clearAttributes(); } else { setAttributes(samAttr); } if (samRG != null) { GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG); setReadGroup(rg); } super.setFileSource(read.getFileSource()); super.setReadName(read.getReadName()); super.setCigarString(read.getCigarString()); super.setReadBases(read.getReadBases()); super.setBaseQualities(read.getBaseQualities()); // From SAMRecord constructor: Do this after the above because setCigarString will clear it. GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read)); }
public void test4Gap() throws Exception { // // CCATTCAGTTGGAGACGTTGTGGACCTGACGCCTCTGCTCTTGCAAGTCAGGACAT // 24106 67 paolo-bac 420 255 16=4I1=1I2=4N10= paolo-bac // 735 315 CAGTTGGAGACGTTGTGNATGTGNACGCCTCTGC 213.3/22..103350/!2,2+/!14/-+-4//5 // AS:i:6 NM:i:5 MQ:i:255 XU:Z:5=1B12=4I1=1I2=4N10= XQ:Z:1 XR:Z:GNATT // XA:i:10 IH:i:1 final SuperCigarValidator validator = new SuperCigarValidator(0); final SAMRecord samrec = new SAMRecord(null); samrec.setAlignmentStart(6); samrec.setCigarString("16=4I1=1I2=4N10="); samrec.setReadString("CAGTTGGAGACGTTGTGNATGTGNACGCCTCTGC"); samrec.setBaseQualityString("213.3/22..103350/!2,2+/!14/-+-4//5"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=1B12=4I1=1I2=4N10="); samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "1"); samrec.setAttribute(SamUtils.CG_READ_DELTA, "GNATT"); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 7); samrec.setFlags(67); // CAGTT GGAGACGTTGTGNATG T GN ACGCCTCTGC validator.setData( samrec, DnaUtils.encodeString("CAGTTTGGAGACGTTGTGNATGTGN ACGCCTCTGC".replaceAll(" ", "")), FastaUtils.asciiToRawQuality( "213.31/22..103350/!2,2+/!14/-+-4//5")); // tttgt aggtcggataaggcgttcgg atccgacacg validator.setTemplate( DnaUtils.encodeString("CCATTCAGTTGGAGACGTTGTGGACCTGACGCCTCTGCTCTTGCAAGTCAGGACAT")); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 6); validator.setData( samrec, DnaUtils.encodeString("CAGTTTGGAGACGTTGTGNATGTGN ACGCCTCTGC".replaceAll(" ", "")), FastaUtils.asciiToRawQuality( "213.31/22..103350/!2,2+/!14/-+-4//5")); // tttgt aggtcggataaggcgttcgg atccgacacg validator.setTemplate( DnaUtils.encodeString("CCATTCAGTTGGAGACGTTGTGGACCTGACGCCTCTGCTCTTGCAAGTCAGGACAT")); validator.parse(); assertFalse(validator.getInvalidReason(), validator.isValid()); assertEquals( "Super cigar alignment score was 7, but AS attribute was 6", validator.getInvalidReason()); }
public void testCGOverlapWithDeletion2() throws Exception { // check that it is OK to not provide XQ if the overlap is deleted from the template final SuperCigarValidator validator = new SuperCigarValidator(0); final SAMRecord samrec = new SAMRecord(null); // GGGCCTGCAC // DDD // BB // TGGCCAAGGAGCTGTGTGA // GGGCCTGCACCTGGCCAAGGAGCTGTGTGA // samrec.setAlignmentStart(1); samrec.setCigarString("10=1D19="); samrec.setReadString("GGGCCTGCACTGGCCAAGGAGCTGTGTGA"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=3D2B19="); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 4); samrec.setBaseQualityString("/////////////////////////////"); samrec.setFlags(131); final byte[] sdfRead = DnaUtils.encodeString("GGGCCTGCACTGGCCAAGGAGCTGTGTGA".replaceAll(" ", "")); final byte[] sdfQualities = FastaUtils.asciiToRawQuality("/////////////////////////////"); validator.setData(samrec, sdfRead, sdfQualities); validator.setTemplate(DnaUtils.encodeString("GGGCCTGCACCTGGCCAAGGAGCTGTGTGA")); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); // theoretical alignment that probably isn't handled: // ACG // DDD // G // BB // C // D // TACGTACGTACGT // ACGTACGTACGTACGTACGT // The overlap actually has a match on either side of it, however no template position is // repeated // in a match or mismatch so would not result in a flattened read needing an XQ field. }
public void testSoftClip() throws Exception { final SuperCigarValidator validator = new SuperCigarValidator(0); final SAMRecord samrec = new SAMRecord(null); samrec.setAlignmentStart(1); samrec.setCigarString("3S2=18=5N9=1X"); samrec.setReadString("AGCCCACACGTAAATAAGACATCACGATGATCA"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "3S2=2B20=5N9=1X"); samrec.setBaseQualityString("4316%8883-56+141663,2.3----45/.,2"); samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "%6"); samrec.setAttribute(SamUtils.CG_READ_DELTA, "AGCA"); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 4); samrec.setFlags(67); validator.setData( samrec, DnaUtils.encodeString("AGCCCCCACACGTAAATAAGACATC ACGATGATCA".replaceAll(" ", "")), FastaUtils.asciiToRawQuality("4316%%68883-56+141663,2.3----45/.,2")); validator.setTemplate(DnaUtils.encodeString("CCACACGTAAATAAGACATCGGGGGACGATGATCG")); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); // Alignment mismatch 3617884 153 chr18 1 255 4S19=7N2=1X7= * * // * AAAACCCTAACCCTAACCCTAACCCCAACCCTA 998140-,7::;26;;.39'(2347-88989+7 // AS:i:2 NM:i:1 XU:Z:5=2B20=7N2=1X7= XR:Z:C XQ:Z:42 IH:i:1 samrec.setAlignmentStart(1); samrec.setCigarString("4S19=7N2=1X7="); samrec.setReadString("AAAACCCTAACCCTAACCCTAACCCCAACCCTA"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "4S1=2B1S19=7N2=1X7="); samrec.setBaseQualityString("998140-,7::;26;;.39'(2347-88989+7"); samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "42"); samrec.setAttribute(SamUtils.CG_READ_DELTA, "AAAAAC"); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 6); samrec.setFlags(67); validator.setData( samrec, DnaUtils.encodeString("AAAACACCCTAACCCTAACCCTAAC CCCAACCCTA".replaceAll(" ", "")), FastaUtils.asciiToRawQuality("99814420-,7::;26;;.39'(2347-88989+7")); validator.setTemplate(DnaUtils.encodeString("CCCTAACCCTAACCCTAACCCTTACCCCTAACCCTA")); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); // 153 // TAGGGTTGGG 
GTTAGGGTTAGGGTTAGGGTGTTTT"), // DnaUtils.fastqToPhred("7+98988-7432('93.;;62;::7,-02441899")); samrec.setAlignmentStart(1); samrec.setCigarString("10=7N19=4S"); samrec.setReadString("AGCCCACACGTAAATAAGACATCACGATGATCA"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=7N19=1S2B1=4S"); samrec.setBaseQualityString("8:::::79:775986<=<<96576767679808"); samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "88"); samrec.setAttribute(SamUtils.CG_READ_DELTA, "AATCA"); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 5); samrec.setFlags(139); validator.setData( samrec, DnaUtils.encodeString("AGCCCACACG TAAATAAGACATCACGATGAGATCA".replaceAll(" ", "")), FastaUtils.asciiToRawQuality("8:::::79:775986<=<<9657676768879808")); validator.setTemplate(DnaUtils.encodeString("AGCCCACACGTTCCCCTTAAATAAGACATCACGATG")); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); samrec.setFlags(115); validator.setData( samrec, DnaUtils.encodeString("TGATCTCATCGTGATGTCTTATTTA CGTGTGGGCT".replaceAll(" ", "")), FastaUtils.asciiToRawQuality("8089788676767569<<=<689577:97:::::8")); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); }
public void testQualities() throws Exception { // first, non-rc /*237726 67 simulatedSequence1 3 255 22=6N10= = 170 167 TGCCCCCCTGAGAATGAATGTTGGACGAAATA )*N\S\7@*`[4DRA8VKE-JF:KP0<D:/"K AS:i:0 NM:i:0 MQ:i:255 XU:Z:5=3B20=6N10= TGCCCCCCCCCTGAGAATGAATGTTGGACGAAATA )*N\SV55\7@*`[4DRA8VKE-JF:KP0<D:/"K XQ:Z:V55 XA:i:1 IH:i:1*/ final SuperCigarValidator validator = new SuperCigarValidator(0); final SAMRecord samrec = new SAMRecord(null); samrec.setAlignmentStart(3); samrec.setCigarString("22=6N10="); samrec.setReadString("tgcccccctgagaatgaatgttggacgaaata"); samrec.setBaseQualityString(")*N!S!7@*`[4DRA8VKE-JF:KP0<D:/!K"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=3B20=6N10="); samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "V55"); samrec.setFlags(67); validator.setData( samrec, DnaUtils.encodeString("tgccccccccctgagaatgaatgtt ggacgaaata".replaceAll(" ", "")), FastaUtils.asciiToRawQuality( ")*N!SV55!7@*`[4DRA8VKE-JF:KP0<D:/!K")); // tttgt aggtcggataaggcgttcgg atccgacacg validator.setTemplate( DnaUtils.encodeString("attgcccccctgagaatgaatgttatgtacggacgaaatatgtaaccata")); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); // second, non-rc /*184100 131 simulatedSequence1 191 255 10=6N4=1X18= = 11 -180 AGCTTCTATAGCGGAATTGAGCGGAACCGCACG YTD$B1L!`_<"L'V8W=72T#YU]K@,#KUA> AS:i:1 NM:i:1 MQ:i:255 XU:Z:10=6N4=1X15=2B5= XQ:Z:%* XR:Z:A XA:i:1 IH:i:1 AGCTTCTATAGCGGAATTGAGCGGAACCGCGCACG YTD$B1L!`_<"L'V8W=72T#YU]K@,%*#KUA> */ samrec.setAlignmentStart(2); samrec.setCigarString("10=6N4=1X18="); samrec.setReadString("AGCTTCTATAGCGGAATTGAGCGGAACCGCACG"); samrec.setBaseQualityString("YTD$B1L!`_<!L'V8W=72T#YU]K@,#KUA>"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=6N4=1X15=2B5="); samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "%*"); samrec.setAttribute(SamUtils.CG_READ_DELTA, "A"); samrec.setFlags(131); validator.setData( samrec, DnaUtils.encodeString("AGCTTCTATA GCGGAATTGAGCGGAACCGCGCACG".replaceAll(" ", "")), FastaUtils.asciiToRawQuality( 
"YTD$B1L!`_<!L'V8W=72T#YU]K@,%*#KUA>")); // YTD$B1L!`_ <!L'V8W=72T#YU]K@,%*#KUA> validator.setTemplate( DnaUtils.encodeString("TAGCTTCTATAGGGGGCGCGGTATTGAGCGGAACCGCACGTGCTATTTTCC")); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); // first, rc /*137629 115 simulatedSequence1 195 255 10=6N9=1I13= = 13 -181 TCTATAGGGGTATTGAGCGAGAACCGCACGTGC ^#R"E\`,),UQANL6J/J"G/P'^;<RIX4O$ AS:i:2 NM:i:1 MQ:i:255 XU:Z:10=6N9=1I10=2B5= XQ:Z:<O XR:Z:A XA:i:3 IH:i:1 GCACGCGTGCGGTTCTCGCTCAATACCCCTATAGA $O4XIO<R<;^'P/G"J/J6LNAQU,),`\E"R#^ */ samrec.setAlignmentStart(6); samrec.setCigarString("10=6N9=1I13="); samrec.setReadString("TCTATAGGGGTATTGAGCGAGAACCGCACGTGC"); samrec.setBaseQualityString("^#R!E!`,),UQANL6J/J!G/P'^;<RIX4O$"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=6N9=1I10=2B5="); samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "<O"); samrec.setAttribute(SamUtils.CG_READ_DELTA, "A"); samrec.setFlags(115); validator.setData( samrec, DnaUtils.encodeString("GCACGCGTGCGGTTCTCGCTCAATA CCCCTATAGA".replaceAll(" ", "")), FastaUtils.asciiToRawQuality( "$O4XIO<R<;^'P/G!J/J6LNAQU,),`!E!R#^")); // YTD$B1L!`_ <!L'V8W=72T#YU]K@,%*#KUA> validator.setTemplate( DnaUtils.encodeString("TAGCTTCTATAGGGGGCGCGGTATTGAGCGGAACCGCACGTGCTATTTTCC")); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); // second, rc /*137629 179 simulatedSequence1 13 255 9=1X13=6N10= = 195 181 AGAATGAATCTTATGTACGGACGGTAACCATAA ^#R"E,),UQANL6J/J"G/P'^;<R<OIX4O$ AS:i:1 NM:i:1 MQ:i:255 XU:Z:5=2B6=1X13=6N10= XQ:Z:\` XR:Z:C XA:i:3 IH:i:1 TTATGGTTACCGTCCGTACATAAGATTCATATTCT $O4XIO<R<;^'P/G"J/J6LNAQU,),`\E"R#^ */ samrec.setAlignmentStart(2); samrec.setCigarString("9=1X13=6N10="); samrec.setReadString("AGAATGAATCTTATGTACGGACGGTAACCATAA"); samrec.setBaseQualityString("^#R!E,),UQANL6J/J!G/P'^;<R<OIX4O$"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B6=1X13=6N10="); samrec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "!`"); 
samrec.setAttribute(SamUtils.CG_READ_DELTA, "C"); samrec.setFlags(179); validator.setData( samrec, DnaUtils.encodeString("TTATGGTTAC CGTCCGTACATAAGATTCATATTCT".replaceAll(" ", "")), FastaUtils.asciiToRawQuality( "$O4XIO<R<;^'P/G!J/J6LNAQU,),`!E!R#^")); // YTD$B1L!`_ <!L'V8W=72T#YU]K@,%*#KUA> validator.setTemplate(DnaUtils.encodeString("GAGAATGAATGTTATGTACGGACGAAATATGTAACCATAACACC")); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); }
/**
 * Walks the validator through a series of inconsistent records and checks the reported
 * reason for each: missing or wrong read delta, a claimed mismatch that is really a match, a
 * wrong inserted base, a claimed match that is really a mismatch, differing qualities, an
 * overlap quality of the wrong length, and a missing overlap quality.
 *
 * @throws Exception if the validator fails unexpectedly
 */
public void testMismatchFailures() throws Exception {
  final String fullQualities = "4316%%68883-56+141663,2.3----45/.,2";
  final String shortQualities = "4316%8883-56+141663,2.3----45/.,2";
  final String mismatchCigar = "2=1X13=5N20=";
  final String insertCigar = "4=1I5=7N25=";
  final String overlapRead = "tttgtaggtcggataaggcgttcatccgacacg";
  final String overlapSdfRead = "tttgtgtaggtcggataaggcgttc atccgacacg";

  final SuperCigarValidator scv = new SuperCigarValidator(0);
  final SAMRecord rec = new SAMRecord(null);

  // 1. A mismatch is described but no read delta attribute is present.
  rec.setAlignmentStart(1);
  rec.setCigarString(mismatchCigar);
  rec.setReadString("GACGCCGAGGAAAAACAGGCGGATCGTCAGGAGTT");
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, mismatchCigar);
  rec.setBaseQualityString(fullQualities);
  rec.setFlags(73);
  scv.setData(
      rec,
      DnaUtils.encodeString("GACGCCGAGG AAAAACAGGCGGATCGTCAGGAGTT".replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(fullQualities));
  scv.setTemplate(DnaUtils.encodeString("GAGGCCGAGGGGGGGAAAAACAGGCGGATCGTCAGGAGTT"));
  try {
    scv.parse();
    assertFalse(scv.isValid());
    assertTrue(
        scv.getInvalidReason(),
        scv.getInvalidReason()
            .contains("Read delta (" + SamUtils.CG_READ_DELTA + ") too short, "));
  } catch (final AssertionError e) {
    // Alternative outcome accepted by this test: an AssertionError with this exact message.
    assertEquals("readDelta.len=0 but should be 1", e.getMessage());
  }

  // 2. The read delta is present but names the wrong base.
  rec.setAttribute(SamUtils.CG_READ_DELTA, "T");
  scv.setData(
      rec,
      DnaUtils.encodeString("GACGCCGAGG AAAAACAGGCGGATCGTCAGGAGTT".replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(fullQualities));
  scv.parse();
  assertFalse(scv.isValid());
  assertTrue(
      scv.getInvalidReason(),
      scv.getInvalidReason()
          .contains(SamUtils.CG_READ_DELTA + " value: T does not match read value: C"));

  // 3. The SDF read matches the template where the super cigar claims a mismatch.
  scv.setData(
      rec,
      DnaUtils.encodeString("GAGGCCGAGG AAAAACAGGCGGATCGTCAGGAGTT".replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(fullQualities));
  scv.parse();
  assertFalse(scv.isValid());
  assertTrue(scv.getInvalidReason(), scv.getInvalidReason().contains("Expected mismatch"));

  // 4. The inserted base in the SDF read differs from the read delta.
  rec.setAlignmentStart(5);
  rec.setCigarString(insertCigar);
  rec.setReadString("CTGTGGCATCGGGGGACCTGGGGCCCTCNCTGAGT");
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, insertCigar);
  rec.setAttribute(SamUtils.CG_READ_DELTA, "A");
  scv.setData(
      rec,
      DnaUtils.encodeString("CTGTGGCATC GGGGGACCTGGGGCCCTCNCTGAGT".replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(fullQualities));
  // NOTE: this template literal contains a space and is encoded without stripping it.
  scv.setTemplate(
      DnaUtils.encodeString(
          "TGTTCTGTG CATCTTCCCTTGGGGGACCTGNGGCCCTCACTGAGTGGGTCCTCCATGGGTGACTGGTGA"));
  scv.parse();
  assertFalse(scv.getInvalidReason(), scv.isValid());
  assertTrue(
      scv.getInvalidReason(),
      scv.getInvalidReason()
          .contains("SDF read insert: G does not match SAM " + SamUtils.CG_READ_DELTA + ": A,"));

  // 5. Flags 179 record where the SDF read disagrees with the template on a claimed match.
  rec.setAlignmentStart(2);
  rec.setCigarString("23=6N10=");
  rec.setReadString("CTTCAGCGATGGAGAAACTCGGGTGTCTACGTA");
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B20=6N10=");
  rec.setAttribute(SamUtils.CG_READ_DELTA, null);
  rec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "%6");
  rec.setBaseQualityString(shortQualities);
  rec.setFlags(179);
  scv.setData(
      rec,
      DnaUtils.encodeString("TACGTAGACA CCCGAGTGTCTCCATCGCTGTGAAG".replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality("2,./54----3.2,366141+65-38886%%6134"));
  scv.setTemplate(
      DnaUtils.encodeString("GCTTCAGCGATGGAGAAACTCGGGAAGTCGTGTCTACGTAGAACGTAGTT"));
  scv.parse();
  assertFalse(scv.getInvalidReason(), scv.isValid());
  assertTrue(
      scv.getInvalidReason(),
      scv.getInvalidReason().contains("Expected match, SDF read=C, template=A,"));

  // 6. SAM and SDF quality strings differ.
  rec.setAlignmentStart(1);
  rec.setCigarString("10M5N25M");
  rec.setReadString("GAGGCCGAGGCAGGCGGATCGTCAGGAGTTAAAAA");
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=5N25=");
  rec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, null);
  rec.setBaseQualityString("4316%668883-56+141663,2.3----45/.,2");
  rec.setFlags(73);
  scv.setData(
      rec,
      DnaUtils.encodeString("GAGGCCGAGG CAGGCGGATCGTCAGGAGTTAAAAA".replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(fullQualities));
  scv.setTemplate(DnaUtils.encodeString("GAGGCCGAGGGGGGGCAGGCGGATCGTCAGGAGTTAAAAA"));
  scv.parse();
  assertFalse(scv.getInvalidReason(), scv.isValid());
  assertTrue(
      scv.getInvalidReason(),
      scv.getInvalidReason().contains("SDF and SAM qualities don't match,"));

  // 7. Overlap quality is one character short, so setData itself must throw.
  rec.setAlignmentStart(1);
  rec.setCigarString("25=5N10=");
  rec.setReadString(overlapRead);
  rec.setBaseQualityString(shortQualities);
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B20=5N10=");
  rec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "6");
  rec.setReadNegativeStrandFlag(false);
  rec.setFlags(67);
  try {
    scv.setData(
        rec,
        DnaUtils.encodeString(overlapSdfRead.replaceAll(" ", "")),
        FastaUtils.asciiToRawQuality(fullQualities));
    fail();
  } catch (final BadSuperCigarException iae) {
    assertTrue(
        iae.getMessage(),
        iae.getMessage()
            .contains("SAM record qualities plus XQ not expected length. Was: 34 expected: 35"));
  }

  // 8. An overlap is described but the overlap quality attribute is absent.
  rec.setAlignmentStart(1);
  rec.setCigarString("25=5N10=");
  rec.setReadString(overlapRead);
  rec.setBaseQualityString(fullQualities);
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B20=5N10=");
  rec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, null);
  rec.setReadNegativeStrandFlag(false);
  rec.setFlags(67);
  scv.setTemplate(DnaUtils.encodeString("tttgtaggtcggataaggcgttcgggggatccgacacg"));
  scv.setData(
      rec,
      DnaUtils.encodeString(overlapSdfRead.replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(fullQualities));
  scv.parse();
  assertFalse(scv.getInvalidReason(), scv.isValid());
  assertTrue(
      scv.getInvalidReason(),
      scv.getInvalidReason()
          .contains(
              "Overlap described but no " + SamUtils.CG_OVERLAP_QUALITY + " field present"));
}
/**
 * Exercises the validator on straightforward records: an exact match, a single mismatch (with
 * a wrong and then an absent alignment score), one- and two-base deletions, an insertion, and
 * a flags-179 overlap record. All are expected to validate except the mismatch record whose
 * AS attribute is still 0.
 *
 * @throws Exception if the validator fails unexpectedly
 */
public void testSimpleMatches() throws Exception {
  final String qualities = "4316%%68883-56+141663,2.3----45/.,2";
  final String mismatchSdfRead = "GACGCCGAGG CAGGCGGATCGTCAGGAGTTAAAAA";
  final String deletionRead = "CTGTCATCTTACCTGGGGCCCTCNCTGAGTGGGTC";
  final String deletionSdfRead = "CTGTCATCTT ACCTGGGGCCCTCNCTGAGTGGGTC";

  final SuperCigarValidator scv = new SuperCigarValidator(0);
  final SAMRecord rec = new SAMRecord(null);

  // Exact match across an intron.
  rec.setAlignmentStart(1);
  rec.setCigarString("10M5N25M");
  rec.setReadString("GAGGCCGAGGCAGGCGGATCGTCAGGAGTTAAAAA");
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "10=5N25=");
  rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 0);
  rec.setBaseQualityString(qualities);
  rec.setFlags(73);
  scv.setData(
      rec,
      DnaUtils.encodeString("GAGGCCGAGG CAGGCGGATCGTCAGGAGTTAAAAA".replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(qualities));
  scv.setTemplate(DnaUtils.encodeString("GAGGCCGAGGGGGGGCAGGCGGATCGTCAGGAGTTAAAAA"));
  scv.parse();
  assertTrue(scv.getInvalidReason(), scv.isValid());

  // Single mismatch while AS is still 0: rejected.
  rec.setCigarString("2=1X7=5N25=");
  rec.setReadString("GACGCCGAGGCAGGCGGATCGTCAGGAGTTAAAAA");
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "2=1X7=5N25=");
  rec.setAttribute(SamUtils.CG_READ_DELTA, "C");
  scv.setData(
      rec,
      DnaUtils.encodeString(mismatchSdfRead.replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(qualities));
  scv.parse();
  assertFalse(scv.getInvalidReason(), scv.isValid());

  // Same record with the AS attribute removed: accepted.
  rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, null);
  scv.setData(
      rec,
      DnaUtils.encodeString(mismatchSdfRead.replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(qualities));
  scv.parse();
  assertTrue(scv.getInvalidReason(), scv.isValid());

  // One-base deletion.
  rec.setAlignmentStart(5);
  rec.setCigarString("4=1D6=5N25=");
  rec.setReadString(deletionRead);
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "4=1D6=5N25=");
  rec.setAttribute(SamUtils.CG_READ_DELTA, null);
  rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 2);
  scv.setData(
      rec,
      DnaUtils.encodeString(deletionSdfRead.replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(qualities));
  scv.setTemplate(
      DnaUtils.encodeString("TGTTCTGTGCATCTTCCCTTACCTGNGGCCCTCACTGAGTGGGTCCTCCATGGGTGACTGGTGA"));
  scv.parse();
  assertTrue(scv.getInvalidReason(), scv.isValid());

  // One-base insertion, no AS attribute; template left as previously set.
  rec.setAlignmentStart(5);
  rec.setCigarString("4=1I5=7N25=");
  rec.setReadString("CTGTAGCATCACCTGGGGCCCTCNCTGAGTGGGTC");
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "4=1I5=7N25=");
  rec.setAttribute(SamUtils.CG_READ_DELTA, "A");
  rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, null);
  scv.setData(
      rec,
      DnaUtils.encodeString("CTGTAGCATC ACCTGGGGCCCTCNCTGAGTGGGTC".replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(qualities));
  scv.parse();
  assertTrue(scv.getInvalidReason(), scv.isValid());

  // Two-base deletion.
  rec.setAlignmentStart(5);
  rec.setCigarString("4=2D6=5N25=");
  rec.setReadString(deletionRead);
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "4=2D6=5N25=");
  rec.setAttribute(SamUtils.CG_READ_DELTA, null);
  rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 3);
  scv.setData(
      rec,
      DnaUtils.encodeString(deletionSdfRead.replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality(qualities));
  scv.setTemplate(
      DnaUtils.encodeString("TGTTCTGTGGCATCTTCCCTTACCTGNGGCCCTCACTGAGTGGGTCCTCCATGGGTGACTGGTGA"));
  scv.parse();
  assertTrue(scv.getInvalidReason(), scv.isValid());

  // reverse complement
  rec.setAlignmentStart(2);
  rec.setCigarString("23=6N10=");
  rec.setReadString("CTTCAGCGATGGAGAAACTCGGGTGTCTACGTA");
  rec.setAttribute(SamUtils.CG_SUPER_CIGAR, "5=2B20=6N10=");
  rec.setAttribute(SamUtils.CG_READ_DELTA, null);
  rec.setAttribute(SamUtils.CG_OVERLAP_QUALITY, "%6");
  rec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 0);
  rec.setBaseQualityString("4316%8883-56+141663,2.3----45/.,2");
  rec.setFlags(179);
  scv.setData(
      rec,
      DnaUtils.encodeString("TACGTAGACA CCCGAGTTTCTCCATCGCTGTGAAG".replaceAll(" ", "")),
      FastaUtils.asciiToRawQuality("2,./54----3.2,366141+65-38886%%6134"));
  scv.setTemplate(
      DnaUtils.encodeString("GCTTCAGCGATGGAGAAACTCGGGAAGTCGTGTCTACGTAGAACGTAGTT"));
  scv.parse();
  assertTrue(scv.getInvalidReason(), scv.isValid());
}
public void testCGOverlapWithDeletion() throws Exception { // check that it is OK to not provide XQ if the overlap is deleted from the template final SuperCigarValidator validator = new SuperCigarValidator(0); final SAMRecord samrec = new SAMRecord(null); samrec.setAlignmentStart(1); samrec.setCigarString("29="); samrec.setReadString("AGGCAGGTAGATCATGAGGTGAAGAGATC"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "19=2B2D10="); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 3); samrec.setBaseQualityString("/////////////////////////////"); samrec.setFlags(179); final byte[] sdfRead = DnaUtils.encodeString("GATCTCTTCACCTCATGATCTACCTGCCT".replaceAll(" ", "")); final byte[] sdfQualities = FastaUtils.asciiToRawQuality("/////////////////////////////"); validator.setData(samrec, sdfRead, sdfQualities); validator.setTemplate(DnaUtils.encodeString("AGGCAGGTAGATCATGAGGTGAAGAGATC")); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "19=2B2N10="); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 0); validator.setData(samrec, sdfRead, sdfQualities); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "19=2B2H10="); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 0); validator.setData(samrec, sdfRead, sdfQualities); validator.parse(); assertTrue(validator.getInvalidReason(), validator.isValid()); samrec.setCigarString("19=2D8="); samrec.setReadString( "TGGCAGGTAGATCATGAGGAAGAGATC"); // <- this doesn't seem to be checked by anything samrec.setBaseQualityString("/////////////////////////////"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "19=2B1X1=2D8="); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 4); samrec.setAttribute(SamUtils.CG_READ_DELTA, "T"); validator.setData(samrec, sdfRead, sdfQualities); validator.parse(); assertFalse(validator.isValid()); assertTrue( validator.getInvalidReason(), 
validator .getInvalidReason() .contains("Overlap described but no XQ field present in SAM record")); samrec.setReadString("AGGCAGGTAGATCATGAGGAAGAGATC"); samrec.setBaseQualityString("/////////////////////////////"); samrec.setAttribute(SamUtils.CG_SUPER_CIGAR, "19=2B1X1=3X3=2X"); samrec.setAttribute(SamUtils.CG_READ_DELTA, "TAAGTC"); samrec.setAttribute(SamUtils.ATTRIBUTE_ALIGNMENT_SCORE, 6); validator.setData(samrec, sdfRead, sdfQualities); validator.parse(); assertFalse(validator.isValid()); assertTrue( validator.getInvalidReason(), validator .getInvalidReason() .contains("Overlap described but no XQ field present in SAM record")); }
/**
 * Labels every k-mer of each selected, annotated contig alignment with its origin code and
 * writes the results.
 *
 * <p>Outputs produced, by the field they are written to:
 *
 * <ul>
 *   <li>{@code cout}: two lines per annotated contig built from its {@code ref0ToCanonicalExact}
 *       and {@code ref1ToCanonicalExact} columns;
 *   <li>{@code bout}: a BAM with one record per k-mer, whose read group is the k-mer's origin
 *       code name (or the contig's sample read group when the k-mer has no known code);
 *   <li>{@code sout}: one table row per contig with the number of k-mers seen for each code;
 *   <li>{@code beout}: one tab-separated line per primary contig alignment;
 *   <li>{@code out}: a tab-separated listing of every recorded k-mer start and its parentage.
 * </ul>
 *
 * <p>The k-mer size is derived from the first row of the annotation table only
 * ({@code seq.length() - kmerOrigin.length() + 1}).
 */
@Override
public void execute() {
  log.info("Initializing kmer code map...");

  // Numeric value written to the "Parentage" column for each k-mer origin code.
  Map<Character, Integer> kmerCodeIndices = new HashMap<>();
  kmerCodeIndices.put('0', 1);
  kmerCodeIndices.put('A', 3);
  kmerCodeIndices.put('B', 4);
  kmerCodeIndices.put('C', 5);
  kmerCodeIndices.put('_', 6);
  kmerCodeIndices.put('.', 7);
  kmerCodeIndices.put('1', 9);

  // Default names per code; insertion order fixes the order of read groups and stats columns.
  Map<Character, String> kmerCodeNames = new LinkedHashMap<>();
  kmerCodeNames.put('0', "ref0");
  kmerCodeNames.put('A', "repetitive");
  kmerCodeNames.put('B', "both");
  kmerCodeNames.put('C', "lowcoverage");
  kmerCodeNames.put('_', "lowconfidence");
  kmerCodeNames.put('.', "novel");
  kmerCodeNames.put('1', "ref1");

  // Optional overrides, keyed by the one-character code rendered as a String.
  if (KMER_CODE_NAMES != null) {
    for (Character c : kmerCodeNames.keySet()) {
      String cStr = String.valueOf(c);
      if (KMER_CODE_NAMES.containsKey(cStr)) {
        kmerCodeNames.put(c, KMER_CODE_NAMES.get(cStr));
      }
    }
  }

  for (Character c : kmerCodeNames.keySet()) {
    log.info(" {} {}: {}", c, kmerCodeIndices.get(c), kmerCodeNames.get(c));
  }

  log.info("Loading annotated contigs...");
  Map<String, Map<String, String>> annotatedContigs = new HashMap<>();
  int kmerSize = 0;
  if (ANN.length() > 0) {
    TableReader tr = new TableReader(ANN);
    for (Map<String, String> te : tr) {
      String contigName = te.get("contigName");
      if (kmerSize == 0) {
        // One origin code per k-mer, so a sequence of length n has n - k + 1 codes.
        kmerSize = te.get("seq").length() - te.get("kmerOrigin").length() + 1;
      }
      annotatedContigs.put(contigName, te);

      // "NA" and "*:0-0" are both normalised to "NA:0-0" before splitting into
      // {name, start, end}.
      String ref0Interval = te.get("ref0ToCanonicalExact");
      if (ref0Interval.equals("NA") || ref0Interval.equals("*:0-0")) {
        ref0Interval = "NA:0-0";
      }
      String[] ref0ToCanonicalExact = ref0Interval.split("[:-]");

      String ref1Interval = te.get("ref1ToCanonicalExact");
      if (ref1Interval.equals("NA") || ref1Interval.equals("*:0-0")) {
        ref1Interval = "NA:0-0";
      }
      String[] ref1ToCanonicalExact = ref1Interval.split("[:-]");

      String contigLabel = te.get("sampleName") + "_" + te.get("accession") + "_" + contigName;
      cout.println(
          contigLabel
              + " "
              + ref0ToCanonicalExact[0]
              + " "
              + ref0ToCanonicalExact[1]
              + " "
              + ref0ToCanonicalExact[2]
              + " radius1=0.8r");
      cout.println(
          contigLabel
              + " "
              + ref1ToCanonicalExact[0]
              + " "
              + ref1ToCanonicalExact[1]
              + " "
              + ref1ToCanonicalExact[2]
              + " radius2=0.6r");
    }
  }
  log.info(" contigs: {}", annotatedContigs.size());
  log.info(" kmer size: {}", kmerSize);

  log.info("Computing kmer inheritance information...");
  // One read group per origin code, so every k-mer record can be tagged with its code name.
  SAMFileHeader sfh = CONTIGS.getFileHeader();
  for (Character c : kmerCodeNames.keySet()) {
    SAMReadGroupRecord rgr = new SAMReadGroupRecord(kmerCodeNames.get(c));
    rgr.setSample(kmerCodeNames.get(c));
    sfh.addReadGroup(rgr);
  }

  SAMFileWriterFactory sfwf = new SAMFileWriterFactory();
  sfwf.setCreateIndex(true);
  SAMFileWriter sfw = sfwf.makeBAMWriter(sfh, false, bout);
  try {
    // NOTE(review): tw is never closed or flushed explicitly in this method -- confirm that
    // TableWriter does not buffer entries.
    TableWriter tw = new TableWriter(sout);
    Set<IGVEntry> igvEntries = new TreeSet<>();
    int numContigs = 0;
    int numUnannotatedKmers = 0;

    for (SAMRecord contig : CONTIGS) {
      boolean selected =
          CONTIG_NAMES == null
              || CONTIG_NAMES.isEmpty()
              || CONTIG_NAMES.contains(contig.getReadName());
      if (!selected) {
        continue;
      }

      Map<String, String> te = annotatedContigs.get(contig.getReadName());
      if (te == null) {
        // Contigs without an annotation row are ignored entirely.
        continue;
      }

      String seq = contig.getReadString();
      String annSeq = te.get("seq");
      String kmerOrigin = te.get("kmerOrigin");

      // Origin code of every k-mer in the annotated sequence.
      Map<CortexKmer, Character> kmerCodes = new HashMap<>();
      for (int i = 0; i < kmerOrigin.length(); i++) {
        CortexKmer kmer = new CortexKmer(annSeq.substring(i, i + kmerSize));
        Character code = kmerOrigin.charAt(i);
        kmerCodes.put(kmer, code);
      }

      Map<Character, Integer> kmerStats = new HashMap<>();
      for (Character c : kmerCodeNames.keySet()) {
        kmerStats.put(c, 0);
      }

      // We want to be able to examine soft-clipped regions as well, so soft clips are
      // rewritten as matches and the alignment start is moved back over a leading clip.
      boolean changed = false;
      List<CigarElement> ces = new ArrayList<>();
      for (CigarElement ce : contig.getCigar().getCigarElements()) {
        if (ce.getOperator().equals(CigarOperator.S)) {
          ces.add(new CigarElement(ce.getLength(), CigarOperator.M));
          changed = true;
        } else {
          ces.add(ce);
        }
      }
      if (changed) {
        CigarElement firstCe = contig.getCigar().getCigarElements().get(0);
        if (firstCe.getOperator().equals(CigarOperator.S)) {
          contig.setAlignmentStart(contig.getAlignmentStart() - firstCe.getLength());
        }
        contig.setCigar(new Cigar(ces));
      }

      for (AlignmentBlock ab : contig.getAlignmentBlocks()) {
        // getReadStart() is 1-based: the block covers 0-based read offsets
        // [blockStart, blockEnd). The previous bound (getReadStart() + getLength()) ran one
        // base past the block.
        int blockStart = ab.getReadStart() - 1;
        int blockEnd = blockStart + ab.getLength();

        for (int i = blockStart; i < blockEnd; i++) {
          if (i + kmerSize > seq.length()) {
            // No full k-mer fits from here on. A k-mer ending exactly at the end of the
            // sequence is valid and is kept (the previous strict '<' test dropped it).
            break;
          }

          String kmerSeq = seq.substring(i, i + kmerSize);
          CortexKmer kmer = new CortexKmer(kmerSeq);

          SAMRecord skmer = new SAMRecord(CONTIGS.getFileHeader());
          skmer.setReadBases(kmerSeq.getBytes());

          List<CigarElement> cigarElements = new ArrayList<>();
          cigarElements.add(new CigarElement(kmerSize, CigarOperator.M));
          Cigar cigar = new Cigar(cigarElements);

          skmer.setReadName(contig.getReadName() + "." + kmer.getKmerAsString());
          skmer.setReferenceName(contig.getReferenceName());
          skmer.setCigar(cigar);
          skmer.setReadPairedFlag(false);
          skmer.setDuplicateReadFlag(false);
          skmer.setMateNegativeStrandFlag(false);
          skmer.setAlignmentStart(ab.getReferenceStart() - ab.getReadStart() + 1 + i);

          Character c = kmerCodes.get(kmer);
          String codeName = kmerCodeNames.get(c);

          // Prefer the read group named after the origin code; fall back to the read group
          // of the contig's own sample when the k-mer has no matching code.
          String parentReadGroupId = null;
          String sampleReadGroupId = null;
          for (SAMReadGroupRecord rgr : sfh.getReadGroups()) {
            if (rgr.getSample().equals(codeName)) {
              parentReadGroupId = rgr.getReadGroupId();
            }
            if (rgr.getSample().equals(contig.getReadGroup().getSample())) {
              sampleReadGroupId = rgr.getReadGroupId();
            }
          }
          skmer.setAttribute(
              "RG", parentReadGroupId != null ? parentReadGroupId : sampleReadGroupId);
          skmer.setMappingQuality(99);
          sfw.addAlignment(skmer);

          Integer seenSoFar = kmerStats.get(c);
          if (seenSoFar == null) {
            // No known origin code for this k-mer: it is still written to the BAM above, but
            // there is no counter or parentage value to record for it. Previously this case
            // failed with a NullPointerException on the counter update.
            numUnannotatedKmers++;
            continue;
          }
          kmerStats.put(c, seenSoFar + 1);

          IGVEntry igvEntry = new IGVEntry();
          igvEntry.chromosome = contig.getReferenceName();
          // One less than the record's alignment start set above.
          igvEntry.start = ab.getReferenceStart() - ab.getReadStart() + i;
          igvEntry.parentageName = kmerCodeNames.get(c);
          igvEntry.parentage = kmerCodeIndices.get(c);
          igvEntries.add(igvEntry);
        }
      }

      if (!contig.isSecondaryOrSupplementary()) {
        beout.println(
            contig.getReferenceName()
                + "\t"
                + contig.getAlignmentStart()
                + "\t"
                + contig.getAlignmentEnd()
                + "\t"
                + contig.getReadName()
                + "."
                + contig.getReadGroup().getSample());

        // Progress report roughly every tenth of the annotated contigs.
        if (annotatedContigs.size() > 10 && numContigs % (annotatedContigs.size() / 10) == 0) {
          log.info(" processed {}/{} contigs", numContigs, annotatedContigs.size());
        }
        numContigs++;
      }

      Map<String, String> stats = new LinkedHashMap<>();
      stats.put("contigName", contig.getReadName());
      stats.put("sampleName", contig.getReadGroup().getSample());
      for (Character c : kmerCodeNames.keySet()) {
        stats.put(kmerCodeNames.get(c), String.valueOf(kmerStats.get(c)));
      }
      tw.addEntry(stats);
    }

    if (numUnannotatedKmers > 0) {
      log.info(" kmers without a known origin code: {}", numUnannotatedKmers);
    }

    log.info("Writing kmer inheritance information...");
    out.printf("%s\t%s\t%s\t%s\t%s\n", "Chromosome", "Start", "End", "Feature", "Parentage");
    for (IGVEntry igvEntry : igvEntries) {
      out.printf(
          "%s\t%d\t%d\t%s\t%d\n",
          igvEntry.chromosome,
          igvEntry.start,
          igvEntry.start + 1,
          igvEntry.parentageName,
          igvEntry.parentage);
    }
  } finally {
    // Close the BAM writer even if processing fails part-way through.
    sfw.close();
  }
}