private ArrayList<Allele> computeConsensusAlleles( ReferenceContext ref, Map<String, AlignmentContext> contexts, AlignmentContextUtils.ReadOrientation contextType) { Allele refAllele = null, altAllele = null; GenomeLoc loc = ref.getLocus(); ArrayList<Allele> aList = new ArrayList<Allele>(); HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>(); int insCount = 0, delCount = 0; // quick check of total number of indels in pileup for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) { AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup(); insCount += indelPileup.getNumberOfInsertions(); delCount += indelPileup.getNumberOfDeletions(); } if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping) return aList; for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) { // todo -- warning, can be duplicating expensive partition here AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup(); for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) { // SAMRecord read = p.getRead(); GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead()); if (read == null) continue; if (ReadUtils.is454Read(read)) { continue; } /* if (DEBUG && p.isIndel()) { System.out.format("Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len:%d, Type:%s, EventBases:%s\n", read.getReadName(),read.getCigar().toString(),read.getAlignmentStart(),read.getAlignmentEnd(), p.getEventLength(),p.getType().toString(), p.getEventBases()); } */ String indelString = p.getEventBases(); if (p.isInsertion()) { boolean foundKey = false; if (read.getAlignmentEnd() == loc.getStart()) { // first corner condition: a read has an insertion at the end, and we're right at the // insertion. // In this case, the read could have any of the inserted bases and we need to build a // consensus for (String s : consensusIndelStrings.keySet()) { int cnt = consensusIndelStrings.get(s); if (s.startsWith(indelString)) { // case 1: current insertion is prefix of indel in hash map consensusIndelStrings.put(s, cnt + 1); foundKey = true; break; } else if (indelString.startsWith(s)) { // case 2: indel stored in hash table is prefix of current insertion // In this case, new bases are new key. consensusIndelStrings.remove(s); consensusIndelStrings.put(indelString, cnt + 1); foundKey = true; break; } } if (!foundKey) // none of the above: event bases not supported by previous table, so add new key consensusIndelStrings.put(indelString, 1); } else if (read.getAlignmentStart() == loc.getStart() + 1) { // opposite corner condition: read will start at current locus with an insertion for (String s : consensusIndelStrings.keySet()) { int cnt = consensusIndelStrings.get(s); if (s.endsWith(indelString)) { // case 1: current insertion is suffix of indel in hash map consensusIndelStrings.put(s, cnt + 1); foundKey = true; break; } else if (indelString.endsWith(s)) { // case 2: indel stored in hash table is suffix of current insertion // In this case, new bases are new key. consensusIndelStrings.remove(s); consensusIndelStrings.put(indelString, cnt + 1); foundKey = true; break; } } if (!foundKey) // none of the above: event bases not supported by previous table, so add new key consensusIndelStrings.put(indelString, 1); } else { // normal case: insertion somewhere in the middle of a read: add count to hash map int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0; consensusIndelStrings.put(indelString, cnt + 1); } } else if (p.isDeletion()) { indelString = String.format("D%d", p.getEventLength()); int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0; consensusIndelStrings.put(indelString, cnt + 1); } } /* if (DEBUG) { int icount = indelPileup.getNumberOfInsertions(); int dcount = indelPileup.getNumberOfDeletions(); if (icount + dcount > 0) { List<Pair<String,Integer>> eventStrings = indelPileup.getEventStringsWithCounts(ref.getBases()); System.out.format("#ins: %d, #del:%d\n", insCount, delCount); for (int i=0 ; i < eventStrings.size() ; i++ ) { System.out.format("%s:%d,",eventStrings.get(i).first,eventStrings.get(i).second); // int k=0; } System.out.println(); } } */ } int maxAlleleCnt = 0; String bestAltAllele = ""; for (String s : consensusIndelStrings.keySet()) { int curCnt = consensusIndelStrings.get(s); if (curCnt > maxAlleleCnt) { maxAlleleCnt = curCnt; bestAltAllele = s; } // if (DEBUG) // System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s) ); } // gdebug- if (maxAlleleCnt < minIndelCountForGenotyping) return aList; if (bestAltAllele.startsWith("D")) { // get deletion length int dLen = Integer.valueOf(bestAltAllele.substring(1)); // get ref bases of accurate deletion int startIdxInReference = (int) (1 + loc.getStart() - ref.getWindow().getStart()); // System.out.println(new String(ref.getBases())); byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen); if (Allele.acceptableAlleleBases(refBases)) { refAllele = Allele.create(refBases, true); altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); } } else { // insertion case if (Allele.acceptableAlleleBases(bestAltAllele)) { refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); altAllele = Allele.create(bestAltAllele, false); } } if (refAllele != null && altAllele != null) { aList.add(0, refAllele); aList.add(1, altAllele); } return aList; }
private Map<String, Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) { if (!stratifiedContext.hasExtendedEventPileup()) { return null; } ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup(); if (pileup == null) return null; int totalDepth = pileup.size(); Map<String, Object> map = new HashMap<String, Object>(); map.put(getKeyNames().get(0), totalDepth); // put total depth in right away if (totalDepth == 0) return map; int mq0 = 0; // number of "ref" reads that are acually mq0 HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>(); Allele refAllele = vc.getReference(); for (Allele allele : vc.getAlternateAlleles()) { if (allele.isNoCall()) { continue; // this does not look so good, should we die??? } alleleCounts.put(getAlleleRepresentation(allele), 0); } for (ExtendedEventPileupElement e : pileup.toExtendedIterable()) { if (e.getMappingQual() == 0) { mq0++; continue; } if (e.isInsertion()) { final String b = e.getEventBases(); if (alleleCounts.containsKey(b)) { alleleCounts.put(b, alleleCounts.get(b) + 1); } } else { if (e.isDeletion()) { if (e.getEventLength() == refAllele.length()) { // this is indeed the deletion allele recorded in VC final String b = DEL; if (alleleCounts.containsKey(b)) { alleleCounts.put(b, alleleCounts.get(b) + 1); } } // else { // System.out.print(" deletion of WRONG length found"); // } } } } if (mq0 == totalDepth) return map; String[] fracs = new String[alleleCounts.size()]; for (int i = 0; i < vc.getAlternateAlleles().size(); i++) fracs[i] = String.format( "%.3f", ((float) alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i)))) / (totalDepth - mq0)); map.put(getKeyNames().get(1), fracs); // map.put(getKeyNames().get(0), counts); return map; }