@Override
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    GenomeLoc refLocus = ref.getLocus();

    // advance to the first interval that is not strictly before the current locus
    while (currentInterval == null || currentInterval.isBefore(refLocus)) {
        if (!intervalListIterator.hasNext())
            return 0L;
        currentInterval = intervalListIterator.next();
        currentIntervalStatistics = intervalMap.get(currentInterval);
    }

    // the locus falls in the gap before the next interval; nothing to record
    if (currentInterval.isPast(refLocus))
        return 0L;

    byte[] mappingQualities = context.getBasePileup().getMappingQuals();
    byte[] baseQualities = context.getBasePileup().getQuals();
    int coverage = context.getBasePileup()
            .getBaseAndMappingFilteredPileup(minimumBaseQuality, minimumMappingQuality)
            .depthOfCoverage();
    int rawCoverage = context.size();

    IntervalStatisticLocus locusData =
            new IntervalStatisticLocus(mappingQualities, baseQualities, coverage, rawCoverage);
    currentIntervalStatistics.addLocus(refLocus, locusData);
    return 1L;
}
/**
 * Determine if the given loc overlaps any loc in the sorted set
 *
 * @param loc the location to test
 * @return true if loc overlaps at least one interval in this set, false otherwise
 */
public boolean overlaps(final GenomeLoc loc) {
    for (final GenomeLoc e : mArray) {
        if (e.overlapsP(loc)) {
            return true;
        }
    }
    return false;
}
/**
 * Return a deep copy of this collection.
 *
 * @return a new GenomeLocSortedSet, identical to the current GenomeLocSortedSet.
 */
public GenomeLocSortedSet clone() {
    GenomeLocSortedSet ret = new GenomeLocSortedSet(genomeLocParser);
    for (GenomeLoc loc : this.mArray) {
        // ensure a deep copy
        ret.mArray.add(
                genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStop()));
    }
    return ret;
}
public String toString() {
    StringBuilder s = new StringBuilder();
    s.append("[");
    for (GenomeLoc e : this) {
        s.append(" ");
        s.append(e.toString());
    }
    s.append("]");
    return s.toString();
}
private GenomeLoc createIntervalAfter(GenomeLoc interval) {
    // clamp the expanded interval to the end of the contig
    int contigLimit = getToolkit()
            .getSAMFileHeader()
            .getSequenceDictionary()
            .getSequence(interval.getContigIndex())
            .getSequenceLength();
    int start = Math.min(interval.getStop() + 1, contigLimit);
    int stop = Math.min(interval.getStop() + expandInterval, contigLimit);
    return parser.createGenomeLoc(interval.getContig(), interval.getContigIndex(), start, stop);
}
@Test
public void deleteSomeByRegion() {
    GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 100);
    mSortedSet.add(e);
    for (int x = 1; x < 50; x++) {
        GenomeLoc del = genomeLocParser.createGenomeLoc(contigOneName, x, x);
        mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser, del));
    }
    assertTrue(!mSortedSet.isEmpty());
    assertTrue(mSortedSet.size() == 1);
    GenomeLoc loc = mSortedSet.iterator().next();
    assertTrue(loc.getStop() == 100);
    assertTrue(loc.getStart() == 50);
}
/**
 * Test the reads according to an independently derived context.
 *
 * @param view the locus view over the given reads
 * @param range the genomic ranges covered by the view
 * @param reads the reads expected at each locus
 */
@Override
protected void testReadsInContext(
        LocusView view, List<GenomeLoc> range, List<GATKSAMRecord> reads) {
    AllLocusView allLocusView = (AllLocusView) view;

    // TODO: Should skip over loci not in the given range.
    GenomeLoc firstLoc = range.get(0);
    GenomeLoc lastLoc = range.get(range.size() - 1);
    GenomeLoc bounds = genomeLocParser.createGenomeLoc(
            firstLoc.getContig(), firstLoc.getStart(), lastLoc.getStop());

    for (int i = bounds.getStart(); i <= bounds.getStop(); i++) {
        GenomeLoc site = genomeLocParser.createGenomeLoc("chr1", i);
        AlignmentContext locusContext = allLocusView.next();
        Assert.assertEquals(locusContext.getLocation(), site, "Locus context location is incorrect");

        int expectedReadsAtSite = 0;
        for (GATKSAMRecord read : reads) {
            if (genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) {
                Assert.assertTrue(
                        locusContext.getReads().contains(read),
                        "Target locus context does not contain reads");
                expectedReadsAtSite++;
            }
        }
        Assert.assertEquals(
                locusContext.getReads().size(),
                expectedReadsAtSite,
                "Found wrong number of reads at site");
    }
}
@Test
public void mergingOverlappingAbove() {
    GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 0, 50);
    GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 49, 100);
    assertTrue(mSortedSet.size() == 0);
    mSortedSet.add(g);
    assertTrue(mSortedSet.size() == 1);
    mSortedSet.addRegion(e);
    assertTrue(mSortedSet.size() == 1);
    Iterator<GenomeLoc> iter = mSortedSet.iterator();
    GenomeLoc loc = iter.next();
    assertEquals(loc.getStart(), 0);
    assertEquals(loc.getStop(), 100);
    assertEquals(loc.getContigIndex(), 1);
}
@Test
public void fromSequenceDictionary() {
    mSortedSet =
            GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());
    // we should have one GenomeLoc per chromosome in the dictionary
    assertTrue(mSortedSet.size() == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES);
    int seqNumber = 0;
    for (GenomeLoc loc : mSortedSet) {
        assertTrue(loc.getStart() == 1);
        assertTrue(loc.getStop() == GenomeLocSortedSetUnitTest.CHROMOSOME_SIZE);
        assertTrue(loc.getContigIndex() == seqNumber);
        ++seqNumber;
    }
    assertTrue(seqNumber == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES);
}
private boolean overlapsKnownCNV(VariantContext cnv) {
    if (knownCNVs != null) {
        final GenomeLoc loc =
                getWalker().getToolkit().getGenomeLocParser().createGenomeLoc(cnv, true);
        IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());

        final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt =
                intervalTree.overlappers(loc.getStart(), loc.getStop());
        while (nodeIt.hasNext()) {
            final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
            if (overlapP > MIN_CNV_OVERLAP) return true;
        }
    }
    return false;
}
/**
 * Utility routine that prints out process information (including timing) every N records or every
 * M seconds, for N and M set in global variables.
 *
 * <p>Synchronized to ensure that even with multiple threads calling notifyOfProgress we still get
 * one clean stream of meter logs.
 *
 * <p>Note that this method doesn't actually print progress; it just registers it. A separate
 * printing daemon periodically polls the meter to print out progress.
 *
 * @param loc Current location, can be null if you are at the end of the processing unit
 * @param nTotalRecordsProcessed the total number of records we've processed
 */
public synchronized void notifyOfProgress(
        final GenomeLoc loc, final long nTotalRecordsProcessed) {
    if (nTotalRecordsProcessed < 0)
        throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0");

    // weird comparison to ensure that loc == null (in unmapped reads) is kept before
    // maxGenomeLoc == null (on startup)
    this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc));
    this.nTotalRecordsProcessed = Math.max(this.nTotalRecordsProcessed, nTotalRecordsProcessed);

    // a pretty name for our position
    this.positionMessage =
            maxGenomeLoc == null
                    ? "unmapped reads"
                    : String.format("%s:%d", maxGenomeLoc.getContig(), maxGenomeLoc.getStart());
}
@Ensures("result != null") public final String toString() { if (GenomeLoc.isUnmapped(this)) return "unmapped"; if (throughEndOfContigP() && atBeginningOfContigP()) return getContig(); else if (throughEndOfContigP() || getStart() == getStop()) return String.format("%s:%d", getContig(), getStart()); else return String.format("%s:%d-%d", getContig(), getStart(), getStop()); }
/**
 * Add a genomeLoc to the collection, simply inserting in order into the set
 *
 * @param e the GenomeLoc to add
 * @return true
 */
public boolean add(GenomeLoc e) {
    // assuming that the intervals arrive in order saves us a fair amount of time (and it's
    // most likely true)
    if (mArray.size() > 0 && e.isPast(mArray.get(mArray.size() - 1))) {
        mArray.add(e);
        return true;
    } else {
        int loc = Collections.binarySearch(mArray, e);
        if (loc >= 0) {
            throw new ReviewedStingException(
                    "Genome Loc Sorted Set already contains the GenomeLoc " + e.toString());
        } else {
            // convert the negative binarySearch result into the insertion index
            mArray.add((loc + 1) * -1, e);
            return true;
        }
    }
}
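A hedged usage sketch of add (the genomeLocParser setup and contig name here are illustrative assumptions, not taken from this code): in-order adds hit the fast append path, an out-of-order add falls back to the binary search, and a loc that compares equal to an existing entry (same contig and start, per compareTo) triggers the duplicate exception.

GenomeLocSortedSet set = new GenomeLocSortedSet(genomeLocParser);
set.add(genomeLocParser.createGenomeLoc("chr1", 100, 200)); // fast path: appended in order
set.add(genomeLocParser.createGenomeLoc("chr1", 500, 600)); // fast path: still past the last entry
set.add(genomeLocParser.createGenomeLoc("chr1", 300, 400)); // out of order: binary-search insertion
// set.add(genomeLocParser.createGenomeLoc("chr1", 300, 350)); // would throw: compares equal to an existing loc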
public final Map<String, IntervalTree<GenomeLoc>> createIntervalTreeByContig(
        final IntervalBinding<Feature> intervals) {
    final Map<String, IntervalTree<GenomeLoc>> byContig =
            new HashMap<String, IntervalTree<GenomeLoc>>();
    final List<GenomeLoc> locs = intervals.getIntervals(getToolkit());

    // set up the map from contig -> interval tree
    for (final String contig : getContigNames())
        byContig.put(contig, new IntervalTree<GenomeLoc>());

    for (final GenomeLoc loc : locs) {
        byContig.get(loc.getContig()).put(loc.getStart(), loc.getStop(), loc);
    }

    return byContig;
}
/**
 * Returns a new GenomeLoc that represents the region between the endpoints of this and that.
 * Requires that this and that GenomeLoc are both mapped.
 */
@Requires({"that != null", "isUnmapped(this) == isUnmapped(that)"})
@Ensures("result != null")
public GenomeLoc endpointSpan(GenomeLoc that) throws ReviewedStingException {
    if (GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
        throw new ReviewedStingException("Cannot get endpoint span for unmapped genome locs");
    }

    if (!this.getContig().equals(that.getContig())) {
        throw new ReviewedStingException(
                "Cannot get endpoint span for genome locs on different contigs");
    }

    return new GenomeLoc(
            getContig(),
            this.contigIndex,
            Math.min(getStart(), that.getStart()),
            Math.max(getStop(), that.getStop()));
}
public GenomeLocSortedSet subtractRegions(GenomeLocSortedSet toRemoveSet) {
    LinkedList<GenomeLoc> good = new LinkedList<GenomeLoc>();
    Stack<GenomeLoc> toProcess = new Stack<GenomeLoc>();
    Stack<GenomeLoc> toExclude = new Stack<GenomeLoc>();

    // initialize the stacks
    toProcess.addAll(mArray);
    Collections.reverse(toProcess);
    toExclude.addAll(toRemoveSet.mArray);
    Collections.reverse(toExclude);

    int i = 0;
    while (!toProcess.empty()) { // while there's still stuff to process
        if (toExclude.empty()) {
            good.addAll(toProcess); // no more excludes, all the processing stuff is good
            break;
        }

        GenomeLoc p = toProcess.peek();
        GenomeLoc e = toExclude.peek();

        if (p.overlapsP(e)) {
            toProcess.pop();
            for (GenomeLoc newP : p.subtract(e)) toProcess.push(newP);
        } else if (p.compareContigs(e) < 0) {
            good.add(toProcess.pop()); // p is now good
        } else if (p.compareContigs(e) > 0) {
            toExclude.pop(); // e can't affect anything
        } else if (p.getStop() < e.getStart()) {
            good.add(toProcess.pop()); // p stops before e starts, p is good
        } else if (e.getStop() < p.getStart()) {
            toExclude.pop(); // p starts after e stops, e is done
        } else {
            throw new ReviewedStingException("BUG: unexpected condition: p=" + p + ", e=" + e);
        }

        if (i++ % 10000 == 0) logger.debug("subtractRegions operation: i = " + i);
    }

    return createSetFromList(genomeLocParser, good);
}
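To make the stack-based walk concrete, here is a hedged sketch (parser setup assumed) mirroring the deleteSuperRegion test below: subtracting a region that spans two intervals trims both of them.

GenomeLocSortedSet regions = new GenomeLocSortedSet(genomeLocParser);
regions.add(genomeLocParser.createGenomeLoc("chr1", 10, 20));
regions.add(genomeLocParser.createGenomeLoc("chr1", 70, 100));

GenomeLocSortedSet toRemove =
        new GenomeLocSortedSet(genomeLocParser, genomeLocParser.createGenomeLoc("chr1", 15, 75));

// Overlapping pieces are split via GenomeLoc.subtract and pushed back for re-processing,
// so the result is {chr1:10-14, chr1:76-100}.
GenomeLocSortedSet remaining = regions.subtractRegions(toRemove);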
private T callWalkerMapOnActiveRegions(
        final ActiveRegionWalker<M, T> walker, T sum, final int minStart, final String currentContig) {
    // Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can
    // unload those regions and process them
    // TODO can implement parallel traversal here
    while (workQueue.peek() != null) {
        final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
        if (extendedLoc.getStop() < minStart
                || (currentContig != null && !extendedLoc.getContig().equals(currentContig))) {
            final ActiveRegion activeRegion = workQueue.remove();
            sum = processActiveRegion(activeRegion, myReads, workQueue, sum, walker);
        } else {
            break;
        }
    }

    return sum;
}
@Requires("that != null") @Ensures("result == 0 || result == 1 || result == -1") public int compareTo(GenomeLoc that) { int result = 0; if (this == that) { result = 0; } else if (GenomeLoc.isUnmapped(this)) result = 1; else if (GenomeLoc.isUnmapped(that)) result = -1; else { final int cmpContig = compareContigs(that); if (cmpContig != 0) { result = cmpContig; } else { if (this.getStart() < that.getStart()) result = -1; if (this.getStart() > that.getStart()) result = 1; } } return result; }
/** Return the minimum distance between any pair of bases in this and that GenomeLocs: */
@Requires("that != null")
@Ensures("result >= 0")
public final int minDistance(final GenomeLoc that) {
    if (!this.onSameContig(that)) return Integer.MAX_VALUE;

    int minDistance;
    if (this.isBefore(that)) minDistance = distanceFirstStopToSecondStart(this, that);
    else if (that.isBefore(this)) minDistance = distanceFirstStopToSecondStart(that, this);
    else // this and that overlap [and possibly one contains the other]:
        minDistance = 0;

    return minDistance;
}
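A minimal sketch of the three cases (parser setup assumed; the exact gap value for disjoint locs is whatever distanceFirstStopToSecondStart computes):

GenomeLoc a = genomeLocParser.createGenomeLoc("chr1", 100, 110);
GenomeLoc b = genomeLocParser.createGenomeLoc("chr1", 115, 120);
GenomeLoc c = genomeLocParser.createGenomeLoc("chr2", 100, 110);

a.minDistance(b); // gap between a's stop (110) and b's start (115)
a.minDistance(a); // 0: the locs overlap
a.minDistance(c); // Integer.MAX_VALUE: different contigs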
/**
 * Determines the position of the read in relation to the interval. Note: This function uses the
 * UNCLIPPED ENDS of the reads for the comparison.
 *
 * @param read the read
 * @param interval the interval
 * @return the overlap type as described by ReadAndIntervalOverlap enum (see above)
 */
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(
        GATKSAMRecord read, GenomeLoc interval) {
    int sStart = read.getSoftStart();
    int sStop = read.getSoftEnd();
    int uStart = read.getUnclippedStart();
    int uStop = read.getUnclippedEnd();

    if (!read.getReferenceName().equals(interval.getContig()))
        return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;

    else if (uStop < interval.getStart()) return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;

    else if (uStart > interval.getStop()) return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;

    else if (sStop < interval.getStart())
        return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;

    else if (sStart > interval.getStop())
        return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;

    else if ((sStart >= interval.getStart()) && (sStop <= interval.getStop()))
        return ReadAndIntervalOverlap.OVERLAP_CONTAINED;

    else if ((sStart < interval.getStart()) && (sStop > interval.getStop()))
        return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;

    else if ((sStart < interval.getStart())) return ReadAndIntervalOverlap.OVERLAP_LEFT;

    else return ReadAndIntervalOverlap.OVERLAP_RIGHT;
}
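A hedged usage sketch (the read, the parser, and a static import of the method are assumptions for brevity):

GenomeLoc interval = genomeLocParser.createGenomeLoc("chr1", 1000, 2000);
ReadAndIntervalOverlap type = getReadAndIntervalOverlapType(read, interval);

if (type == ReadAndIntervalOverlap.OVERLAP_CONTAINED) {
    // the soft-clipped span of the read lies entirely within the interval
} else if (type == ReadAndIntervalOverlap.NO_OVERLAP_CONTIG) {
    // the read is aligned to a different contig entirely
}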
static boolean mergeIntoMNPvalidationCheck(
        GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) {
    GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser, vc1);
    GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser, vc2);

    if (!loc1.onSameContig(loc2))
        throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome");

    if (!loc1.isBefore(loc2))
        throw new ReviewedStingException("Can only merge if vc1 is BEFORE vc2");

    if (vc1.isFiltered() || vc2.isFiltered()) return false;

    if (!vc1.getSampleNames().equals(vc2.getSampleNames())) // vc1, vc2 refer to different sample sets
        return false;

    if (!allGenotypesAreUnfilteredAndCalled(vc1) || !allGenotypesAreUnfilteredAndCalled(vc2))
        return false;

    return true;
}
/**
 * Return the number of bps before loc in the sorted set
 *
 * @param loc the location before which we are counting bases
 * @return the number of base pairs in this set that come before loc
 */
public long sizeBeforeLoc(GenomeLoc loc) {
    long s = 0;

    for (GenomeLoc e : this) {
        if (e.isBefore(loc))
            s += e.size();
        else if (!e.isPast(loc)) // loc is inside of e
            s += loc.getStart() - e.getStart();
        // else: e is entirely past loc and contributes nothing
    }

    return s;
}
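A worked example (setup assumed): for a set {chr1:1-10, chr1:21-30}, the bases before chr1:25 are all ten bases of the first interval plus the four bases 21-24 of the second.

GenomeLocSortedSet set = new GenomeLocSortedSet(genomeLocParser);
set.add(genomeLocParser.createGenomeLoc("chr1", 1, 10));
set.add(genomeLocParser.createGenomeLoc("chr1", 21, 30));

GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 25, 25);
long n = set.sizeBeforeLoc(loc); // 10 + (25 - 21) = 14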
@Test
public void deleteSuperRegion() {
    GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 10, 20);
    GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 70, 100);
    mSortedSet.add(g);
    mSortedSet.addRegion(e);
    assertTrue(mSortedSet.size() == 2);
    // now delete a region
    GenomeLoc d = genomeLocParser.createGenomeLoc(contigOneName, 15, 75);
    mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser, d));
    Iterator<GenomeLoc> iter = mSortedSet.iterator();
    GenomeLoc loc = iter.next();
    assertTrue(loc.getStart() == 10);
    assertTrue(loc.getStop() == 14);
    assertTrue(loc.getContigIndex() == 1);
    loc = iter.next();
    assertTrue(loc.getStart() == 76);
    assertTrue(loc.getStop() == 100);
    assertTrue(loc.getContigIndex() == 1);
}
/**
 * Fully qualified constructor: instantiates a new GATKFeatureRecordList object with specified
 * GATKFeature track name, location on the reference, and list of associated GATKFeatures. This is
 * a knee-deep COPY constructor: passed name, loc, and data element objects will be referenced
 * from the created GATKFeatureRecordList (so that changing them from outside will affect data in
 * this object); however, the data elements will be copied into a newly allocated list, so that
 * the 'data' collection argument can be modified afterwards without affecting the state of this
 * record list. WARNING: this constructor is (semi-)validating: passed name and location are
 * allowed to be nulls (although it may be unsafe, use caution), but if they are not nulls, then
 * passed non-null GATKFeature data elements must have the same track name, and their locations
 * must overlap with the passed 'location' argument. Null data elements or a null 'data'
 * collection argument are allowed as well.
 *
 * @param name the name of the track
 * @param data the collection of features at this location
 * @param loc the location
 */
public RODRecordListImpl(String name, Collection<GATKFeature> data, GenomeLoc loc) {
    this.records = new ArrayList<GATKFeature>(data == null ? 0 : data.size());
    this.name = name;
    this.location = loc;
    if (data == null || data.size() == 0) return; // empty dataset, nothing to do
    for (GATKFeature r : data) {
        records.add(r);
        if (r == null) continue;
        if (name != null && !name.equals(r.getName())) { // name may be null per the contract above
            throw new ReviewedStingException(
                    "Attempt to add GATKFeature with non-matching name "
                            + r.getName() + " to the track " + name);
        }
        if (location != null && !location.overlapsP(r.getLocation())) {
            throw new ReviewedStingException(
                    "Attempt to add GATKFeature that lies outside of specified interval "
                            + location + "; offending GATKFeature:\n" + r.toString());
        }
    }
}
/**
 * Returns a new GenomeLoc that represents the entire span of this and that. Requires that this
 * and that GenomeLoc are contiguous and both mapped
 */
@Requires({"that != null", "isUnmapped(this) == isUnmapped(that)"})
@Ensures("result != null")
public GenomeLoc merge(GenomeLoc that) throws ReviewedStingException {
    if (GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
        if (!GenomeLoc.isUnmapped(this) || !GenomeLoc.isUnmapped(that))
            throw new ReviewedStingException("Tried to merge a mapped and an unmapped genome loc");
        return UNMAPPED;
    }

    if (!(this.contiguousP(that))) {
        throw new ReviewedStingException("The two genome locs need to be contiguous");
    }

    return new GenomeLoc(
            getContig(),
            this.contigIndex,
            Math.min(getStart(), that.getStart()),
            Math.max(getStop(), that.getStop()));
}
@Requires("that != null") @Ensures("result != null") public GenomeLoc intersect(GenomeLoc that) throws ReviewedStingException { if (GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) { if (!GenomeLoc.isUnmapped(this) || !GenomeLoc.isUnmapped(that)) throw new ReviewedStingException("Tried to intersect a mapped and an unmapped genome loc"); return UNMAPPED; } if (!(this.overlapsP(that))) { throw new ReviewedStingException( "GenomeLoc::intersect(): The two genome loc's need to overlap"); } return new GenomeLoc( getContig(), this.contigIndex, Math.max(getStart(), that.getStart()), Math.min(getStop(), that.getStop())); }
public Iterable<Shard> createShardsOverIntervals(
        final SAMDataSource readsDataSource, final GenomeLocSortedSet intervals, final int maxShardSize) {
    List<Shard> shards = new ArrayList<Shard>();

    for (GenomeLoc interval : intervals) {
        // split intervals larger than maxShardSize into consecutive maxShardSize pieces
        while (interval.size() > maxShardSize) {
            shards.add(
                    new LocusShard(
                            intervals.getGenomeLocParser(),
                            readsDataSource,
                            Collections.singletonList(
                                    intervals.getGenomeLocParser()
                                            .createGenomeLoc(
                                                    interval.getContig(),
                                                    interval.getStart(),
                                                    interval.getStart() + maxShardSize - 1)),
                            null));
            interval =
                    intervals.getGenomeLocParser()
                            .createGenomeLoc(
                                    interval.getContig(),
                                    interval.getStart() + maxShardSize,
                                    interval.getStop());
        }
        // the remainder (or a sufficiently small interval) becomes a single shard
        shards.add(
                new LocusShard(
                        intervals.getGenomeLocParser(),
                        readsDataSource,
                        Collections.singletonList(interval),
                        null));
    }

    return shards;
}
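A hedged sketch of the splitting behavior, invoked from within the enclosing class (readsDataSource and parser setup assumed): with maxShardSize = 100, a 250 bp interval is cut into three consecutive shards.

GenomeLocSortedSet intervals =
        new GenomeLocSortedSet(genomeLocParser, genomeLocParser.createGenomeLoc("chr1", 1, 250));

Iterable<Shard> shards = createShardsOverIntervals(readsDataSource, intervals, 100);
// yields locus shards over chr1:1-100, chr1:101-200, and chr1:201-250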
private ArrayList<Allele> computeConsensusAlleles(
        ReferenceContext ref,
        Map<String, AlignmentContext> contexts,
        AlignmentContextUtils.ReadOrientation contextType) {
    Allele refAllele = null, altAllele = null;
    GenomeLoc loc = ref.getLocus();
    ArrayList<Allele> aList = new ArrayList<Allele>();

    HashMap<String, Integer> consensusIndelStrings = new HashMap<String, Integer>();

    int insCount = 0, delCount = 0;
    // quick check of total number of indels in pileup
    for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
        AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
        final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
        insCount += indelPileup.getNumberOfInsertions();
        delCount += indelPileup.getNumberOfDeletions();
    }

    if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
        return aList;

    for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
        // todo -- warning, can be duplicating expensive partition here
        AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
        final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();

        for (ExtendedEventPileupElement p : indelPileup.toExtendedIterable()) {
            GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
            if (read == null) continue;
            if (ReadUtils.is454Read(read)) {
                continue;
            }

            /*
            if (DEBUG && p.isIndel()) {
                System.out.format(
                        "Read: %s, cigar: %s, aln start: %d, aln end: %d, p.len: %d, Type: %s, EventBases: %s\n",
                        read.getReadName(), read.getCigar().toString(), read.getAlignmentStart(),
                        read.getAlignmentEnd(), p.getEventLength(), p.getType().toString(), p.getEventBases());
            }
            */

            String indelString = p.getEventBases();
            if (p.isInsertion()) {
                boolean foundKey = false;
                if (read.getAlignmentEnd() == loc.getStart()) {
                    // first corner condition: a read has an insertion at the end, and we're right
                    // at the insertion. In this case, the read could have any of the inserted bases
                    // and we need to build a consensus
                    for (String s : consensusIndelStrings.keySet()) {
                        int cnt = consensusIndelStrings.get(s);
                        if (s.startsWith(indelString)) {
                            // case 1: current insertion is a prefix of an indel already in the map
                            consensusIndelStrings.put(s, cnt + 1);
                            foundKey = true;
                            break; // breaking immediately keeps the keySet iteration safe
                        } else if (indelString.startsWith(s)) {
                            // case 2: indel stored in the map is a prefix of the current insertion;
                            // the new bases become the new key
                            consensusIndelStrings.remove(s);
                            consensusIndelStrings.put(indelString, cnt + 1);
                            foundKey = true;
                            break;
                        }
                    }
                    if (!foundKey)
                        // none of the above: event bases not supported by previous table, so add new key
                        consensusIndelStrings.put(indelString, 1);
                } else if (read.getAlignmentStart() == loc.getStart() + 1) {
                    // opposite corner condition: read will start at current locus with an insertion
                    for (String s : consensusIndelStrings.keySet()) {
                        int cnt = consensusIndelStrings.get(s);
                        if (s.endsWith(indelString)) {
                            // case 1: current insertion is a suffix of an indel already in the map
                            consensusIndelStrings.put(s, cnt + 1);
                            foundKey = true;
                            break;
                        } else if (indelString.endsWith(s)) {
                            // case 2: indel stored in the map is a suffix of the current insertion;
                            // the new bases become the new key
                            consensusIndelStrings.remove(s);
                            consensusIndelStrings.put(indelString, cnt + 1);
                            foundKey = true;
                            break;
                        }
                    }
                    if (!foundKey)
                        // none of the above: event bases not supported by previous table, so add new key
                        consensusIndelStrings.put(indelString, 1);
                } else {
                    // normal case: insertion somewhere in the middle of a read: add count to the map
                    int cnt = consensusIndelStrings.containsKey(indelString)
                            ? consensusIndelStrings.get(indelString)
                            : 0;
                    consensusIndelStrings.put(indelString, cnt + 1);
                }
            } else if (p.isDeletion()) {
                indelString = String.format("D%d", p.getEventLength());
                int cnt = consensusIndelStrings.containsKey(indelString)
                        ? consensusIndelStrings.get(indelString)
                        : 0;
                consensusIndelStrings.put(indelString, cnt + 1);
            }
        }

        /*
        if (DEBUG) {
            int icount = indelPileup.getNumberOfInsertions();
            int dcount = indelPileup.getNumberOfDeletions();
            if (icount + dcount > 0) {
                List<Pair<String, Integer>> eventStrings =
                        indelPileup.getEventStringsWithCounts(ref.getBases());
                System.out.format("#ins: %d, #del: %d\n", insCount, delCount);
                for (int i = 0; i < eventStrings.size(); i++) {
                    System.out.format("%s:%d,", eventStrings.get(i).first, eventStrings.get(i).second);
                }
                System.out.println();
            }
        }
        */
    }

    // pick the consensus indel string with the highest count
    int maxAlleleCnt = 0;
    String bestAltAllele = "";
    for (String s : consensusIndelStrings.keySet()) {
        int curCnt = consensusIndelStrings.get(s);
        if (curCnt > maxAlleleCnt) {
            maxAlleleCnt = curCnt;
            bestAltAllele = s;
        }
        // if (DEBUG)
        //     System.out.format("Key:%s, number: %d\n", s, consensusIndelStrings.get(s));
    }

    if (maxAlleleCnt < minIndelCountForGenotyping) return aList;

    if (bestAltAllele.startsWith("D")) {
        // get deletion length
        int dLen = Integer.valueOf(bestAltAllele.substring(1));
        // get ref bases of accurate deletion
        int startIdxInReference = 1 + loc.getStart() - ref.getWindow().getStart();
        byte[] refBases =
                Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);

        if (Allele.acceptableAlleleBases(refBases)) {
            refAllele = Allele.create(refBases, true);
            altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
        }
    } else {
        // insertion case
        if (Allele.acceptableAlleleBases(bestAltAllele)) {
            refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
            altAllele = Allele.create(bestAltAllele, false);
        }
    }

    if (refAllele != null && altAllele != null) {
        aList.add(0, refAllele);
        aList.add(1, altAllele);
    }
    return aList;
}
public Allele getLikelihoods(
        RefMetaDataTracker tracker,
        ReferenceContext ref,
        Map<String, AlignmentContext> contexts,
        AlignmentContextUtils.ReadOrientation contextType,
        GenotypePriors priors,
        Map<String, MultiallelicGenotypeLikelihoods> GLs,
        Allele alternateAlleleToUse,
        boolean useBAQedPileup) {

    if (tracker == null) return null;

    GenomeLoc loc = ref.getLocus();
    Allele refAllele, altAllele;
    VariantContext vc = null;

    if (!ref.getLocus().equals(lastSiteVisited)) {
        // starting a new site: clear allele list
        alleleList.clear();
        lastSiteVisited = ref.getLocus();
        indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
        haplotypeMap.clear();

        if (getAlleleListFromVCF) {
            for (final VariantContext vc_input : tracker.getValues(UAC.alleles, loc)) {
                if (vc_input != null
                        && allowableTypes.contains(vc_input.getType())
                        && ref.getLocus().getStart() == vc_input.getStart()) {
                    vc = vc_input;
                    break;
                }
            }
            // ignore places where we don't have a variant
            if (vc == null) return null;

            alleleList.clear();
            if (ignoreSNPAllelesWhenGenotypingIndels) {
                // if there's an allele that has the same length as the reference (i.e. a SNP or
                // MNP), ignore it and don't genotype it
                for (Allele a : vc.getAlleles()) {
                    if (a.isNonReference()
                            && a.getBases().length == vc.getReference().getBases().length)
                        continue;
                    alleleList.add(a);
                }
            } else {
                for (Allele a : vc.getAlleles()) alleleList.add(a);
            }
        } else {
            alleleList = computeConsensusAlleles(ref, contexts, contextType);
            if (alleleList.isEmpty()) return null;
        }
    }

    // protect against having an indel too close to the edge of a contig
    if (loc.getStart() <= HAPLOTYPE_SIZE) return null;

    // check if there is enough reference window to create haplotypes (can be an issue at end of
    // contigs)
    if (ref.getWindow().getStop() < loc.getStop() + HAPLOTYPE_SIZE) return null;

    if (!(priors instanceof DiploidIndelGenotypePriors))
        throw new StingException("Only diploid-based Indel priors are supported in the DINDEL GL model");

    if (alleleList.isEmpty()) return null;

    refAllele = alleleList.get(0);
    altAllele = alleleList.get(1);

    // look for the alt allele with the biggest length difference from the ref allele
    int maxLenDiff = 0;
    for (Allele a : alleleList) {
        if (a.isNonReference()) {
            int lenDiff = Math.abs(a.getBaseString().length() - refAllele.getBaseString().length());
            if (lenDiff > maxLenDiff) {
                maxLenDiff = lenDiff;
                altAllele = a;
            }
        }
    }

    final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
    final int hsize = (int) ref.getWindow().size() - Math.abs(eventLength) - 1;
    final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;

    haplotypeMap =
            Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(), ref, hsize, numPrefBases);

    // For each sample, get genotype likelihoods based on the pileup. Compute prior likelihoods
    // on haplotypes, and initialize the haplotype likelihood matrix with them.

    // initialize the GenotypeLikelihoods
    GLs.clear();

    for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
        AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);

        ReadBackedPileup pileup = null;
        if (context.hasExtendedEventPileup()) pileup = context.getExtendedEventPileup();
        else if (context.hasBasePileup()) pileup = context.getBasePileup();

        if (pileup != null) {
            final double[] genotypeLikelihoods =
                    pairModel.computeReadHaplotypeLikelihoods(
                            pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());

            GLs.put(
                    sample.getKey(),
                    new MultiallelicGenotypeLikelihoods(
                            sample.getKey(), alleleList, genotypeLikelihoods, getFilteredDepth(pileup)));

            if (DEBUG) {
                System.out.format("Sample:%s Alleles:%s GL:", sample.getKey(), alleleList.toString());
                for (int k = 0; k < genotypeLikelihoods.length; k++)
                    System.out.format("%1.4f ", genotypeLikelihoods[k]);
                System.out.println();
            }
        }
    }

    return refAllele;
}
/**
 * Read in a list of ExactCall objects from reader, keeping only those with starts in startsToKeep
 * or all sites (if this is empty)
 *
 * @param reader a just-opened reader sitting at the start of the file
 * @param startsToKeep a list of start positions of the calls to keep, or empty if all calls
 *     should be kept
 * @param parser a genome loc parser to create genome locs
 * @return a list of ExactCall objects in reader
 * @throws IOException
 */
public static List<ExactCall> readExactLog(
        final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser)
        throws IOException {
    if (reader == null) throw new IllegalArgumentException("reader cannot be null");
    if (startsToKeep == null) throw new IllegalArgumentException("startsToKeep cannot be null");
    if (parser == null) throw new IllegalArgumentException("GenomeLocParser cannot be null");

    List<ExactCall> calls = new LinkedList<ExactCall>();

    // skip the header line
    reader.readLine();

    // skip the first "type" line
    reader.readLine();

    while (true) {
        final VariantContextBuilder builder = new VariantContextBuilder();
        final List<Allele> alleles = new ArrayList<Allele>();
        final List<Genotype> genotypes = new ArrayList<Genotype>();
        final double[] posteriors = new double[2];
        final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true);
        final List<Integer> mle = new ArrayList<Integer>();
        final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
        long runtimeNano = -1;

        GenomeLoc currentLoc = null;
        while (true) {
            final String line = reader.readLine();
            if (line == null) return calls;

            final String[] parts = line.split("\t");
            final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]);
            final String variable = parts[1];
            final String key = parts[2];
            final String value = parts[3];

            if (currentLoc == null) currentLoc = lineLoc;

            if (variable.equals("type")) {
                if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
                    builder.alleles(alleles);
                    final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
                    builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
                    builder.genotypes(genotypes);
                    final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {}));
                    final AFCalcResult result =
                            new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
                    calls.add(new ExactCall(builder.make(), runtimeNano, result));
                }
                break;
            } else if (variable.equals("allele")) {
                final boolean isRef = key.equals("0");
                alleles.add(Allele.create(value, isRef));
            } else if (variable.equals("PL")) {
                final GenotypeBuilder gb = new GenotypeBuilder(key);
                gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
                genotypes.add(gb.make());
            } else if (variable.equals("log10PosteriorOfAFEq0")) {
                posteriors[0] = Double.valueOf(value);
            } else if (variable.equals("log10PosteriorOfAFGt0")) {
                posteriors[1] = Double.valueOf(value);
            } else if (variable.equals("MLE")) {
                mle.add(Integer.valueOf(value));
            } else if (variable.equals("pNonRefByAllele")) {
                final Allele a = Allele.create(key);
                log10pNonRefByAllele.put(a, Double.valueOf(value));
            } else if (variable.equals("runtime.nano")) {
                runtimeNano = Long.valueOf(value);
            } else {
                // nothing to do
            }
        }
    }
}