/**
 * Finds every maximal run of positions inside the interval that no sample covers and emits each
 * run as its own missing sub-interval.
 */
private void outputMissingInterval(final IntervalStratification stats) {
  final GenomeLoc interval = stats.getInterval();

  // Start by assuming every position is missing, then clear positions where any sample
  // (among those that have missing statuses at all) reports an empty callable-status set.
  final boolean[] missing = new boolean[interval.size()];
  Arrays.fill(missing, true);
  for (final AbstractStratification sample : stats.getElements()) {
    if (!hasMissingStatuses(sample)) {
      continue;
    }
    int offset = 0;
    for (final AbstractStratification locus : sample.getElements()) {
      if (locus.callableStatuses().isEmpty()) {
        missing[offset] = false;
      }
      offset++;
    }
  }

  // Emit each maximal run of consecutive missing positions as a single interval.
  int runStart = -1;
  boolean inRun = false;
  for (int i = 0; i < missing.length; i++) {
    if (missing[i]) {
      if (!inRun) {
        runStart = interval.getStart() + i;
        inRun = true;
      }
    } else if (inRun) {
      outputMissingInterval(interval.getContig(), runStart, interval.getStart() + i - 1);
      inRun = false;
    }
  }
  // A run that extends to the end of the interval is closed at the interval's stop.
  if (inRun) {
    outputMissingInterval(interval.getContig(), runStart, interval.getStop());
  }
}
/**
 * Walks the two position-sorted variant lists in lockstep and writes each record annotated with
 * how it relates to the other call set: unique to one source, same start with subset/different
 * alleles, or overlapping events.
 *
 * @param source1Alleles variants remaining from the first source, assumed sorted by position
 * @param source2Alleles variants remaining from the second source, assumed sorted by position
 */
private void writeDifferences(
    final List<VariantContext> source1Alleles, final List<VariantContext> source2Alleles) {
  int currentIndex1 = 0, currentIndex2 = 0;
  final int size1 = source1Alleles.size(), size2 = source2Alleles.size();
  // Fix: the original called get(0) unconditionally and threw IndexOutOfBoundsException when
  // either list was empty; an empty side now simply drains the other list via the null-cursor
  // branches below.
  VariantContext current1 = (size1 > 0 ? source1Alleles.get(0) : null);
  VariantContext current2 = (size2 > 0 ? source2Alleles.get(0) : null);

  while (currentIndex1 < size1 || currentIndex2 < size2) {
    if (current1 == null) {
      // Source 1 exhausted: everything left in source 2 is unique to it.
      writeOne(current2, source2, null);
      currentIndex2++;
      current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2) : null);
    } else if (current2 == null) {
      // Source 2 exhausted: everything left in source 1 is unique to it.
      writeOne(current1, source1, null);
      currentIndex1++;
      current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1) : null);
    } else {
      final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1);
      final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2);
      if (loc1.getStart() == loc2.getStart() || loc1.overlapsP(loc2)) {
        String status;
        if (loc1.getStart() == loc2.getStart()) {
          // Same start position: classify by whether one alt allele's bases contain the other's.
          final String allele1 = current1.getAlternateAllele(0).getBaseString();
          final String allele2 = current2.getAlternateAllele(0).getBaseString();
          if (allele1.indexOf(allele2) != -1 || allele2.indexOf(allele1) != -1) {
            status = ONE_ALLELE_SUBSET_OF_OTHER_STATUS;
          } else {
            status = SAME_START_DIFFERENT_ALLELES_STATUS;
          }
        } else {
          status = OVERLAPPING_EVENTS_STATUS;
        }
        // Records that collide are written once, into the intersection set, and both
        // cursors advance.
        writeOne(current1, INTERSECTION_SET, status);
        currentIndex1++;
        currentIndex2++;
        current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1) : null);
        current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2) : null);
      } else if (loc1.isBefore(loc2)) {
        writeOne(current1, source1, null);
        currentIndex1++;
        current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1) : null);
      } else {
        writeOne(current2, source2, null);
        currentIndex2++;
        current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2) : null);
      }
    }
  }
}
/** * Loop over all of the reads in this likelihood map and realign them to its most likely haplotype * * @param haplotypes the collection of haplotypes * @param paddedReferenceLoc the active region */ public void realignReadsToMostLikelyHaplotype( final Collection<Haplotype> haplotypes, final GenomeLoc paddedReferenceLoc) { // we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a // requirement currently final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size()); Haplotype refHaplotype = null; for (final Haplotype haplotype : haplotypes) { alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype); if (refHaplotype == null && haplotype.isReference()) refHaplotype = haplotype; } final Map<GATKSAMRecord, Map<Allele, Double>> newLikelihoodReadMap = new LinkedHashMap<>(likelihoodReadMap.size()); for (final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : likelihoodReadMap.entrySet()) { final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue()); final GATKSAMRecord alignedToRef = AlignmentUtils.createReadAlignedToRef( entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), refHaplotype, paddedReferenceLoc.getStart(), bestAllele.isInformative()); newLikelihoodReadMap.put(alignedToRef, entry.getValue()); } likelihoodReadMap.clear(); likelihoodReadMap.putAll(newLikelihoodReadMap); }
public Event(GenomeLoc loc, int furthestStopPos, EVENT_TYPE type) { this.loc = loc; this.furthestStopPos = furthestStopPos; this.type = type; if (type == EVENT_TYPE.INDEL_EVENT || type == EVENT_TYPE.BOTH) { eventStartPos = loc.getStart(); eventStopPos = loc.getStop(); } else { eventStartPos = -1; eventStopPos = -1; } if (type == EVENT_TYPE.POINT_EVENT || type == EVENT_TYPE.BOTH) { pointEvents.add(loc.getStart()); } }
/**
 * Advances two cursors over the position-sorted variant lists; wherever the two call sets agree
 * (same location, or same start when either record is multi-allelic) and
 * determineAndWriteOverlap reports a match, the matching records are written and REMOVED from
 * both input lists in place. Non-matching records are left in the lists for later processing.
 *
 * NOTE(review): removal shifts subsequent elements left, so the indices are intentionally NOT
 * incremented after a successful purge — the same index now points at the next element.
 *
 * @param sourceVCs1 variants from the first source, sorted by position; mutated by this method
 * @param sourceVCs2 variants from the second source, sorted by position; mutated by this method
 * @param status the status string forwarded to determineAndWriteOverlap
 */
private void writeAndPurgeAllEqualVariants(
    final List<VariantContext> sourceVCs1,
    final List<VariantContext> sourceVCs2,
    final String status) {
  int currentIndex1 = 0, currentIndex2 = 0;
  // sizes are tracked locally and decremented on removal
  int size1 = sourceVCs1.size(), size2 = sourceVCs2.size();
  VariantContext current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1) : null);
  VariantContext current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2) : null);
  while (current1 != null && current2 != null) {
    final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1);
    final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2);
    // Candidate match: identical locations, or identical starts when either record carries
    // more than one alternate allele.
    if (loc1.equals(loc2)
        || (loc1.getStart() == loc2.getStart()
            && (current1.getAlternateAlleles().size() > 1
                || current2.getAlternateAlleles().size() > 1))) {
      // test the alleles
      if (determineAndWriteOverlap(current1, current2, status)) {
        // Match written: purge both records; indices stay put (elements shifted left).
        sourceVCs1.remove(currentIndex1);
        sourceVCs2.remove(currentIndex2);
        size1--;
        size2--;
      } else {
        currentIndex1++;
        currentIndex2++;
      }
      current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1) : null);
      current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2) : null);
    } else if (loc1.isBefore(loc2)) {
      // Source 1 lags: advance it to catch up.
      currentIndex1++;
      current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1) : null);
    } else {
      // Source 2 lags: advance it to catch up.
      currentIndex2++;
      current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2) : null);
    }
  }
}
/**
 * Takes the interval, finds it in the stash, prints it to the VCF
 *
 * @param stats The statistics of the interval
 * @param refAllele the reference allele
 */
private void outputStatsToVCF(final IntervalStratification stats, final Allele refAllele) {
  final GenomeLoc interval = stats.getInterval();

  // One genotype per sample, carrying per-sample coverage diagnostics and filters.
  final ArrayList<Genotype> genotypes = new ArrayList<>();
  for (final String sample : samples) {
    final SampleStratification sampleStat = stats.getSampleStatistics(sample);
    final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(sample);
    genotypeBuilder.attribute(
        GATKVCFConstants.AVG_INTERVAL_DP_BY_SAMPLE_KEY,
        sampleStat.averageCoverage(interval.size()));
    genotypeBuilder.attribute(GATKVCFConstants.LOW_COVERAGE_LOCI, sampleStat.getNLowCoveredLoci());
    genotypeBuilder.attribute(GATKVCFConstants.ZERO_COVERAGE_LOCI, sampleStat.getNUncoveredLoci());
    genotypeBuilder.filters(
        statusToStrings(stats.getSampleStatistics(sample).callableStatuses(), false));
    genotypes.add(genotypeBuilder.make());
  }

  // The site carries the reference allele plus the symbolic alt.
  final List<Allele> alleles = new ArrayList<>();
  alleles.add(refAllele);
  alleles.add(SYMBOLIC_ALLELE);

  // Interval-level summary attributes.
  final Map<String, Object> attributes = new HashMap<>();
  attributes.put(VCFConstants.END_KEY, interval.getStop());
  attributes.put(GATKVCFConstants.AVG_INTERVAL_DP_KEY, stats.averageCoverage(interval.size()));
  attributes.put(GATKVCFConstants.INTERVAL_GC_CONTENT_KEY, stats.gcContent());

  VariantContextBuilder siteBuilder =
      new VariantContextBuilder(
          "DiagnoseTargets",
          interval.getContig(),
          interval.getStart(),
          interval.getStop(),
          alleles);
  siteBuilder = siteBuilder.log10PError(VariantContext.NO_LOG10_PERROR);
  siteBuilder.filters(new LinkedHashSet<>(statusToStrings(stats.callableStatuses(), true)));
  siteBuilder = siteBuilder.attributes(attributes);
  siteBuilder = siteBuilder.genotypes(genotypes);

  vcfWriter.add(siteBuilder.make());
}
/**
 * Flushes any pending left/right boundary events into the interval set, then writes all intervals
 * to the output file — as a Picard interval_list when the file extension is "interval_list",
 * otherwise as one GenomeLoc string per line.
 */
public void onTraversalDone(EventPair sum) {
  if (sum.left != null && sum.left.isReportableEvent()) {
    sum.intervals.add(sum.left.getLoc());
  }
  if (sum.right != null && sum.right.isReportableEvent()) {
    sum.intervals.add(sum.right.getLoc());
  }

  final boolean picardFormat =
      FilenameUtils.getExtension(out.getName()).equals("interval_list");
  if (picardFormat) {
    // interval_list output requires a header carrying the master sequence dictionary.
    final SAMFileHeader header = new SAMFileHeader();
    header.setSequenceDictionary(getToolkit().getMasterSequenceDictionary());
    final IntervalList intervals = new IntervalList(header);
    for (final GenomeLoc loc : sum.intervals) {
      intervals.add(new Interval(loc.getContig(), loc.getStart(), loc.getStop()));
    }
    intervals.write(out);
  } else {
    // Plain-text output: one interval per line.
    try (BufferedWriter writer = IOUtil.openFileForBufferedWriting(out)) {
      for (final GenomeLoc loc : sum.intervals) {
        writer.write(loc.toString());
        writer.newLine();
      }
    } catch (final IOException e) {
      throw new GATKException("Error writing out intervals to file: " + out.getAbsolutePath(), e);
    }
  }
}
/**
 * Renders this state as a tab-separated record: contig, start minus one, stop, state.
 *
 * <p>NOTE(review): the {@code getStart() - 1} looks like a conversion from 1-based coordinates
 * to a BED-style 0-based start — confirm against the consumers of this output.
 */
@Override
public String toString() {
  // Fix: added the missing @Override annotation on this Object.toString() override.
  return String.format(
      "%s\t%d\t%d\t%s", loc.getContig(), loc.getStart() - 1, loc.getStop(), state);
}
/**
 * Updating the location of this CalledBaseState by the new stop location: the contig and start
 * are kept, and the stop is replaced with {@code newStop}'s stop coordinate.
 *
 * @param newStop the location whose stop coordinate becomes this state's new end
 */
public void update(GenomeLoc newStop) {
  final String contig = loc.getContig();
  final int start = loc.getStart();
  loc = genomeLocParser.createGenomeLoc(contig, start, newStop.getStop());
}
/**
 * Examines one locus for evidence of an event (indel and/or point mismatch) worth realigning
 * around, gathering evidence from both the known-variant rods and the read pileup.
 *
 * @param tracker known-variant rods at this locus (may be null)
 * @param ref the reference context at this locus
 * @param context the alignment context (pileup) at this locus
 * @return an Event describing what was seen, or null if nothing event-worthy was found
 */
public Event map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
  boolean hasIndel = false;
  boolean hasInsertion = false;
  boolean hasPointEvent = false;

  // furthest read/rod end position supporting the event; -1 means no support seen yet
  int furthestStopPos = -1;

  // look at the rods for indels or SNPs
  if (tracker != null) {
    for (VariantContext vc : tracker.getValues(known)) {
      switch (vc.getType()) {
        case INDEL:
          hasIndel = true;
          if (vc.isSimpleInsertion()) hasInsertion = true;
          break;
        case SNP:
          hasPointEvent = true;
          break;
        case MIXED:
          // mixed records count as both an indel and a point event
          hasPointEvent = true;
          hasIndel = true;
          if (vc.isSimpleInsertion()) hasInsertion = true;
          break;
        default:
          break;
      }
      // once an indel has been seen, track the end of the current rod as the stop extent
      if (hasIndel) furthestStopPos = vc.getEnd();
    }
  }

  // look at the normal context to get deletions and positions with high entropy
  final ReadBackedPileup pileup = context.getBasePileup();

  // accumulated base qualities of mismatching vs. all bases (used for the entropy check)
  int mismatchQualities = 0, totalQualities = 0;
  final byte refBase = ref.getBase();
  for (PileupElement p : pileup) {
    // check the ends of the reads to see how far they extend
    furthestStopPos = Math.max(furthestStopPos, p.getRead().getAlignmentEnd());

    // is it a deletion or insertion?
    if (p.isDeletion() || p.isBeforeInsertion()) {
      hasIndel = true;
      if (p.isBeforeInsertion()) hasInsertion = true;
    }

    // look for mismatches
    else if (lookForMismatchEntropy) {
      if (p.getBase() != refBase) mismatchQualities += p.getQual();
      totalQualities += p.getQual();
    }
  }

  // make sure we're supposed to look for high entropy: enough reads at the locus and the
  // quality-weighted mismatch fraction at or above the configured threshold
  if (lookForMismatchEntropy
      && pileup.getNumberOfElements() >= minReadsAtLocus
      && (double) mismatchQualities / (double) totalQualities >= mismatchThreshold)
    hasPointEvent = true;

  // return null if no event occurred
  if (!hasIndel && !hasPointEvent) return null;

  // return null if we didn't find any usable reads/rods associated with the event
  if (furthestStopPos == -1) return null;

  GenomeLoc eventLoc = context.getLocation();
  // insertions get a 2-bp location spanning the insertion point
  if (hasInsertion)
    eventLoc =
        getToolkit()
            .getGenomeLocParser()
            .createGenomeLoc(eventLoc.getContig(), eventLoc.getStart(), eventLoc.getStart() + 1);

  EVENT_TYPE eventType =
      (hasIndel
          ? (hasPointEvent ? EVENT_TYPE.BOTH : EVENT_TYPE.INDEL_EVENT)
          : EVENT_TYPE.POINT_EVENT);

  return new Event(eventLoc, furthestStopPos, eventType);
}