// protected basic manipulation routines private static List<Allele> makeAlleles(Collection<Allele> alleles) { final List<Allele> alleleList = new ArrayList<Allele>(alleles.size()); boolean sawRef = false; for (final Allele a : alleles) { for (final Allele b : alleleList) { if (a.equals(b, true)) throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a); } // deal with the case where the first allele isn't the reference if (a.isReference()) { if (sawRef) throw new IllegalArgumentException( "Alleles for a VariantContext must contain at most one reference allele: " + alleles); alleleList.add(0, a); sawRef = true; } else alleleList.add(a); } if (alleleList.isEmpty()) throw new IllegalArgumentException( "Cannot create a VariantContext with an empty allele list"); if (alleleList.get(0).isNonReference()) throw new IllegalArgumentException( "Alleles for a VariantContext must contain at least one reference allele: " + alleles); return alleleList; }
/**
 * Checks that {@code LiftoverVcf.fixGenotypes} replaces the alleles of hom-ref, het and hom-var
 * genotypes according to the supplied allele map (A to T, C to G).
 */
@Test
public void testFixReverseComplementedGenotypes() {
  // alleles before and after the swap
  final Allele refA = Allele.create("A", true);
  final Allele altC = Allele.create("C", false);
  final Allele refT = Allele.create("T", true);
  final Allele altG = Allele.create("G", false);

  final GenotypesContext input = GenotypesContext.create(3);
  input.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refA, refA)).make());
  input.add(new GenotypeBuilder("het").alleles(Arrays.asList(refA, altC)).make());
  input.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altC, altC)).make());

  final GenotypesContext wanted = GenotypesContext.create(3);
  wanted.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refT, refT)).make());
  wanted.add(new GenotypeBuilder("het").alleles(Arrays.asList(refT, altG)).make());
  wanted.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altG, altG)).make());

  final Map<Allele, Allele> alleleSwap = new HashMap<Allele, Allele>(2);
  alleleSwap.put(refA, refT);
  alleleSwap.put(altC, altG);

  final GenotypesContext fixed = LiftoverVcf.fixGenotypes(input, alleleSwap);

  for (final String sampleName : Arrays.asList("homref", "het", "homvar")) {
    final List<Allele> wantedAlleles = wanted.get(sampleName).getAlleles();
    final List<Allele> fixedAlleles = fixed.get(sampleName).getAlleles();
    // every genotype in this test carries exactly two alleles
    for (int position = 0; position < 2; position++) {
      Assert.assertEquals(wantedAlleles.get(position), fixedAlleles.get(position));
    }
  }
}
private static Type typeOfBiallelicVariant(Allele ref, Allele allele) { if (ref.isSymbolic()) throw new IllegalStateException( "Unexpected error: encountered a record with a symbolic reference allele"); if (allele.isSymbolic()) return Type.SYMBOLIC; if (ref.length() == allele.length()) { if (allele.length() == 1) return Type.SNP; else return Type.MNP; } // Important note: previously we were checking that one allele is the prefix of the other. // However, that's not an // appropriate check as can be seen from the following example: // REF = CTTA and ALT = C,CT,CA // This should be assigned the INDEL type but was being marked as a MIXED type because of the // prefix check. // In truth, it should be absolutely impossible to return a MIXED type from this method because // it simply // performs a pairwise comparison of a single alternate allele against the reference allele // (whereas the MIXED type // is reserved for cases of multiple alternate alleles of different types). Therefore, if we've // reached this point // in the code (so we're not a SNP, MNP, or symbolic allele), we absolutely must be an INDEL. return Type.INDEL; // old incorrect logic: // if (oneIsPrefixOfOther(ref, allele)) // return Type.INDEL; // else // return Type.MIXED; }
/**
 * Writes one tab-separated "AFINFO" row per allele count 0..N to the verbose writer, followed by
 * the P(f>0) and Qscore lines and a blank line.
 *
 * @param pos position label printed in every row
 * @param vc site whose reference allele and (last seen) non-reference allele are printed
 * @param PofF value printed on the "P(f>0)" line
 * @param phredScaledConfidence value printed on the "Qscore" line
 * @param model model whose allele frequency priors are printed per row
 */
protected void printVerboseData(
    String pos,
    VariantContext vc,
    double PofF,
    double phredScaledConfidence,
    final GenotypeLikelihoodsCalculationModel.Model model) {
  Allele reference = null;
  Allele alternate = null;
  for (Allele current : vc.getAlleles()) {
    if (current.isReference()) {
      reference = current;
    } else {
      // with several non-reference alleles the last one wins
      alternate = current;
    }
  }

  for (int count = 0; count <= N; count++) {
    StringBuilder row = new StringBuilder("AFINFO\t");
    row.append(pos).append("\t").append(reference).append("\t");
    if (alternate == null) {
      row.append("N/A");
    } else {
      row.append(alternate);
    }
    row.append("\t");
    row.append(count).append("/").append(N).append("\t");
    row.append(String.format("%.2f\t", ((float) count) / N));
    row.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[count]));
    verboseWriter.println(row.toString());
  }

  verboseWriter.println("P(f>0) = " + PofF);
  verboseWriter.println("Qscore = " + phredScaledConfidence);
  verboseWriter.println();
}
/**
 * Emits one call-report element per fact about a call: the site type, each allele, each sample's
 * PL string, each prior, the runtime, both posteriors and, for every non-reference allele used in
 * genotyping, its MLE allele count and per-allele posterior. Flushes the report afterwards.
 *
 * @param vc the site being reported
 * @param log10AlleleFrequencyPriors priors, written one element per index
 * @param runtimeNano runtime value written under "runtime.nano"
 * @param result calculation result supplying the posteriors and per-allele values
 */
protected final void printCallInfo(
    final VariantContext vc,
    final double[] log10AlleleFrequencyPriors,
    final long runtimeNano,
    final AFCalcResult result) {
  printCallElement(vc, "type", "ignore", vc.getType());

  int alleleIndex = 0;
  for (final Allele siteAllele : vc.getAlleles()) {
    printCallElement(vc, "allele", alleleIndex, siteAllele.getDisplayString());
    alleleIndex++;
  }

  for (final Genotype sampleGenotype : vc.getGenotypes()) {
    printCallElement(
        vc, "PL", sampleGenotype.getSampleName(), sampleGenotype.getLikelihoodsString());
  }

  int priorIndex = 0;
  for (final double prior : log10AlleleFrequencyPriors) {
    printCallElement(vc, "priorI", priorIndex, prior);
    priorIndex++;
  }

  printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
  printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0());
  printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0());

  for (final Allele used : result.getAllelesUsedInGenotyping()) {
    if (!used.isNonReference()) {
      continue;
    }
    printCallElement(vc, "MLE", used, result.getAlleleCountAtMLE(used));
    printCallElement(vc, "pNonRefByAllele", used, result.getLog10PosteriorOfAFGt0ForAllele(used));
  }

  callReport.flush();
}
/**
 * Tells whether any allele returned by {@code getAlleles()} is symbolic.
 *
 * @return true as soon as one symbolic allele is found, false if there is none
 */
public boolean hasSymbolicAlleles() {
  boolean foundSymbolic = false;
  for (final Allele candidate : getAlleles()) {
    if (candidate.isSymbolic()) {
      foundSymbolic = true;
      break;
    }
  }
  return foundSymbolic;
}
/**
 * Counts, over all genotypes, the alleles that are not no-calls.
 *
 * @return the number of genotype alleles for which {@code isNoCall()} is false
 */
public int getCalledChrCount() {
  int called = 0;
  for (final Genotype genotype : getGenotypes()) {
    for (final Allele allele : genotype.getAlleles()) {
      if (!allele.isNoCall()) {
        called++;
      }
    }
  }
  return called;
}
public boolean hasAllele(Allele allele, boolean ignoreRefState) { if (allele == REF || allele == ALT) // optimization for cached cases return true; for (Allele a : getAlleles()) { if (a.equals(allele, ignoreRefState)) return true; } return false; }
/**
 * Returns the PL indices for the pairing of allele index 0 with the given alternate allele, as
 * computed by {@code GenotypeLikelihoods.getPLIndecesOfAlleles(0, index)}.
 *
 * <p>The alternate allele's index is its 1-based position in {@code getAlternateAlleles()}.
 *
 * @param targetAllele the alternate allele to locate
 * @return the indices reported by {@code GenotypeLikelihoods.getPLIndecesOfAlleles}
 * @throws IllegalArgumentException if {@code targetAllele} is not one of the alternate alleles;
 *     previously the search fell through and indices for a nonexistent allele (one past the last
 *     alternate) were returned silently
 */
public int[] getGLIndecesOfAlternateAllele(Allele targetAllele) {
  int index = 1; // alternate alleles are numbered from 1
  for (Allele allele : getAlternateAlleles()) {
    if (allele.equals(targetAllele)) {
      return GenotypeLikelihoods.getPLIndecesOfAlleles(0, index);
    }
    index++;
  }
  throw new IllegalArgumentException(
      "Allele " + targetAllele + " is not an alternate allele of this VariantContext");
}
/**
 * Collects the features bound to {@code variants} at the current locus and converts each
 * convertible one into a VariantContext.
 *
 * <p>Raw HapMap records whose first or second allele string is the null-allele marker are
 * rewritten first: their deletion/insertion alleles and position are derived from the dbSNP
 * record with the same name. Such a record is skipped when no dbSNP record is found, when the
 * dbSNP record is mixed, or when the updated start lies before the reference window.
 *
 * @param tracker source of the features at this locus
 * @param ref reference context supplying the locus, base and window
 * @return the successfully converted contexts (possibly empty)
 */
private Collection<VariantContext> getVariantContexts(
    RefMetaDataTracker tracker, ReferenceContext ref) {
  List<Feature> features = tracker.getValues(variants, ref.getLocus());
  List<VariantContext> VCs = new ArrayList<VariantContext>(features.size());
  for (Feature record : features) {
    if (VariantContextAdaptors.canBeConvertedToVariantContext(record)) {
      // we need to special case the HapMap format because indels aren't handled correctly
      if (record instanceof RawHapMapFeature) {
        // is it an indel?
        RawHapMapFeature hapmap = (RawHapMapFeature) record;
        if (hapmap.getAlleles()[0].equals(RawHapMapFeature.NULL_ALLELE_STRING)
            || hapmap.getAlleles()[1].equals(RawHapMapFeature.NULL_ALLELE_STRING)) {
          // get the dbsnp object corresponding to this record (needed to help us distinguish
          // between insertions and deletions)
          VariantContext dbsnpVC = getDbsnp(hapmap.getName());
          if (dbsnpVC == null || dbsnpVC.isMixed()) continue;
          Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
          // the deletion allele is the single reference base; it is flagged as reference
          // exactly when dbSNP calls the event a simple insertion
          alleleMap.put(
              RawHapMapFeature.DELETION,
              Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
          // the insertion allele is the reference base followed by the record's second allele
          // string; its reference flag is the opposite of the deletion allele's
          alleleMap.put(
              RawHapMapFeature.INSERTION,
              Allele.create(
                  (char) ref.getBase() + ((RawHapMapFeature) record).getAlleles()[1],
                  !dbsnpVC.isSimpleInsertion()));
          hapmap.setActualAlleles(alleleMap);
          // also, use the correct positioning for insertions
          hapmap.updatePosition(dbsnpVC.getStart());
          // checked after the position update, so this compares the dbSNP-derived start
          if (hapmap.getStart() < ref.getWindow().getStart()) {
            logger.warn(
                "Hapmap record at "
                    + ref.getLocus()
                    + " represents an indel too large to be converted; skipping...");
            continue;
          }
        }
      }
      // ok, we might actually be able to turn this record in a variant context
      VariantContext vc =
          VariantContextAdaptors.toVariantContext(variants.getName(), record, ref);
      if (vc != null) // sometimes the track has odd stuff in it that can't be converted
        VCs.add(vc);
    }
  }
  return VCs;
}
private void validateAlleles() { // check alleles boolean alreadySeenRef = false, alreadySeenNull = false; for (Allele allele : alleles) { // make sure there's only one reference allele if (allele.isReference()) { if (alreadySeenRef) throw new IllegalArgumentException( "BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this); alreadySeenRef = true; } if (allele.isNoCall()) { throw new IllegalArgumentException( "BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this); } // make sure there's only one null allele if (allele.isNull()) { if (alreadySeenNull) throw new IllegalArgumentException( "BUG: Received two null alleles in VariantContext " + alleles + " this=" + this); alreadySeenNull = true; } } // make sure there's one reference allele if (!alreadySeenRef) throw new IllegalArgumentException("No reference allele found in VariantContext"); // if ( getType() == Type.INDEL ) { // if ( getReference().length() != (getLocation().size()-1) ) { long length = (stop - start) + 1; if ((getReference().isNull() && length != 1) || (getReference().isNonNull() && (length - getReference().length() > 1))) { throw new IllegalStateException( "BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); } }
/**
 * Computes, for every alternate allele, its length minus the reference allele's length.
 *
 * @return the length differences in alternate-allele order, or {@code null} when the type of
 *     this context is neither INDEL nor MIXED
 */
public List<Integer> getIndelLengths() {
  final boolean indelOrMixed = getType() == Type.INDEL || getType() == Type.MIXED;
  if (!indelOrMixed) {
    return null;
  }

  final List<Integer> sizes = new ArrayList<Integer>();
  for (final Allele alternate : getAlternateAlleles()) {
    // positive when the alternate is longer than the reference, negative when shorter
    sizes.add(alternate.length() - getReference().length());
  }
  return sizes;
}
/**
 * Verifies that every called allele of every available genotype is one of this context's
 * alleles.
 *
 * @throws IllegalStateException if the genotypes field is null, or if a called genotype allele
 *     is not reported by {@code hasAllele}
 */
private void validateGenotypes() {
  if (this.genotypes == null) {
    throw new IllegalStateException("Genotypes is null");
  }

  for (final Genotype genotype : this.genotypes) {
    if (!genotype.isAvailable()) {
      continue; // unavailable genotypes are not checked
    }
    for (Allele genotypeAllele : genotype.getAlleles()) {
      final boolean unknownCalledAllele = !hasAllele(genotypeAllele) && genotypeAllele.isCalled();
      if (unknownCalledAllele) {
        throw new IllegalStateException(
            "Allele in genotype " + genotypeAllele + " not in the variant context " + alleles);
      }
    }
  }
}
/**
 * Gathers the called alleles that occur in the given genotypes; helper for building subcontexts.
 *
 * @param genotypes the genotypes to scan
 * @return the set of called alleles found, plus this context's reference allele when none of the
 *     scanned alleles was flagged as reference
 */
private final Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) {
  final Set<Allele> collected = new HashSet<Allele>();
  boolean referenceEncountered = false;

  for (final Genotype genotype : genotypes) {
    for (final Allele allele : genotype.getAlleles()) {
      if (!referenceEncountered && allele.isReference()) {
        referenceEncountered = true;
      }
      if (allele.isCalled()) {
        collected.add(allele);
      }
    }
  }

  // the result always carries a reference allele
  if (!referenceEncountered) {
    collected.add(getReference());
  }
  return collected;
}
public static void addComplexGenotypesTest() { final List<Allele> allAlleles = Arrays.asList( Allele.create("A", true), Allele.create("C", false), Allele.create("G", false)); for (int nAlleles : Arrays.asList(2, 3)) { for (int highestPloidy : Arrays.asList(1, 2, 3)) { // site alleles final List<Allele> siteAlleles = allAlleles.subList(0, nAlleles); // possible alleles for genotypes final List<Allele> possibleGenotypeAlleles = new ArrayList<Allele>(siteAlleles); possibleGenotypeAlleles.add(Allele.NO_CALL); // there are n^ploidy possible genotypes final List<List<Allele>> possibleGenotypes = makeAllGenotypes(possibleGenotypeAlleles, highestPloidy); final int nPossibleGenotypes = possibleGenotypes.size(); VariantContextBuilder vb = new VariantContextBuilder("unittest", "1", 1, 1, siteAlleles); // first test -- create n copies of each genotype for (int i = 0; i < nPossibleGenotypes; i++) { final List<Genotype> samples = new ArrayList<Genotype>(3); samples.add(GenotypeBuilder.create("sample" + i, possibleGenotypes.get(i))); add(vb.genotypes(samples)); } // second test -- create one sample with each genotype { final List<Genotype> samples = new ArrayList<Genotype>(nPossibleGenotypes); for (int i = 0; i < nPossibleGenotypes; i++) { samples.add(GenotypeBuilder.create("sample" + i, possibleGenotypes.get(i))); } add(vb.genotypes(samples)); } // test mixed ploidy for (int i = 0; i < nPossibleGenotypes; i++) { for (int ploidy = 1; ploidy < highestPloidy; ploidy++) { final List<Genotype> samples = new ArrayList<Genotype>(highestPloidy); final List<Allele> genotype = possibleGenotypes.get(i).subList(0, ploidy); samples.add(GenotypeBuilder.create("sample" + i, genotype)); add(vb.genotypes(samples)); } } } } }
/**
 * Builds one reverse-clipping test case.
 *
 * @param expectedClip the clip value the test expects
 * @param ref the reference string for the case
 * @param alleles allele strings, each turned into an Allele via {@code Allele.create(String)}
 */
private ReverseClippingPositionTestProvider(
    final int expectedClip, final String ref, final String... alleles) {
  super(ReverseClippingPositionTestProvider.class);
  this.expectedClip = expectedClip;
  this.ref = ref;
  for (int i = 0; i < alleles.length; i++) {
    this.alleles.add(Allele.create(alleles[i]));
  }
}
private VariantCallContext generateEmptyContext( RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) { VariantContext vc; if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod( tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles); if (vcInput == null) return null; vc = new VariantContextBuilder( "UG_call", ref.getLocus().getContig(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()) .make(); } else { // deal with bad/non-standard reference bases if (!Allele.acceptableAlleleBases(new byte[] {ref.getBase()})) return null; Set<Allele> alleles = new HashSet<Allele>(); alleles.add(Allele.create(ref.getBase(), true)); vc = new VariantContextBuilder( "UG_call", ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStart(), alleles) .make(); } if (annotationEngine != null) { // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations final ReadBackedPileup pileup = rawContext.getBasePileup(); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc); } return new VariantCallContext(vc, false); }
/**
 * Returns the first base of an allele as a character.
 *
 * @param allele the allele to inspect
 * @return the first element of {@code allele.getBases()} cast to {@code char}, or {@code '.'}
 *     when the allele has no bases
 */
public char getFirstBase(Allele allele) {
  final byte[] alleleBases = allele.getBases();
  return alleleBases.length > 0 ? (char) alleleBases[0] : '.';
}
private static final boolean hasPLIncompatibleAlleles( final Collection<Allele> alleleSet1, final Collection<Allele> alleleSet2) { final Iterator<Allele> it1 = alleleSet1.iterator(); final Iterator<Allele> it2 = alleleSet2.iterator(); while (it1.hasNext() && it2.hasNext()) { final Allele a1 = it1.next(); final Allele a2 = it2.next(); if (!a1.equals(a2)) return true; } // by this point, at least one of the iterators is empty. All of the elements // we've compared are equal up until this point. But it's possible that the // sets aren't the same size, which is indicated by the test below. If they // are of the same size, though, the sets are compatible return it1.hasNext() || it2.hasNext(); }
static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) { for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles(); Iterator<Allele> all2It = site2Alleles.iterator(); for (Allele all1 : site1Alleles) { Allele all2 = all2It.next(); // this is OK, since allSamplesAreMergeable() if (all1.isNonReference() && all2.isNonReference()) // corresponding alleles are alternate return true; } } return false; }
private RepeatDetectorTest( boolean isTrueRepeat, String ref, String refAlleleString, String... altAlleleStrings) { super(RepeatDetectorTest.class); this.ref = "N" + ref; // add a dummy base for the event here this.isTrueRepeat = isTrueRepeat; List<Allele> alleles = new LinkedList<Allele>(); final Allele refAllele = Allele.create(refAlleleString, true); alleles.add(refAllele); for (final String altString : altAlleleStrings) { final Allele alt = Allele.create(altString, false); alleles.add(alt); } VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, 1 + refAllele.length(), alleles); this.vc = builder.make(); }
/**
 * Returns the merged allele for a pair of alleles, creating and caching it on first use.
 *
 * <p>The merged bases are the bases of {@code all1}, then {@code intermediateLength} bytes (taken
 * from {@code intermediateBases} when that array is non-null, otherwise left as zero bytes), then
 * the bases of {@code all2}.
 *
 * @param all1 allele contributing the leading bases
 * @param all2 allele contributing the trailing bases
 * @param creatingReferenceForFirstTime reference flag given to a newly created merged allele;
 *     ignored when the pair is already cached
 * @return the cached or newly created merged allele
 */
private Allele ensureMergedAllele(
    Allele all1, Allele all2, boolean creatingReferenceForFirstTime) {
  final AlleleOneAndTwo pairKey = new AlleleOneAndTwo(all1, all2);

  final Allele cached = mergedAlleles.get(pairKey);
  if (cached != null) {
    return cached;
  }

  final byte[] leading = all1.getBases();
  final byte[] trailing = all2.getBases();
  final int trailingOffset = leading.length + intermediateLength;

  final byte[] joined = new byte[trailingOffset + trailing.length];
  System.arraycopy(leading, 0, joined, 0, leading.length);
  if (intermediateBases != null) {
    System.arraycopy(intermediateBases, 0, joined, leading.length, intermediateLength);
  }
  System.arraycopy(trailing, 0, joined, trailingOffset, trailing.length);

  final Allele created = Allele.create(joined, creatingReferenceForFirstTime);
  mergedAlleles.put(pairKey, created);
  return created;
}
/** * Returns a context identical to this with the REF and ALT alleles reverse complemented. * * @param vc variant context * @return new vc */ public static VariantContext reverseComplement(VariantContext vc) { // create a mapping from original allele to reverse complemented allele HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size()); for (Allele originalAllele : vc.getAlleles()) { Allele newAllele; if (originalAllele.isNoCall() || originalAllele.isNull()) newAllele = originalAllele; else newAllele = Allele.create( BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); alleleMap.put(originalAllele, newAllele); } // create new Genotype objects GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples()); for (final Genotype genotype : vc.getGenotypes()) { List<Allele> newAlleles = new ArrayList<Allele>(); for (Allele allele : genotype.getAlleles()) { Allele newAllele = alleleMap.get(allele); if (newAllele == null) newAllele = Allele.NO_CALL; newAlleles.add(newAllele); } newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles)); } return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make(); }
public void validateReferenceBases(Allele reference, Byte paddedRefBase) { if (reference == null) return; // don't validate if we're a complex event if (!isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference())) { throw new TribbleException.InternalCodecException( String.format( "the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString())); } // we also need to validate the padding base for simple indels if (hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase)) { throw new TribbleException.InternalCodecException( String.format( "the padded REF base is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), (char) paddedRefBase.byteValue(), (char) getReferenceBaseForIndel().byteValue())); } }
static boolean doubleAllelesSegregatePerfectlyAmongSamples( VariantContext vc1, VariantContext vc2) { // Check that Alleles at vc1 and at vc2 always segregate together in all samples (including // reference): Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>(); Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>(); // Note the segregation of the alleles for the reference genome: allele1ToAllele2.put(vc1.getReference(), vc2.getReference()); allele2ToAllele1.put(vc2.getReference(), vc1.getReference()); // Note the segregation of the alleles for each sample (and check that it is consistent with the // reference and all previous samples). for (final Genotype gt1 : vc1.getGenotypes()) { Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); List<Allele> site1Alleles = gt1.getAlleles(); List<Allele> site2Alleles = gt2.getAlleles(); Iterator<Allele> all2It = site2Alleles.iterator(); for (Allele all1 : site1Alleles) { Allele all2 = all2It.next(); Allele all1To2 = allele1ToAllele2.get(all1); if (all1To2 == null) allele1ToAllele2.put(all1, all2); else if (!all1To2.equals(all2)) // all1 segregates with two different alleles at site 2 return false; Allele all2To1 = allele2ToAllele1.get(all2); if (all2To1 == null) allele2ToAllele1.put(all2, all1); else if (!all2To1.equals(all1)) // all2 segregates with two different alleles at site 1 return false; } } return true; }
/** * Outputs all intervals that are behind the current reference locus * * @param refLocus the current reference locus * @param refBase the reference allele */ private void outputFinishedIntervals(final GenomeLoc refLocus, final byte refBase) { // output any intervals that were finished final List<GenomeLoc> toRemove = new LinkedList<>(); for (GenomeLoc key : intervalMap.keySet()) { if (key.isBefore(refLocus)) { final IntervalStratification intervalStats = intervalMap.get(key); outputStatsToVCF(intervalStats, Allele.create(refBase, true)); if (hasMissingLoci(intervalStats)) { outputMissingInterval(intervalStats); } toRemove.add(key); } } for (GenomeLoc key : toRemove) { intervalMap.remove(key); } }
/**
 * Picks the longest reference allele among the given contexts.
 *
 * @param VCs the contexts to inspect
 * @return the longest reference allele (the first one encountered at that length), or
 *     {@code null} when the list is empty
 * @throws UserException.BadInput if a reference allele has the same length as the current
 *     longest one but is not equal to it
 */
private static Allele determineReferenceAllele(List<VariantContext> VCs) {
  Allele longest = null;

  for (VariantContext vc : VCs) {
    final Allele candidate = vc.getReference();

    if (longest == null || longest.length() < candidate.length()) {
      longest = candidate;
      continue;
    }

    final boolean sameLengthButDifferent =
        longest.length() == candidate.length() && !longest.equals(candidate);
    if (sameLengthButDifferent) {
      throw new UserException.BadInput(
          String.format(
              "The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s",
              vc.getChr(), vc.getStart(), longest, candidate));
    }
  }

  return longest;
}
@BeforeSuite public void setup() { final File referenceFile = new File(b37KGReference); try { IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile); genomeLocParser = new GenomeLocParser(seq); } catch (FileNotFoundException ex) { throw new UserException.CouldNotReadInputFile(referenceFile, ex); } // alleles Aref = Allele.create("A", true); Cref = Allele.create("C", true); T = Allele.create("T"); C = Allele.create("C"); ATC = Allele.create("ATC"); ATCATC = Allele.create("ATCATC"); }
/**
 * Read in a list of ExactCall objects from reader, keeping only those with starts in startsToKeep
 * or all sites (if this is empty).
 *
 * <p>Each line is split on tabs into location, variable, key and value. The first two lines of
 * the input are skipped. Lines are accumulated into one record until a line whose variable is
 * "type" is read; that line closes the record, which is then built and (if its start is wanted)
 * added to the result.
 *
 * <p>NOTE(review): when the input ends, the lines accumulated since the last "type" line are
 * returned-over rather than emitted, so a final record not followed by a "type" line appears to
 * be dropped - confirm this is intended.
 *
 * @param reader a just-opened reader sitting at the start of the file
 * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should
 *     be kept
 * @param parser a genome loc parser to create genome locs
 * @return a list of ExactCall objects in reader
 * @throws IOException if reading a line from {@code reader} fails
 * @throws IllegalArgumentException if any argument is null
 */
public static List<ExactCall> readExactLog(
    final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser)
    throws IOException {
  if (reader == null) throw new IllegalArgumentException("reader cannot be null");
  if (startsToKeep == null) throw new IllegalArgumentException("startsToKeep cannot be null");
  if (parser == null) throw new IllegalArgumentException("GenomeLocParser cannot be null");
  List<ExactCall> calls = new LinkedList<ExactCall>();
  // skip the header line
  reader.readLine();
  // skip the first "type" line
  reader.readLine();
  // outer loop: one iteration per record, each starting from fresh accumulators
  while (true) {
    final VariantContextBuilder builder = new VariantContextBuilder();
    final List<Allele> alleles = new ArrayList<Allele>();
    final List<Genotype> genotypes = new ArrayList<Genotype>();
    // index 0 receives log10PosteriorOfAFEq0, index 1 receives log10PosteriorOfAFGt0
    final double[] posteriors = new double[2];
    // priors are not read from the log; every record gets this fixed pair
    final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true);
    final List<Integer> mle = new ArrayList<Integer>();
    final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
    long runtimeNano = -1;
    GenomeLoc currentLoc = null;
    // inner loop: one iteration per line of the current record
    while (true) {
      final String line = reader.readLine();
      // end of input: return what has been completed so far
      if (line == null) return calls;
      final String[] parts = line.split("\t");
      final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]);
      final String variable = parts[1];
      final String key = parts[2];
      final String value = parts[3];
      // the record's location is the location of the first line read for it
      if (currentLoc == null) currentLoc = lineLoc;
      if (variable.equals("type")) {
        // a "type" line terminates the record accumulated so far
        if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
          builder.alleles(alleles);
          // the stop is derived from the length of the first allele read for the record
          final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
          builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
          builder.genotypes(genotypes);
          final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {}));
          final AFCalcResult result =
              new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
          calls.add(new ExactCall(builder.make(), runtimeNano, result));
        }
        break;
      } else if (variable.equals("allele")) {
        // the allele whose key is "0" is the reference allele
        final boolean isRef = key.equals("0");
        alleles.add(Allele.create(value, isRef));
      } else if (variable.equals("PL")) {
        // key is used as the sample name, value as the PL field
        final GenotypeBuilder gb = new GenotypeBuilder(key);
        gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
        genotypes.add(gb.make());
      } else if (variable.equals("log10PosteriorOfAFEq0")) {
        posteriors[0] = Double.valueOf(value);
      } else if (variable.equals("log10PosteriorOfAFGt0")) {
        posteriors[1] = Double.valueOf(value);
      } else if (variable.equals("MLE")) {
        mle.add(Integer.valueOf(value));
      } else if (variable.equals("pNonRefByAllele")) {
        // key holds the allele's bases
        final Allele a = Allele.create(key);
        log10pNonRefByAllele.put(a, Double.valueOf(value));
      } else if (variable.equals("runtime.nano")) {
        runtimeNano = Long.valueOf(value);
      } else {
        // nothing to do
      }
    }
  }
}
public void writeBeagleOutput( VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) { GenomeLoc currentLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), preferredVC); StringBuffer beagleOut = new StringBuffer(); String marker = String.format("%s:%d ", currentLoc.getContig(), currentLoc.getStart()); beagleOut.append(marker); if (markers != null) markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t"); for (Allele allele : preferredVC.getAlleles()) { String bglPrintString; if (allele.isNoCall() || allele.isNull()) bglPrintString = "-"; else bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele beagleOut.append(String.format("%s ", bglPrintString)); if (markers != null) markers.append(bglPrintString).append("\t"); } if (markers != null) markers.append("\n"); GenotypesContext preferredGenotypes = preferredVC.getGenotypes(); GenotypesContext otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null; for (String sample : samples) { boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE; Genotype genotype; boolean isValidation; // use sample as key into genotypes structure if (preferredGenotypes.containsSample(sample)) { genotype = preferredGenotypes.get(sample); isValidation = isValidationSite; } else if (otherGenotypes != null && otherGenotypes.containsSample(sample)) { genotype = otherGenotypes.get(sample); isValidation = !isValidationSite; } else { // there is magically no genotype for this sample. throw new StingException( "Sample " + sample + " arose with no genotype in variant or validation VCF. 
This should never happen."); } /* * Use likelihoods if: is validation, prior is negative; or: is not validation, has genotype key */ double[] log10Likelihoods = null; if ((isValidation && prior < 0.0) || genotype.hasLikelihoods()) { log10Likelihoods = genotype.getLikelihoods().getAsVector(); // see if we need to randomly mask out genotype in this position. if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() <= insertedNoCallRate) { // we are masking out this genotype log10Likelihoods = isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS; } if (isMaleOnChrX) { log10Likelihoods[1] = -255; // todo -- warning this is dangerous for multi-allele case } } /** otherwise, use the prior uniformly */ else if (!isValidation && genotype.isCalled() && !genotype.hasLikelihoods()) { // hack to deal with input VCFs with no genotype likelihoods. Just assume the called // genotype // is confident. This is useful for Hapmap and 1KG release VCFs. double AA = (1.0 - prior) / 2.0; double AB = (1.0 - prior) / 2.0; double BB = (1.0 - prior) / 2.0; if (genotype.isHomRef()) { AA = prior; } else if (genotype.isHet()) { AB = prior; } else if (genotype.isHomVar()) { BB = prior; } log10Likelihoods = MathUtils.toLog10(new double[] {AA, isMaleOnChrX ? 0.0 : AB, BB}); } else { log10Likelihoods = isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS; } writeSampleLikelihoods(beagleOut, preferredVC, log10Likelihoods); } beagleWriter.println(beagleOut.toString()); }