/**
 * Returns the base substitution type (e.g. transition/transversion class) for a biallelic SNP,
 * computed from the first base of the reference allele and the first base of the single
 * alternate allele.
 *
 * @param context the variant context to inspect; must be a biallelic SNP
 * @return the substitution type from the reference base to the alternate base
 * @throws IllegalStateException if {@code context} is not a SNP or is not biallelic
 */
public static BaseUtils.BaseSubstitutionType getSNPSubstitutionType(VariantContext context) {
    // The original message ("bialleic non-SNP") was misspelled and inverted: this guard
    // rejects contexts that are NOT SNPs or NOT biallelic.
    if (!context.isSNP() || !context.isBiallelic())
        throw new IllegalStateException(
            "Requested SNP substitution type for non-SNP or non-biallelic context " + context);
    return BaseUtils.SNPSubstitutionType(
        context.getReference().getBases()[0], context.getAlternateAllele(0).getBases()[0]);
}
@Requires({"eval != null", "comp != null"}) private EvalCompMatchType doEvalAndCompMatch( final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) { // find all of the matching comps if (comp.getType() != eval.getType()) return EvalCompMatchType.NO_MATCH; // find the comp which matches both the reference allele and alternate allele from eval final Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0); final Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0); if ((altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference()))) return EvalCompMatchType.STRICT; else return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT; }
private Map<String, Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) { if (!stratifiedContext.hasBasePileup()) return null; HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>(); for (Allele allele : vc.getAlternateAlleles()) alleleCounts.put(allele.getBases()[0], 0); ReadBackedPileup pileup = stratifiedContext.getBasePileup(); int totalDepth = pileup.size(); Map<String, Object> map = new HashMap<String, Object>(); map.put(getKeyNames().get(0), totalDepth); // put total depth in right away if (totalDepth == 0) return map; // done, can not compute FA at 0 coverage!! int mq0 = 0; // number of "ref" reads that are acually mq0 for (PileupElement p : pileup) { if (p.getMappingQual() == 0) { mq0++; continue; } if (alleleCounts.containsKey(p.getBase())) // non-mq0 read and it's an alt alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase()) + 1); } if (mq0 == totalDepth) return map; // if all reads are mq0, there is nothing left to do // we need to add counts in the correct order String[] fracs = new String[alleleCounts.size()]; for (int i = 0; i < vc.getAlternateAlleles().size(); i++) { fracs[i] = String.format( "%.3f", ((float) alleleCounts.get(vc.getAlternateAllele(i).getBases()[0])) / (totalDepth - mq0)); } map.put(getKeyNames().get(1), fracs); return map; }
public String update1( VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { nCalledLoci++; // Note from Eric: // This is really not correct. What we really want here is a polymorphic vs. monomorphic count // (i.e. on the Genotypes). // So in order to maintain consistency with the previous implementation (and the intention of // the original author), I've // added in a proxy check for monomorphic status here. // Protect against case when vc only as no-calls too - can happen if we strafity by sample and // sample as a single no-call. if (vc1.isMonomorphicInSamples()) { nRefLoci++; } else { switch (vc1.getType()) { case NO_VARIATION: // shouldn't get here break; case SNP: nVariantLoci++; nSNPs++; if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++; break; case MNP: nVariantLoci++; nMNPs++; if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++; break; case INDEL: nVariantLoci++; if (vc1.isSimpleInsertion()) nInsertions++; else if (vc1.isSimpleDeletion()) nDeletions++; else nComplex++; break; case MIXED: nVariantLoci++; nMixed++; break; case SYMBOLIC: nSymbolic++; break; default: throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType()); } } String refStr = vc1.getReference().getBaseString().toUpperCase(); String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null; // if (aaStr.equals(".")) { // aaStr = refStr; // } // ref aa alt class // A C A der homozygote // A C C anc homozygote // A A A ref homozygote // A A C // A C A // A C C for (final Genotype g : vc1.getGenotypes()) { final String altStr = vc1.getAlternateAlleles().size() > 0 ? 
vc1.getAlternateAllele(0).getBaseString().toUpperCase() : null; switch (g.getType()) { case NO_CALL: nNoCalls++; break; case HOM_REF: nHomRef++; if (aaStr != null && altStr != null && !refStr.equalsIgnoreCase(aaStr)) { nHomDerived++; } break; case HET: nHets++; break; case HOM_VAR: nHomVar++; if (aaStr != null && altStr != null && !altStr.equalsIgnoreCase(aaStr)) { nHomDerived++; } break; case MIXED: break; default: throw new ReviewedStingException("BUG: Unexpected genotype type: " + g); } } return null; // we don't capture any interesting sites }
/** * add a record to the file * * @param vc the Variant Context object * @param refBase the ref base used for indels * @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE * EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER * THE EVENT INSTEAD) */ public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedToEndOfAlleles) { if (mHeader == null) throw new IllegalStateException( "The VCF Header must be written before records can be added: " + locationString()); if (doNotWriteGenotypes) vc = VariantContext.modifyGenotypes(vc, null); try { vc = VariantContext.createVariantContextWithPaddedAlleles( vc, refBase, refBaseShouldBeAppliedToEndOfAlleles); // if we are doing on the fly indexing, add the record ***before*** we write any bytes if (indexer != null) indexer.addFeature(vc, positionalStream.getPosition()); Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size()); alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup // CHROM mWriter.write(vc.getChr()); mWriter.write(VCFConstants.FIELD_SEPARATOR); // POS mWriter.write(String.valueOf(vc.getStart())); mWriter.write(VCFConstants.FIELD_SEPARATOR); // ID String ID = vc.hasID() ? 
vc.getID() : VCFConstants.EMPTY_ID_FIELD; mWriter.write(ID); mWriter.write(VCFConstants.FIELD_SEPARATOR); // REF alleleMap.put(vc.getReference(), "0"); String refString = vc.getReference().getDisplayString(); mWriter.write(refString); mWriter.write(VCFConstants.FIELD_SEPARATOR); // ALT if (vc.isVariant()) { Allele altAllele = vc.getAlternateAllele(0); alleleMap.put(altAllele, "1"); String alt = altAllele.getDisplayString(); mWriter.write(alt); for (int i = 1; i < vc.getAlternateAlleles().size(); i++) { altAllele = vc.getAlternateAllele(i); alleleMap.put(altAllele, String.valueOf(i + 1)); alt = altAllele.getDisplayString(); mWriter.write(","); mWriter.write(alt); } } else { mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD); } mWriter.write(VCFConstants.FIELD_SEPARATOR); // QUAL if (!vc.hasNegLog10PError()) mWriter.write(VCFConstants.MISSING_VALUE_v4); else mWriter.write(getQualValue(vc.getPhredScaledQual())); mWriter.write(VCFConstants.FIELD_SEPARATOR); // FILTER String filters = vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (filtersWereAppliedToContext || vc.filtersWereApplied() ? 
VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED); mWriter.write(filters); mWriter.write(VCFConstants.FIELD_SEPARATOR); // INFO Map<String, String> infoFields = new TreeMap<String, String>(); for (Map.Entry<String, Object> field : vc.getAttributes().entrySet()) { String key = field.getKey(); if (key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY)) continue; String outputValue = formatVCFField(field.getValue()); if (outputValue != null) infoFields.put(key, outputValue); } writeInfoString(infoFields); // FORMAT if (vc.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)) { mWriter.write(VCFConstants.FIELD_SEPARATOR); mWriter.write(vc.getAttributeAsString(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, "")); } else { List<String> genotypeAttributeKeys = new ArrayList<String>(); if (vc.hasGenotypes()) { genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc)); } else if (mHeader.hasGenotypingData()) { // this needs to be done in case all samples are no-calls genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); } if (genotypeAttributeKeys.size() > 0) { String genotypeFormatString = ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys); mWriter.write(VCFConstants.FIELD_SEPARATOR); mWriter.write(genotypeFormatString); addGenotypeData(vc, alleleMap, genotypeAttributeKeys); } } mWriter.write("\n"); mWriter.flush(); // necessary so that writing to an output stream will work } catch (IOException e) { throw new RuntimeException("Unable to write the VCF object to " + locationString()); } }
/** * Subset VC record if necessary and emit the modified record (provided it satisfies criteria for * printing) * * @param tracker the ROD tracker * @param ref reference information * @param context alignment info * @return 1 if the record was printed to the output file, 0 if otherwise */ @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker == null) return 0; Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation()); if (vcs == null || vcs.size() == 0) { return 0; } for (VariantContext vc : vcs) { if (MENDELIAN_VIOLATIONS) { boolean foundMV = false; for (MendelianViolation mv : mvSet) { if (mv.isViolation(vc)) { foundMV = true; // System.out.println(vc.toString()); if (outMVFile != null) outMVFileStream.format( "MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + "childG=%s childGL=%s\n", vc.getChr(), vc.getStart(), vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getChromosomeCount(vc.getAlternateAllele(0)), mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(), vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(mv.getSampleChild()).toBriefString(), vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString()); } } if (!foundMV) break; } if (DISCORDANCE_ONLY) { Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation()); if (!isDiscordant(vc, compVCs)) return 0; } if (CONCORDANCE_ONLY) { Collection<VariantContext> compVCs = tracker.getValues(concordanceTrack, context.getLocation()); if (!isConcordant(vc, compVCs)) return 0; } if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic()) continue; if 
(alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic()) continue; if (!selectedTypes.contains(vc.getType())) continue; VariantContext sub = subsetRecord(vc, samples); if ((sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED)) { for (VariantContextUtils.JexlVCMatchExp jexl : jexls) { if (!VariantContextUtils.match(sub, jexl)) { return 0; } } if (SELECT_RANDOM_NUMBER) { randomlyAddVariant(++variantNumber, sub, ref.getBase()); } else if (!SELECT_RANDOM_FRACTION || (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { vcfWriter.add(sub); } } } return 1; }
private Map<String, Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) { if (!stratifiedContext.hasExtendedEventPileup()) { return null; } ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup(); if (pileup == null) return null; int totalDepth = pileup.size(); Map<String, Object> map = new HashMap<String, Object>(); map.put(getKeyNames().get(0), totalDepth); // put total depth in right away if (totalDepth == 0) return map; int mq0 = 0; // number of "ref" reads that are acually mq0 HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>(); Allele refAllele = vc.getReference(); for (Allele allele : vc.getAlternateAlleles()) { if (allele.isNoCall()) { continue; // this does not look so good, should we die??? } alleleCounts.put(getAlleleRepresentation(allele), 0); } for (ExtendedEventPileupElement e : pileup.toExtendedIterable()) { if (e.getMappingQual() == 0) { mq0++; continue; } if (e.isInsertion()) { final String b = e.getEventBases(); if (alleleCounts.containsKey(b)) { alleleCounts.put(b, alleleCounts.get(b) + 1); } } else { if (e.isDeletion()) { if (e.getEventLength() == refAllele.length()) { // this is indeed the deletion allele recorded in VC final String b = DEL; if (alleleCounts.containsKey(b)) { alleleCounts.put(b, alleleCounts.get(b) + 1); } } // else { // System.out.print(" deletion of WRONG length found"); // } } } } if (mq0 == totalDepth) return map; String[] fracs = new String[alleleCounts.size()]; for (int i = 0; i < vc.getAlternateAlleles().size(); i++) fracs[i] = String.format( "%.3f", ((float) alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i)))) / (totalDepth - mq0)); map.put(getKeyNames().get(1), fracs); // map.put(getKeyNames().get(0), counts); return map; }