@Override public void update2( VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (comp != null) { // we only need to consider sites in comp if (REQUIRE_IDENTICAL_ALLELES && (eval != null && haveDifferentAltAlleles(eval, comp))) nDifferentAlleleSites++; else { SiteStatus evalStatus = calcSiteStatus(eval); final Set<String> evalSamples = getWalker().getSampleNamesForEvaluation(); if (comp.hasGenotypes() && !evalSamples.isEmpty() && comp.hasGenotypes(evalSamples)) // if we have genotypes in both eval and comp, subset comp down just the samples in eval comp = comp.subContextFromSamples(evalSamples, false); SiteStatus compStatus = calcSiteStatus(comp); counts[compStatus.ordinal()][evalStatus.ordinal()]++; } } }
/** * Update the attributes of the attributes map given the VariantContext to reflect the proper * chromosome-based VCF tags * * @param vc the VariantContext * @param attributes the attributes map to populate; must not be null; may contain old values * @param removeStaleValues should we remove stale values from the mapping? * @return the attributes map provided as input, returned for programming convenience */ public static Map<String, Object> calculateChromosomeCounts( VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) { final int AN = vc.getCalledChrCount(); // if everyone is a no-call, remove the old attributes if requested if (AN == 0 && removeStaleValues) { if (attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY)) attributes.remove(VCFConstants.ALLELE_COUNT_KEY); if (attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY)) attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY); if (attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY)) attributes.remove(VCFConstants.ALLELE_NUMBER_KEY); return attributes; } if (vc.hasGenotypes()) { attributes.put(VCFConstants.ALLELE_NUMBER_KEY, AN); // if there are alternate alleles, record the relevant tags if (vc.getAlternateAlleles().size() > 0) { final ArrayList<String> alleleFreqs = new ArrayList<String>(); final ArrayList<Integer> alleleCounts = new ArrayList<Integer>(); for (Allele allele : vc.getAlternateAlleles()) { int altChromosomes = vc.getCalledChrCount(allele); alleleCounts.add(altChromosomes); if (AN == 0) { alleleFreqs.add("0.0"); } else { // todo -- this is a performance problem final String freq = String.format( makePrecisionFormatStringFromDenominatorValue((double) AN), ((double) altChromosomes / (double) AN)); alleleFreqs.add(freq); } } attributes.put( VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts); attributes.put( VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? 
alleleFreqs.get(0) : alleleFreqs); } else { attributes.put(VCFConstants.ALLELE_COUNT_KEY, 0); attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0); } } return attributes; }
// // helper routines // private SiteStatus calcSiteStatus(VariantContext vc) { if (vc == null) return SiteStatus.NO_CALL; if (vc.isFiltered()) return SiteStatus.FILTERED; if (vc.isMonomorphicInSamples()) return SiteStatus.MONO; if (vc.hasGenotypes()) return SiteStatus .POLY; // must be polymorphic if isMonomorphicInSamples was false and there are genotypes if (vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) { int ac = 0; if (vc.getNAlleles() > 2) { return SiteStatus.POLY; } else ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0); return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO; } else { return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do } }
/**
 * Tells whether a record qualifies for Beagle output.
 *
 * @param v the record to test; may be null
 * @return true only if {@code v} is non-null, not filtered, biallelic and has genotypes
 */
public static boolean canBeOutputToBeagle(VariantContext v) {
    if (v == null || v.isFiltered()) {
        return false;
    }
    return v.isBiallelic() && v.hasGenotypes();
}
/** * add a record to the file * * @param vc the Variant Context object * @param refBase the ref base used for indels * @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE * EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER * THE EVENT INSTEAD) */ public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedToEndOfAlleles) { if (mHeader == null) throw new IllegalStateException( "The VCF Header must be written before records can be added: " + locationString()); if (doNotWriteGenotypes) vc = VariantContext.modifyGenotypes(vc, null); try { vc = VariantContext.createVariantContextWithPaddedAlleles( vc, refBase, refBaseShouldBeAppliedToEndOfAlleles); // if we are doing on the fly indexing, add the record ***before*** we write any bytes if (indexer != null) indexer.addFeature(vc, positionalStream.getPosition()); Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size()); alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup // CHROM mWriter.write(vc.getChr()); mWriter.write(VCFConstants.FIELD_SEPARATOR); // POS mWriter.write(String.valueOf(vc.getStart())); mWriter.write(VCFConstants.FIELD_SEPARATOR); // ID String ID = vc.hasID() ? 
vc.getID() : VCFConstants.EMPTY_ID_FIELD; mWriter.write(ID); mWriter.write(VCFConstants.FIELD_SEPARATOR); // REF alleleMap.put(vc.getReference(), "0"); String refString = vc.getReference().getDisplayString(); mWriter.write(refString); mWriter.write(VCFConstants.FIELD_SEPARATOR); // ALT if (vc.isVariant()) { Allele altAllele = vc.getAlternateAllele(0); alleleMap.put(altAllele, "1"); String alt = altAllele.getDisplayString(); mWriter.write(alt); for (int i = 1; i < vc.getAlternateAlleles().size(); i++) { altAllele = vc.getAlternateAllele(i); alleleMap.put(altAllele, String.valueOf(i + 1)); alt = altAllele.getDisplayString(); mWriter.write(","); mWriter.write(alt); } } else { mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD); } mWriter.write(VCFConstants.FIELD_SEPARATOR); // QUAL if (!vc.hasNegLog10PError()) mWriter.write(VCFConstants.MISSING_VALUE_v4); else mWriter.write(getQualValue(vc.getPhredScaledQual())); mWriter.write(VCFConstants.FIELD_SEPARATOR); // FILTER String filters = vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (filtersWereAppliedToContext || vc.filtersWereApplied() ? 
VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED); mWriter.write(filters); mWriter.write(VCFConstants.FIELD_SEPARATOR); // INFO Map<String, String> infoFields = new TreeMap<String, String>(); for (Map.Entry<String, Object> field : vc.getAttributes().entrySet()) { String key = field.getKey(); if (key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY)) continue; String outputValue = formatVCFField(field.getValue()); if (outputValue != null) infoFields.put(key, outputValue); } writeInfoString(infoFields); // FORMAT if (vc.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)) { mWriter.write(VCFConstants.FIELD_SEPARATOR); mWriter.write(vc.getAttributeAsString(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, "")); } else { List<String> genotypeAttributeKeys = new ArrayList<String>(); if (vc.hasGenotypes()) { genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc)); } else if (mHeader.hasGenotypingData()) { // this needs to be done in case all samples are no-calls genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); } if (genotypeAttributeKeys.size() > 0) { String genotypeFormatString = ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys); mWriter.write(VCFConstants.FIELD_SEPARATOR); mWriter.write(genotypeFormatString); addGenotypeData(vc, alleleMap, genotypeAttributeKeys); } } mWriter.write("\n"); mWriter.flush(); // necessary so that writing to an output stream will work } catch (IOException e) { throw new RuntimeException("Unable to write the VCF object to " + locationString()); } }