private static List<String> calcVCFGenotypeKeys(VariantContext vc) { Set<String> keys = new HashSet<String>(); boolean sawGoodGT = false; boolean sawGoodQual = false; boolean sawGenotypeFilter = false; for (Genotype g : vc.getGenotypes().values()) { keys.addAll(g.getAttributes().keySet()); if (g.isAvailable()) sawGoodGT = true; if (g.hasNegLog10PError()) sawGoodQual = true; if (g.isFiltered() && g.isCalled()) sawGenotypeFilter = true; } if (sawGoodQual) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY); if (sawGenotypeFilter) keys.add(VCFConstants.GENOTYPE_FILTER_KEY); List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys)); // make sure the GT is first if (sawGoodGT) { List<String> newList = new ArrayList<String>(sortedList.size() + 1); newList.add(VCFConstants.GENOTYPE_KEY); newList.addAll(sortedList); sortedList = newList; } return sortedList; }
/** * add the genotype data * * @param vc the variant context * @param genotypeFormatKeys Genotype formatting string * @param alleleMap alleles for this context * @throws IOException for writer */ private void addGenotypeData( VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys) throws IOException { for (String sample : mHeader.getGenotypeSamples()) { mWriter.write(VCFConstants.FIELD_SEPARATOR); Genotype g = vc.getGenotype(sample); if (g == null) { // TODO -- The VariantContext needs to know what the general ploidy is of the samples // TODO -- We shouldn't be assuming diploid genotypes here! mWriter.write(VCFConstants.EMPTY_GENOTYPE); continue; } List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size()); for (String key : genotypeFormatKeys) { if (key.equals(VCFConstants.GENOTYPE_KEY)) { if (!g.isAvailable()) { throw new ReviewedStingException( "GTs cannot be missing for some samples if they are available for others in the record"); } writeAllele(g.getAllele(0), alleleMap); for (int i = 1; i < g.getPloidy(); i++) { mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED); writeAllele(g.getAllele(i), alleleMap); } continue; } Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4; // some exceptions if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) { if (Math.abs(g.getNegLog10PError() - Genotype.NO_NEG_LOG_10PERROR) < 1e-6) val = VCFConstants.MISSING_VALUE_v4; else { val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL)); } } else if (key.equals(VCFConstants.GENOTYPE_FILTER_KEY)) { val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED); } VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key); if (metaData != null) { int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size()); if (numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4)) { // If we have a missing field but multiple values are expected, we need to construct a // new string with all fields. // For example, if Number=2, the string has to be ".,." StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4); for (int i = 1; i < numInFormatField; i++) { sb.append(","); sb.append(VCFConstants.MISSING_VALUE_v4); } val = sb.toString(); } } // assume that if key is absent, then the given string encoding suffices String outputValue = formatVCFField(val); if (outputValue != null) attrs.add(outputValue); } // strip off trailing missing values for (int i = attrs.size() - 1; i >= 0; i--) { if (isMissingValue(attrs.get(i))) attrs.remove(i); else break; } for (int i = 0; i < attrs.size(); i++) { if (i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY)) mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR); mWriter.write(attrs.get(i)); } } }