/**
 * Checks that a remapped sample name set on a {@link VCFCodec} replaces the sample name both in
 * the decoded header and in the decoded records of the HiSeq.10000.vcf test file.
 *
 * <p>The header must report exactly one genotype sample named "FOOSAMPLE", and each of (at most)
 * the first 10 records must carry exactly that one sample name.
 *
 * @throws Exception if the test file cannot be opened or read
 */
@Test
public void testVCFHeaderSampleRenamingSingleSampleVCF() throws Exception {
  final VCFCodec codec = new VCFCodec();
  codec.setRemappedSampleName("FOOSAMPLE");

  // try-with-resources: the file handle is released even when an assertion below fails
  try (FileInputStream vcfStream =
      new FileInputStream(variantTestDataRoot + "HiSeq.10000.vcf")) {
    final AsciiLineReaderIterator vcfIterator =
        new AsciiLineReaderIterator(new AsciiLineReader(vcfStream));

    final VCFHeader header = (VCFHeader) codec.readHeader(vcfIterator).getHeaderValue();
    Assert.assertEquals(
        header.getNGenotypeSamples(), 1, "Wrong number of samples in remapped header");
    Assert.assertEquals(
        header.getGenotypeSamples().get(0),
        "FOOSAMPLE",
        "Sample name in remapped header has incorrect value");

    // Only the first 10 records are inspected.
    int recordCount = 0;
    while (vcfIterator.hasNext() && recordCount < 10) {
      recordCount++;
      final VariantContext vcfRecord = codec.decode(vcfIterator.next());
      Assert.assertEquals(
          vcfRecord.getSampleNames().size(),
          1,
          "Wrong number of samples in vcf record after remapping");
      Assert.assertEquals(
          vcfRecord.getSampleNames().iterator().next(),
          "FOOSAMPLE",
          "Wrong sample in vcf record after remapping");
    }
  }
}
// this method is intended to reconcile uniquified sample names // it comes into play when calling this annotation from GenotypeGVCFs with --uniquifySamples // because founderIds // is derived from the sampleDB, which comes from the input sample names, but vc will have // uniquified (i.e. different) // sample names. Without this check, the founderIds won't be found in the vc and the annotation // won't be calculated. protected static Set<String> validateFounderIDs( final Set<String> founderIds, final VariantContext vc) { Set<String> vcSamples = new HashSet<>(); Set<String> returnIDs = founderIds; vcSamples.addAll(vc.getSampleNames()); if (!vcSamples.isEmpty()) { if (founderIds != null) { vcSamples.removeAll(founderIds); if (vcSamples.equals(vc.getSampleNames())) returnIDs = vc.getSampleNames(); } } return returnIDs; }
/**
 * Registers a variant's start position, globally and per sample.
 *
 * <p>The variant is ignored unless it lies on the chromosome of the first gene in {@code genes}
 * and its start satisfies {@code chromStart <= start < chromEnd}. An accepted start is added to
 * {@code positions}, and to the {@code sample2positions} entry of every sample whose genotype is
 * available, called, not a no-call and not non-informative.
 *
 * @param ctx the variant to register
 */
private void addVariant(final VariantContext ctx) {
  final boolean onGeneChromosome = ctx.getChr().equals(genes.get(0).getChromosome());
  if (!onGeneChromosome) {
    return;
  }

  final int start = ctx.getStart();
  if (start >= chromEnd || start < chromStart) {
    return;
  }
  positions.add(start);

  for (final String sample : ctx.getSampleNames()) {
    final Genotype genotype = ctx.getGenotype(sample);
    final boolean informative =
        genotype.isAvailable()
            && genotype.isCalled()
            && !genotype.isNoCall()
            && !genotype.isNonInformative();
    if (!informative) {
      continue;
    }

    // Create the per-sample position set the first time the sample is seen.
    Set<Integer> samplePositions = sample2positions.get(sample);
    if (samplePositions == null) {
      samplePositions = new HashSet<Integer>();
      sample2positions.put(sample, samplePositions);
    }
    samplePositions.add(start);
  }
}
private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) { if (!wroteHeader) { wroteHeader = true; // setup the header fields Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY)); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); for (VCFHeaderLine field : hInfo) { if (field instanceof VCFFormatHeaderLine) { allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) field).getID()); } } samples = new LinkedHashSet<String>(); if (sampleName != null) { samples.add(sampleName); } else { // try VCF first samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); if (samples.isEmpty()) { List<Feature> features = tracker.getValues(variants, loc); if (features.size() == 0) throw new IllegalStateException( "No rod data is present, but we just created a VariantContext"); Feature f = features.get(0); if (f instanceof RawHapMapFeature) samples.addAll(Arrays.asList(((RawHapMapFeature) f).getSampleIDs())); else samples.addAll(vc.getSampleNames()); } } vcfwriter.writeHeader(new VCFHeader(hInfo, samples)); } vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings); vcfwriter.add(vc); }