示例#1
0
  /**
   * Checks that a sample name set through {@code setRemappedSampleName} replaces the sample name
   * in the decoded header and in up to the first 10 decoded records of a VCF whose header is
   * expected to hold exactly one genotype sample.
   *
   * @throws Exception if the test VCF cannot be opened, read, decoded or closed
   */
  @Test
  public void testVCFHeaderSampleRenamingSingleSampleVCF() throws Exception {
    final String remappedName = "FOOSAMPLE";
    final int maxRecordsToCheck = 10;

    final VCFCodec codec = new VCFCodec();
    codec.setRemappedSampleName(remappedName);

    // try-with-resources releases the file handle even when an assertion below fails
    try (final FileInputStream vcfStream =
        new FileInputStream(variantTestDataRoot + "HiSeq.10000.vcf")) {
      final AsciiLineReaderIterator vcfIterator =
          new AsciiLineReaderIterator(new AsciiLineReader(vcfStream));
      final VCFHeader header = (VCFHeader) codec.readHeader(vcfIterator).getHeaderValue();

      Assert.assertEquals(
          header.getNGenotypeSamples(), 1, "Wrong number of samples in remapped header");
      Assert.assertEquals(
          header.getGenotypeSamples().get(0),
          remappedName,
          "Sample name in remapped header has incorrect value");

      // Only a prefix of the file is decoded; the iterator continues from where the header ended.
      int recordCount = 0;
      while (vcfIterator.hasNext() && recordCount < maxRecordsToCheck) {
        recordCount++;
        final VariantContext vcfRecord = codec.decode(vcfIterator.next());

        Assert.assertEquals(
            vcfRecord.getSampleNames().size(),
            1,
            "Wrong number of samples in vcf record after remapping");
        Assert.assertEquals(
            vcfRecord.getSampleNames().iterator().next(),
            remappedName,
            "Wrong sample in vcf record after remapping");
      }
    }
  }
 /**
  * Reconciles founder IDs with the (possibly uniquified) sample names of a variant context.
  *
  * <p>This comes into play when the annotation is called from GenotypeGVCFs with
  * --uniquifySamples: founderIds is derived from the sampleDB, which comes from the input sample
  * names, while vc carries uniquified (i.e. different) sample names. Without this check the
  * founder IDs would not be found in vc and the annotation would not be calculated.
  *
  * @param founderIds founder sample IDs; may be null
  * @param vc variant context whose sample names are compared against the founder IDs
  * @return the sample names of vc when vc has at least one sample, founderIds is non-null and no
  *     sample of vc appears in founderIds; otherwise founderIds itself (possibly null)
  */
 protected static Set<String> validateFounderIDs(
     final Set<String> founderIds, final VariantContext vc) {
   // Work on a private copy so the sample names held by vc are never modified.
   final Set<String> unmatchedSamples = new HashSet<>(vc.getSampleNames());
   if (unmatchedSamples.isEmpty() || founderIds == null) {
     return founderIds;
   }
   // removeAll reports whether anything was removed, i.e. whether some founder is a sample of vc.
   final boolean founderPresent = unmatchedSamples.removeAll(founderIds);
   return founderPresent ? founderIds : vc.getSampleNames();
 }
示例#3
0
 /**
  * Registers the start position of a variant and, per sample, the positions at which that sample
  * has a usable genotype.
  *
  * <p>The variant is ignored when its chromosome differs from that of the first entry in
  * {@code genes}, or when its start lies outside the half-open range
  * {@code [chromStart, chromEnd)}.
  *
  * @param ctx the variant to record
  */
 private void addVariant(final VariantContext ctx) {
   if (!ctx.getChr().equals(genes.get(0).getChromosome())) {
     return;
   }
   if (ctx.getStart() >= chromEnd || ctx.getStart() < chromStart) {
     return;
   }

   final int variantStart = ctx.getStart();
   positions.add(variantStart);

   for (final String sampleName : ctx.getSampleNames()) {
     final Genotype genotype = ctx.getGenotype(sampleName);
     // Same filters, evaluated in the same order, as four successive "continue" checks.
     final boolean usable =
         genotype.isAvailable()
             && genotype.isCalled()
             && !genotype.isNoCall()
             && !genotype.isNonInformative();
     if (usable) {
       Set<Integer> samplePositions = sample2positions.get(sampleName);
       if (samplePositions == null) {
         // First usable genotype seen for this sample: create its position set lazily.
         samplePositions = new HashSet<Integer>();
         sample2positions.put(sampleName, samplePositions);
       }
       samplePositions.add(variantStart);
     }
   }
 }
示例#4
0
  /**
   * Writes one variant context to {@code vcfwriter}, emitting the VCF header first on the initial
   * call.
   *
   * <p>On the first call the header lines are collected, the set of allowed genotype FORMAT keys
   * is filled from them, and the sample list is resolved in this order: the explicit
   * {@code sampleName} if set; otherwise the samples reported by the VCF header of the bound
   * track; otherwise, if that is empty, the sample IDs of the first bound feature when it is a
   * {@code RawHapMapFeature}, or else the sample names of {@code vc}.
   *
   * <p>Every call then passes {@code vc} through
   * {@code GATKVariantContextUtils.purgeUnallowedGenotypeAttributes} with the allowed FORMAT keys
   * before adding it to the writer.
   *
   * @param vc the variant context to write
   * @param tracker source of the features bound to {@code variants}; consulted only when no
   *     samples were obtained from the VCF header
   * @param loc location used to look up the bound features
   * @throws IllegalStateException if the sample list must come from the bound features but none
   *     are present at {@code loc}
   */
  private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) {
    if (!wroteHeader) {
      // Flip the flag up front so the header logic is attempted only once.
      wroteHeader = true;

      // Collect the header lines; the GT FORMAT line is always included.
      Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
      headerLines.addAll(
          GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())));
      headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));

      // Every FORMAT line ID found in the header becomes an allowed genotype attribute.
      allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
      for (VCFHeaderLine headerLine : headerLines) {
        if (headerLine instanceof VCFFormatHeaderLine) {
          allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) headerLine).getID());
        }
      }

      samples = new LinkedHashSet<String>();
      if (sampleName == null) {
        // try VCF first
        samples =
            SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));

        if (samples.isEmpty()) {
          // No samples in a VCF header: fall back to the features bound at this location.
          List<Feature> boundFeatures = tracker.getValues(variants, loc);
          if (boundFeatures.isEmpty()) {
            throw new IllegalStateException(
                "No rod data is present, but we just created a VariantContext");
          }

          Feature firstFeature = boundFeatures.get(0);
          if (firstFeature instanceof RawHapMapFeature) {
            samples.addAll(Arrays.asList(((RawHapMapFeature) firstFeature).getSampleIDs()));
          } else {
            samples.addAll(vc.getSampleNames());
          }
        }
      } else {
        samples.add(sampleName);
      }

      vcfwriter.writeHeader(new VCFHeader(headerLines, samples));
    }

    vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
    vcfwriter.add(vc);
  }