Example #1
  private void purgeQueue() {

    final ReferenceContext refContext = queue.getFirst().ref;

    // divide them up by source
    while (!queue.isEmpty()) {
      VCcontext context = queue.removeFirst();
      for (final VariantContext vc : context.vcs) {
        if (vc.getSource().equals(source1)) sourceVCs1.add(vc);
        else sourceVCs2.add(vc);
      }
    }

    writeAndPurgeAllEqualVariants(sourceVCs1, sourceVCs2, SAME_STATUS);

    if (sourceVCs1.isEmpty()) {
      writeAll(sourceVCs2, source2, null);
    } else if (sourceVCs2.isEmpty()) {
      writeAll(sourceVCs1, source1, null);
    } else {
      resolveByHaplotype(refContext);
    }

    // allow for GC of the data
    sourceVCs1.clear();
    sourceVCs2.clear();
  }
Example #2
  @Test
  public void testVCFHeaderSampleRenamingSingleSampleVCF() throws Exception {
    final VCFCodec codec = new VCFCodec();
    codec.setRemappedSampleName("FOOSAMPLE");
    final AsciiLineReaderIterator vcfIterator =
        new AsciiLineReaderIterator(
            new AsciiLineReader(new FileInputStream(variantTestDataRoot + "HiSeq.10000.vcf")));
    final VCFHeader header = (VCFHeader) codec.readHeader(vcfIterator).getHeaderValue();

    Assert.assertEquals(
        header.getNGenotypeSamples(), 1, "Wrong number of samples in remapped header");
    Assert.assertEquals(
        header.getGenotypeSamples().get(0),
        "FOOSAMPLE",
        "Sample name in remapped header has incorrect value");

    int recordCount = 0;
    while (vcfIterator.hasNext() && recordCount < 10) {
      recordCount++;
      final VariantContext vcfRecord = codec.decode(vcfIterator.next());

      Assert.assertEquals(
          vcfRecord.getSampleNames().size(),
          1,
          "Wrong number of samples in vcf record after remapping");
      Assert.assertEquals(
          vcfRecord.getSampleNames().iterator().next(),
          "FOOSAMPLE",
          "Wrong sample in vcf record after remapping");
    }
  }
Example #3
  private VariantContext getDbsnp(String rsID) {
    if (dbsnpIterator == null) {

      if (dbsnp == null)
        throw new UserException.BadInput(
            "No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records");

      RMDTrackBuilder builder =
          new RMDTrackBuilder(
              getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
              getToolkit().getGenomeLocParser(),
              getToolkit().getArguments().unsafe,
              getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods,
              null);
      dbsnpIterator =
          builder
              .createInstanceOfTrack(VCFCodec.class, new File(dbsnp.dbsnp.getSource()))
              .getIterator();
      // Note that we should really use some sort of seekable iterator here so that the search
      // doesn't take forever (but it's complicated because the hapmap location doesn't match
      // the dbsnp location, so we don't know where to seek to)
    }

    while (dbsnpIterator.hasNext()) {
      GATKFeature feature = dbsnpIterator.next();
      VariantContext vc = (VariantContext) feature.getUnderlyingObject();
      if (vc.getID().equals(rsID)) return vc;
    }

    return null;
  }
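On the seekable-iterator note above: when the dbSNP file is indexed, htsjdk can query a window directly instead of scanning from the front. A minimal sketch, assuming a tribble/tabix index exists next to the VCF and that a target interval is already known (which, per the comment, is exactly the missing piece here); the class and method names are mine:

import java.io.IOException;
import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.CloseableTribbleIterator;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;

final class IndexedDbsnpLookup {
  // Hypothetical helper: query an indexed dbSNP VCF for an rsID within a known window.
  static VariantContext findByRsId(
      final String vcfPath, final String chrom, final int start, final int end, final String rsID)
      throws IOException {
    try (AbstractFeatureReader<VariantContext, LineIterator> reader =
        AbstractFeatureReader.getFeatureReader(vcfPath, new VCFCodec(), true)) {
      final CloseableTribbleIterator<VariantContext> it = reader.query(chrom, start, end);
      while (it.hasNext()) {
        final VariantContext vc = it.next();
        if (rsID.equals(vc.getID())) return vc;
      }
    }
    return null; // not found within the queried window
  }
}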
  /**
   * For each variant in the file, determine the phasing for the child and replace the child's
   * genotype with the trio's genotype
   *
   * @param tracker the reference meta-data tracker
   * @param ref the reference context
   * @param context the alignment context
   * @return always null; results are written through the VCF writer rather than reduced
   */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker != null) {
      for (VariantContext vc :
          tracker.getValues(variantCollection.variants, context.getLocation())) {
        vc = vc.subContextFromSamples(samples);
        if (!vc.isPolymorphicInSamples()) continue;

        double log10pSomatic = calcLog10pSomatic(vc);

        // write in the somatic status probability
        Map<String, Object> attrs = new HashMap<String, Object>();
        if (!minimalVCF) attrs.putAll(vc.getAttributes());
        attrs.put(SOMATIC_LOD_TAG_NAME, log10pSomatic);
        if (log10pSomatic > somaticMinLOD) {
          attrs.put(VCFConstants.SOMATIC_KEY, true);
          attrs.put(SOMATIC_NONREF_TAG_NAME, calculateTumorNNR(vc));
          attrs.put(SOMATIC_AC_TAG_NAME, calculateTumorAC(vc));
        }
        final VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(attrs);
        VariantContextUtils.calculateChromosomeCounts(builder, false);
        VariantContext newvc = builder.make();

        vcfWriter.add(newvc);
      }

      return null;
    }

    return null;
  }
Example #5
  private boolean determineAndWriteOverlap(
      final VariantContext vc1, final VariantContext vc2, final String status) {
    final int allelesFrom1In2 = findOverlap(vc1, vc2);
    final int allelesFrom2In1 = findOverlap(vc2, vc1);
    final int totalAllelesIn1 = vc1.getAlternateAlleles().size();
    final int totalAllelesIn2 = vc2.getAlternateAlleles().size();

    final boolean allAllelesFrom1Overlap = allelesFrom1In2 == totalAllelesIn1;
    final boolean allAllelesFrom2Overlap = allelesFrom2In1 == totalAllelesIn2;

    boolean thereIsOverlap = true;

    if (allAllelesFrom1Overlap && allAllelesFrom2Overlap) {
      writeOne(vc1, INTERSECTION_SET, status);
    } else if (allAllelesFrom1Overlap) {
      writeOne(vc2, INTERSECTION_SET, source1 + "IsSubsetOf" + source2);
    } else if (allAllelesFrom2Overlap) {
      writeOne(vc1, INTERSECTION_SET, source2 + "IsSubsetOf" + source1);
    } else if (allelesFrom1In2 > 0) {
      writeOne(vc1, INTERSECTION_SET, SOME_ALLELES_MATCH_STATUS);
    } else if (totalAllelesIn1 > 1 || totalAllelesIn2 > 1) {
      // we don't handle multi-allelics in the haplotype-based reconstruction
      writeOne(vc1, INTERSECTION_SET, SAME_START_DIFFERENT_ALLELES_STATUS);
    } else {
      thereIsOverlap = false;
    }

    return thereIsOverlap;
  }
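A quick worked case for the branching above (illustrative alleles): if vc1 carries alternate alleles {A, T} and vc2 carries {T}, then allelesFrom1In2 = 1 of totalAllelesIn1 = 2 while allelesFrom2In1 = 1 of totalAllelesIn2 = 1, so allAllelesFrom2Overlap holds and vc1 is written with status source2 + "IsSubsetOf" + source1.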
Example #6
 private static int findOverlap(final VariantContext target, final VariantContext comparison) {
   int overlap = 0;
   for (final Allele allele : target.getAlternateAlleles()) {
     if (comparison.hasAlternateAllele(allele)) overlap++;
   }
   return overlap;
 }
 private static void testVCsAreEqual(
     final List<VariantContext> VCs1, final List<VariantContext> VCs2) {
   Assert.assertEquals(VCs1.size(), VCs2.size(), "number of Variant Contexts");
   for (int i = 0; i < VCs1.size(); i++) {
     final VariantContext vc1 = VCs1.get(i);
     final VariantContext vc2 = VCs2.get(i);
     Assert.assertEquals(vc1.toStringDecodeGenotypes(), vc2.toStringDecodeGenotypes());
   }
 }
Example #8
 @Override
 public VariantContext next() {
   try {
     final VariantContext vc = codec.decode(source);
     return vc == null ? null : vc.fullyDecode(header, false);
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
 }
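Given the contract above, where next() yields null once the underlying source is exhausted rather than throwing, a caller drains the stream like this; reader here is a hypothetical instance of the (unnamed) enclosing class:

// Collect every fully-decoded record; next() returns null at end of input.
final List<VariantContext> records = new ArrayList<VariantContext>();
VariantContext vc;
while ((vc = reader.next()) != null) {
  records.add(vc);
}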
Example #9
  private Collection<VariantContext> getVariantContexts(
      RefMetaDataTracker tracker, ReferenceContext ref) {

    List<Feature> features = tracker.getValues(variants, ref.getLocus());
    List<VariantContext> VCs = new ArrayList<VariantContext>(features.size());

    for (Feature record : features) {
      if (VariantContextAdaptors.canBeConvertedToVariantContext(record)) {
        // we need to special case the HapMap format because indels aren't handled correctly
        if (record instanceof RawHapMapFeature) {

          // is it an indel?
          RawHapMapFeature hapmap = (RawHapMapFeature) record;
          if (hapmap.getAlleles()[0].equals(RawHapMapFeature.NULL_ALLELE_STRING)
              || hapmap.getAlleles()[1].equals(RawHapMapFeature.NULL_ALLELE_STRING)) {
            // get the dbsnp object corresponding to this record (needed to help us distinguish
            // between insertions and deletions)
            VariantContext dbsnpVC = getDbsnp(hapmap.getName());
            if (dbsnpVC == null || dbsnpVC.isMixed()) continue;

            Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
            alleleMap.put(
                RawHapMapFeature.DELETION,
                Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
            alleleMap.put(
                RawHapMapFeature.INSERTION,
                Allele.create(
                    (char) ref.getBase() + ((RawHapMapFeature) record).getAlleles()[1],
                    !dbsnpVC.isSimpleInsertion()));
            hapmap.setActualAlleles(alleleMap);

            // also, use the correct positioning for insertions
            hapmap.updatePosition(dbsnpVC.getStart());

            if (hapmap.getStart() < ref.getWindow().getStart()) {
              logger.warn(
                  "Hapmap record at "
                      + ref.getLocus()
                      + " represents an indel too large to be converted; skipping...");
              continue;
            }
          }
        }

        // ok, we might actually be able to turn this record into a variant context
        VariantContext vc =
            VariantContextAdaptors.toVariantContext(variants.getName(), record, ref);

        // sometimes the track has odd stuff in it that can't be converted
        if (vc != null) VCs.add(vc);
      }
    }

    return VCs;
  }
  /**
   * Provides the next record from the underlying iterator after applying filter strings generated
   * by the set of filters in use by the iterator.
   */
  @Override
  public VariantContext next() {
    final VariantContext ctx = this.iterator.next();
    final Set<String> filterStrings = new HashSet<String>();

    // Collect variant level filters
    for (final VariantFilter filter : this.filters) {
      final String val = filter.filter(ctx);
      if (val != null) filterStrings.add(val);
    }

    // Collect genotype level filters in a Map of Sample -> List<filter string>
    final ListMap<String, String> gtFilterStrings = new ListMap<String, String>();
    final Set<String> variantSamples = new HashSet<String>();
    for (final Genotype gt : ctx.getGenotypes()) {
      if (gt.isCalled() && !gt.isHomRef()) variantSamples.add(gt.getSampleName());

      for (final GenotypeFilter filter : gtFilters) {
        final String filterString = filter.filter(ctx, gt);
        if (filterString != null) gtFilterStrings.add(gt.getSampleName(), filterString);
      }
    }

    // If all genotypes are filtered apply a site level filter
    if (gtFilterStrings.keySet().containsAll(variantSamples)) {
      filterStrings.add(ALL_GTS_FILTERED);
    }

    // Make a builder and set the site level filter appropriately
    final VariantContextBuilder builder = new VariantContextBuilder(ctx);
    if (filterStrings.isEmpty()) {
      builder.passFilters();
    } else {
      builder.filters(filterStrings);
    }

    // Apply filters to the necessary genotypes
    builder.noGenotypes();
    final List<Genotype> newGenotypes = new ArrayList<Genotype>(ctx.getNSamples());
    for (final Genotype gt : ctx.getGenotypes()) {
      final GenotypeBuilder gtBuilder = new GenotypeBuilder(gt);
      final List<String> filtersLocal = gtFilterStrings.get(gt.getSampleName());

      if (filtersLocal == null || filtersLocal.isEmpty()) {
        gtBuilder.filter(PASS_FILTER);
      } else {
        gtBuilder.filters(filtersLocal);
      }
      newGenotypes.add(gtBuilder.make());
    }
    builder.genotypes(newGenotypes);

    return builder.make();
  }
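The null-means-pass convention the loops above rely on is easy to exercise in isolation. A self-contained sketch using a hypothetical stand-in interface that mirrors the contract (the real VariantFilter/GenotypeFilter types are not shown in this snippet):

import htsjdk.variant.variantcontext.VariantContext;

// Hypothetical stand-in for the site-level filter contract used above:
// return null to pass the site, or a short filter name to fail it.
interface SiteFilter {
  String filter(VariantContext ctx);
}

final class LowQualFilter implements SiteFilter {
  private final double minQual;

  LowQualFilter(final double minQual) {
    this.minQual = minQual;
  }

  @Override
  public String filter(final VariantContext ctx) {
    return ctx.getPhredScaledQual() < minQual ? "LowQual" : null;
  }
}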
Example #11
  private VariantContext getMatchingSnpEffRecord(
      List<VariantContext> snpEffRecords, VariantContext vc) {
    for (VariantContext snpEffRecord : snpEffRecords) {
      if (snpEffRecord.hasSameAlternateAllelesAs(vc)
          && snpEffRecord.getReference().equals(vc.getReference())) {
        return snpEffRecord;
      }
    }

    return null;
  }
Example #12
  @Test
  public void shouldPreserveSymbolicAlleleCase() {
    VCFFileReader reader =
        new VCFFileReader(new File(VariantBaseTest.variantTestDataRoot + "breakpoint.vcf"), false);
    VariantContext variant = reader.iterator().next();
    reader.close();

    // VCF v4.1 s1.4.5
    // Tools processing VCF files are not required to preserve case in the allele String, except for
    // IDs, which are case sensitive.
    Assert.assertTrue(variant.getAlternateAllele(0).getDisplayString().contains("chr12"));
  }
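The preserved case is easy to check directly: htsjdk stores breakend-style strings (anything containing '[' or ']') as symbolic alleles and keeps their text verbatim. A minimal sketch with made-up coordinates:

    // Breakend alt alleles are stored as symbolic, so the display string,
    // including the "chr12" case, survives untouched.
    final Allele bnd = Allele.create("N[chr12:123456[", false);
    Assert.assertEquals(bnd.getDisplayString(), "N[chr12:123456[");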
Example #13
 // This method is intended to reconcile uniquified sample names. It comes into play when
 // calling this annotation from GenotypeGVCFs with --uniquifySamples because founderIds is
 // derived from the sampleDB, which comes from the input sample names, but vc will have
 // uniquified (i.e. different) sample names. Without this check, the founderIds won't be
 // found in the vc and the annotation won't be calculated.
 protected static Set<String> validateFounderIDs(
     final Set<String> founderIds, final VariantContext vc) {
   Set<String> vcSamples = new HashSet<>();
   Set<String> returnIDs = founderIds;
   vcSamples.addAll(vc.getSampleNames());
   if (!vcSamples.isEmpty()) {
     if (founderIds != null) {
       vcSamples.removeAll(founderIds);
       if (vcSamples.equals(vc.getSampleNames())) returnIDs = vc.getSampleNames();
     }
   }
   return returnIDs;
 }
Example #14
  private List<SnpEffEffect> parseSnpEffRecord(VariantContext snpEffRecord) {
    List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();

    Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
    if (effectFieldValue == null) {
      return parsedEffects;
    }

    // The VCF codec stores multi-valued fields as a List<String>, and single-valued fields
    // as a String. We can have either in the case of SnpEff, since there may be one or more
    // than one effect in this record.
    List<String> individualEffects;
    if (effectFieldValue instanceof List) {
      individualEffects = (List<String>) effectFieldValue;
    } else {
      individualEffects = Arrays.asList((String) effectFieldValue);
    }

    for (String effectString : individualEffects) {
      String[] effectNameAndMetadata = effectString.split(SNPEFF_EFFECT_METADATA_DELIMITER);

      if (effectNameAndMetadata.length != 2) {
        logger.warn(
            String.format(
                "Malformed SnpEff effect field at %s:%d, skipping: %s",
                snpEffRecord.getChr(), snpEffRecord.getStart(), effectString));
        continue;
      }

      String effectName = effectNameAndMetadata[0];
      String[] effectMetadata =
          effectNameAndMetadata[1].split(SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER, -1);

      SnpEffEffect parsedEffect = new SnpEffEffect(effectName, effectMetadata);

      if (parsedEffect.isWellFormed()) {
        parsedEffects.add(parsedEffect);
      } else {
        logger.warn(
            String.format(
                "Skipping malformed SnpEff effect field at %s:%d. Error was: \"%s\". Field was: \"%s\"",
                snpEffRecord.getChr(),
                snpEffRecord.getStart(),
                parsedEffect.getParseError(),
                effectString));
      }
    }

    return parsedEffects;
  }
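The List-versus-String handling above recurs for any multi-valued INFO field, so it is worth factoring out. A small helper, as a sketch (class and method names are mine, not from the original):

import java.util.Collections;
import java.util.List;
import htsjdk.variant.variantcontext.VariantContext;

final class InfoFieldUtil {
  // Normalize an INFO attribute to a list: the VCF codec hands back a
  // String for a single value and a List<String> for several.
  @SuppressWarnings("unchecked")
  static List<String> attributeAsList(final VariantContext vc, final String key) {
    final Object value = vc.getAttribute(key);
    if (value == null) return Collections.emptyList();
    if (value instanceof List) return (List<String>) value;
    return Collections.singletonList((String) value);
  }
}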
Example #15
  private void writeDifferences(
      final List<VariantContext> source1Alleles, final List<VariantContext> source2Alleles) {
    int currentIndex1 = 0, currentIndex2 = 0;
    final int size1 = source1Alleles.size(), size2 = source2Alleles.size();
    VariantContext current1 = source1Alleles.get(0);
    VariantContext current2 = source2Alleles.get(0);

    while (currentIndex1 < size1 || currentIndex2 < size2) {
      if (current1 == null) {
        writeOne(current2, source2, null);
        currentIndex2++;
        current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2) : null);
      } else if (current2 == null) {
        writeOne(current1, source1, null);
        currentIndex1++;
        current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1) : null);
      } else {

        final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1);
        final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2);

        if (loc1.getStart() == loc2.getStart() || loc1.overlapsP(loc2)) {
          String status;
          if (loc1.getStart() == loc2.getStart()) {
            final String allele1 = current1.getAlternateAllele(0).getBaseString();
            final String allele2 = current2.getAlternateAllele(0).getBaseString();
            if (allele1.contains(allele2) || allele2.contains(allele1))
              status = ONE_ALLELE_SUBSET_OF_OTHER_STATUS;
            else status = SAME_START_DIFFERENT_ALLELES_STATUS;
          } else {
            status = OVERLAPPING_EVENTS_STATUS;
          }

          writeOne(current1, INTERSECTION_SET, status);
          currentIndex1++;
          currentIndex2++;
          current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1) : null);
          current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2) : null);
        } else if (loc1.isBefore(loc2)) {
          writeOne(current1, source1, null);
          currentIndex1++;
          current1 = (currentIndex1 < size1 ? source1Alleles.get(currentIndex1) : null);
        } else {
          writeOne(current2, source2, null);
          currentIndex2++;
          current2 = (currentIndex2 < size2 ? source2Alleles.get(currentIndex2) : null);
        }
      }
    }
  }
  protected void noteCurrentRecord(VariantContext vc) {
    super.noteCurrentRecord(vc); // first, check for errors

    // then, update mostUpstreamWritableLoc:
    int mostUpstreamWritableIndex = vc.getStart() - maxCachingStartDistance;
    this.mostUpstreamWritableLoc = Math.max(BEFORE_MOST_UPSTREAM_LOC, mostUpstreamWritableIndex);
  }
  @Override
  protected AFCalculationResult computeLog10PNonRef(
      final VariantContext vc,
      final int defaultPloidy,
      final double[] log10AlleleFrequencyPriors,
      final StateTracker stateTracker) {
    Utils.nonNull(vc, "vc is null");
    Utils.nonNull(log10AlleleFrequencyPriors, "log10AlleleFrequencyPriors is null");
    Utils.nonNull(stateTracker, "stateTracker is null");
    final int numAlternateAlleles = vc.getNAlleles() - 1;

    final List<double[]> genotypeLikelihoods = getGLs(vc.getGenotypes(), true);
    final int numSamples = genotypeLikelihoods.size() - 1;
    final int numChr = 2 * numSamples;

    // queue of AC conformations to process
    final Deque<ExactACset> ACqueue = new LinkedList<>();

    // mapping of ExactACset indexes to the objects
    final Map<ExactACcounts, ExactACset> indexesToACset = new HashMap<>(numChr + 1);

    // add AC=0 to the queue
    final int[] zeroCounts = new int[numAlternateAlleles];
    final ExactACset zeroSet = new ExactACset(numSamples + 1, new ExactACcounts(zeroCounts));
    ACqueue.add(zeroSet);
    indexesToACset.put(zeroSet.getACcounts(), zeroSet);

    while (!ACqueue.isEmpty()) {

      // compute log10Likelihoods
      final ExactACset set = ACqueue.remove();

      calculateAlleleCountConformation(
          set,
          genotypeLikelihoods,
          numChr,
          ACqueue,
          indexesToACset,
          log10AlleleFrequencyPriors,
          stateTracker);

      // clean up memory
      indexesToACset.remove(set.getACcounts());
    }

    return getResultFromFinalState(vc, log10AlleleFrequencyPriors, stateTracker);
  }
  private double log10PLFromSamples(
      final VariantContext vc, final String sample, boolean calcRefP) {

    Genotype g = vc.getGenotype(sample);
    double log10pSample = -1000;
    if (!g.isNoCall()) {
      final double[] gLikelihoods = MathUtils.normalizeFromLog10(g.getLikelihoods().getAsVector());
      log10pSample = Math.log10(calcRefP ? gLikelihoods[0] : 1 - gLikelihoods[0]);
      log10pSample = Double.isInfinite(log10pSample) ? -10000 : log10pSample;
    }
    return log10pSample;
  }
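For intuition on the normalization above, a worked example with illustrative PLs of {0, 30, 300}: the log10-likelihoods are {0, -3, -30}, which normalize to posteriors of roughly {0.999, 0.000999, 1e-30}. With calcRefP = true that gives log10pSample ≈ log10(0.999) ≈ -0.0004; with calcRefP = false, log10(1 - 0.999) ≈ -3.0. The -10000 guard only triggers when all the probability mass sits on one side and the log becomes -Infinity.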
Example #19
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null || !BaseUtils.isRegularBase(ref.getBase())) return 0;

    Collection<VariantContext> contexts = getVariantContexts(tracker, ref);

    for (VariantContext vc : contexts) {
      VariantContextBuilder builder = new VariantContextBuilder(vc);

      // set the appropriate sample name if necessary
      if (sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName())) {
        Genotype g =
            new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make();
        builder.genotypes(g);
      }

      final VariantContext withID = variantOverlapAnnotator.annotateRsID(tracker, builder.make());
      writeRecord(withID, tracker, ref.getLocus());
    }

    return 1;
  }
Example #20
  private void writeAndPurgeAllEqualVariants(
      final List<VariantContext> sourceVCs1,
      final List<VariantContext> sourceVCs2,
      final String status) {

    int currentIndex1 = 0, currentIndex2 = 0;
    int size1 = sourceVCs1.size(), size2 = sourceVCs2.size();
    VariantContext current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1) : null);
    VariantContext current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2) : null);

    while (current1 != null && current2 != null) {

      final GenomeLoc loc1 = getToolkit().getGenomeLocParser().createGenomeLoc(current1);
      final GenomeLoc loc2 = getToolkit().getGenomeLocParser().createGenomeLoc(current2);

      if (loc1.equals(loc2)
          || (loc1.getStart() == loc2.getStart()
              && (current1.getAlternateAlleles().size() > 1
                  || current2.getAlternateAlleles().size() > 1))) {
        // test the alleles
        if (determineAndWriteOverlap(current1, current2, status)) {
          sourceVCs1.remove(currentIndex1);
          sourceVCs2.remove(currentIndex2);
          size1--;
          size2--;
        } else {
          currentIndex1++;
          currentIndex2++;
        }
        current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1) : null);
        current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2) : null);
      } else if (loc1.isBefore(loc2)) {
        currentIndex1++;
        current1 = (currentIndex1 < size1 ? sourceVCs1.get(currentIndex1) : null);
      } else {
        currentIndex2++;
        current2 = (currentIndex2 < size2 ? sourceVCs2.get(currentIndex2) : null);
      }
    }
  }
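Note the index bookkeeping in the overlap branch: List.remove(int) shifts the remaining elements left, so after a successful write the unchanged currentIndex1/currentIndex2 already point at the next candidates; the indices advance explicitly only when nothing was removed.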
Example #21
  private byte[] generateHaplotype(
      final List<VariantContext> sourceVCs, final ReferenceContext refContext) {

    final StringBuilder sb = new StringBuilder();

    final int startPos = refContext.getWindow().getStart();
    int currentPos = startPos;
    final byte[] reference = refContext.getBases();

    for (final VariantContext vc : sourceVCs) {
      // add any missing reference context
      int vcStart = vc.getStart();
      final int refAlleleLength = vc.getReference().length();
      // this is a deletion (whereas for other events the padding base isn't part of the position)
      if (refAlleleLength == vc.getEnd() - vc.getStart()) vcStart++;

      while (currentPos < vcStart) sb.append((char) reference[currentPos++ - startPos]);

      // add the alt allele
      sb.append(vc.getAlternateAllele(0).getBaseString());

      // skip the reference allele
      currentPos += refAlleleLength;
    }
    // add any missing reference context
    final int stopPos = refContext.getWindow().getStop();
    while (currentPos < stopPos) sb.append((char) reference[currentPos++ - startPos]);

    return sb.toString().getBytes();
  }
Example #22
  @Override
  protected void doWork(String inputSource, VcfIterator r, VariantContextWriter w)
      throws IOException {
    VCFHeader header = r.getHeader();

    VCFHeader h2 = new VCFHeader(header.getMetaDataInInputOrder(), header.getSampleNamesInOrder());
    h2.addMetaDataLine(
        new VCFInfoHeaderLine(
            TAG,
            VCFHeaderLineCount.UNBOUNDED,
            VCFHeaderLineType.String,
            "metadata added from " + TABIX + " . Format was " + FORMAT));
    h2.addMetaDataLine(
        new VCFHeaderLine(
            getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
    h2.addMetaDataLine(
        new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
    h2.addMetaDataLine(
        new VCFHeaderLine(
            getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
    h2.addMetaDataLine(
        new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));

    SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
    w.writeHeader(h2);
    while (r.hasNext()) {
      VariantContext ctx = progress.watch(r.next());
      Set<String> annotations = new HashSet<String>();

      CloseableIterator<BedLine> iter =
          this.bedReader.iterator(ctx.getContig(), ctx.getStart() - 1, ctx.getEnd() + 1);
      while (iter.hasNext()) {
        BedLine bedLine = iter.next();

        if (!ctx.getContig().equals(bedLine.getContig())) continue;
        if (ctx.getStart() - 1 >= bedLine.getEnd()) continue;
        if (ctx.getEnd() - 1 < bedLine.getStart()) continue;

        String newannot = this.parsedFormat.toString(bedLine);
        if (!newannot.isEmpty()) annotations.add(VCFUtils.escapeInfoField(newannot));
      }
      CloserUtil.close(iter);

      if (annotations.isEmpty()) {
        w.add(ctx);
        continue;
      }
      VariantContextBuilder vcb = new VariantContextBuilder(ctx);
      vcb.attribute(TAG, annotations.toArray());
      w.add(vcb.make());
      incrVariantCount();
      if (checkOutputError()) break;
    }
    progress.finish();
  }
 private int calculateTumorNNR(final VariantContext vc) {
   int nnr = 0;
   switch (vc.getGenotype(tumorSample).getType()) {
     case HET:
     case HOM_VAR:
       nnr += 1;
       break;
     case NO_CALL:
     case UNAVAILABLE:
     case HOM_REF:
       break;
   }
   return nnr;
 }
  /**
   * Returns a list of attribute values from a VCF file
   *
   * @param vcfFile VCF file
   * @param attributeName attribute name
   * @throws IOException if the file does not exist or cannot be opened
   * @return list of attribute values
   */
  private List<String> getAttributeValues(final File vcfFile, final String attributeName)
      throws IOException {
    final VCFCodec codec = new VCFCodec();
    final FileInputStream s = new FileInputStream(vcfFile);
    final LineIterator lineIteratorVCF =
        codec.makeSourceFromStream(new PositionalBufferedStream(s));
    codec.readHeader(lineIteratorVCF);

    List<String> attributeValues = new ArrayList<String>();
    while (lineIteratorVCF.hasNext()) {
      final String line = lineIteratorVCF.next();
      Assert.assertNotNull(line);
      final VariantContext vc = codec.decode(line);

      for (final Genotype g : vc.getGenotypes()) {
        if (g.hasExtendedAttribute(attributeName)) {
          attributeValues.add((String) g.getExtendedAttribute(attributeName));
        }
      }
    }

    return attributeValues;
  }
 private int calculateTumorAC(final VariantContext vc) {
   int ac = 0;
   switch (vc.getGenotype(tumorSample).getType()) {
     case HET:
       ac += 1;
       break;
     case HOM_VAR:
       ac += 2;
       break;
     case NO_CALL:
     case UNAVAILABLE:
     case HOM_REF:
       break;
   }
   return ac;
 }
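Taken together with calculateTumorNNR above, illustrative outcomes: a HET tumor genotype contributes NNR = 1 and AC = 1, a HOM_VAR genotype NNR = 1 and AC = 2, and hom-ref or uncalled genotypes contribute nothing to either count.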
Example #26
  @Override
  public Map<String, Object> annotate(
      final RefMetaDataTracker tracker,
      final AnnotatorCompatible walker,
      final ReferenceContext ref,
      final Map<String, AlignmentContext> stratifiedContexts,
      final VariantContext vc,
      final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {

    final GenotypesContext genotypes = vc.getGenotypes();
    if (genotypes == null || genotypes.size() < MIN_SAMPLES) {
      if (!warningLogged) {
        logger.warn("Too few genotypes");
        warningLogged = true;
      }
      return null;
    }

    int refCount = 0;
    int hetCount = 0;
    int homCount = 0;
    for (final Genotype g : genotypes) {
      if (g.isNoCall()) continue;

      // TODO - fix me:
      // Right now we just ignore genotypes that are not confident, but this throws off
      //  our HW ratios.  More analysis is needed to determine the right thing to do when
      //  the genotyper cannot decide whether a given sample is het or hom var.
      if (g.getLog10PError() > MIN_LOG10_PERROR) continue;

      if (g.isHomRef()) refCount++;
      else if (g.isHet()) hetCount++;
      else homCount++;
    }

    if (refCount + hetCount + homCount == 0) return null;

    double pvalue = HardyWeinbergCalculation.hwCalculate(refCount, hetCount, homCount);
    Map<String, Object> map = new HashMap<>();
    map.put(getKeyNames().get(0), String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue)));
    return map;
  }
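For reference, the expectation behind the counts above: with N genotyped samples and ref-allele frequency p = (2·refCount + hetCount) / 2N, Hardy-Weinberg predicts genotype fractions p², 2p(1-p), and (1-p)². For example, refCount = 40, hetCount = 40, homCount = 20 gives p = 0.6 and an expected 2·0.6·0.4·100 = 48 hets against the observed 40; hwCalculate turns that departure into a p-value, which the annotation reports phred-scaled.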
Example #27
  private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) {
    if (!wroteHeader) {
      wroteHeader = true;

      // setup the header fields
      Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
      hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())));
      hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));

      allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
      for (VCFHeaderLine field : hInfo) {
        if (field instanceof VCFFormatHeaderLine) {
          allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) field).getID());
        }
      }

      samples = new LinkedHashSet<String>();
      if (sampleName != null) {
        samples.add(sampleName);
      } else {
        // try VCF first
        samples =
            SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));

        if (samples.isEmpty()) {
          List<Feature> features = tracker.getValues(variants, loc);
          if (features.size() == 0)
            throw new IllegalStateException(
                "No rod data is present, but we just created a VariantContext");

          Feature f = features.get(0);
          if (f instanceof RawHapMapFeature)
            samples.addAll(Arrays.asList(((RawHapMapFeature) f).getSampleIDs()));
          else samples.addAll(vc.getSampleNames());
        }
      }

      vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
    }

    vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
    vcfwriter.add(vc);
  }
Example #28
  @Override
  public Map<String, Object> annotate(
      final RefMetaDataTracker tracker,
      final AnnotatorCompatible walker,
      final ReferenceContext ref,
      final Map<String, AlignmentContext> stratifiedContexts,
      final VariantContext vc,
      final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
    // Can only call from UnifiedGenotyper
    if (!(walker instanceof UnifiedGenotyper)) {
      if (!walkerIdentityCheckWarningLogged) {
        if (walker != null)
          logger.warn(
              "Annotation will not be calculated, must be called from UnifiedGenotyper, not "
                  + walker.getClass().getName());
        else logger.warn("Annotation will not be calculated, must be called from UnifiedGenotyper");
        walkerIdentityCheckWarningLogged = true;
      }
      return null;
    }

    if (stratifiedContexts.isEmpty()) return null;

    // not meaningful when we're at an indel location: deletions that start at location N are by
    // definition called at position N-1, and at position N-1 there are no informative deletions
    // in the pileup
    if (!vc.isSNP()) return null;

    int deletions = 0;
    int depth = 0;
    for (Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet()) {
      for (final PileupElement p : sample.getValue().getBasePileup()) {
        depth++;
        if (p.isDeletion()) deletions++;
      }
    }
    Map<String, Object> map = new HashMap<>();
    map.put(
        getKeyNames().get(0),
        String.format("%.2f", depth == 0 ? 0.0 : (double) deletions / (double) depth));
    return map;
  }
Example #29
 private void addVariant(final VariantContext ctx) {
   if (!ctx.getChr().equals(genes.get(0).getChromosome())) return;
   if (ctx.getStart() >= chromEnd) return;
   if (ctx.getStart() < chromStart) return;
   positions.add(ctx.getStart());
   for (String sample : ctx.getSampleNames()) {
     Genotype g = ctx.getGenotype(sample);
     if (!g.isAvailable()) continue;
     if (!g.isCalled()) continue;
     if (g.isNoCall()) continue;
     if (g.isNonInformative()) continue;
     Set<Integer> set = sample2positions.get(sample);
     if (set == null) {
       set = new HashSet<Integer>();
       sample2positions.put(sample, set);
     }
     set.add(ctx.getStart());
   }
 }
Example #30
  @Override
  public void accumulate(final VariantContext ctx) {
    logger.record(ctx.getContig(), ctx.getStart());

    final String variantChrom = ctx.getContig();
    final int variantPos = ctx.getStart();

    // Skip anything a little too funky
    if (ctx.isFiltered()) return;
    if (!ctx.isVariant()) return;
    if (SKIP_CHROMS.contains(variantChrom)) return;

    for (final MendelianViolationMetrics trio : trios) {
      final Genotype momGt = ctx.getGenotype(trio.MOTHER);
      final Genotype dadGt = ctx.getGenotype(trio.FATHER);
      final Genotype kidGt = ctx.getGenotype(trio.OFFSPRING);

      // if any genotype:
      // - has a non-snp allele; or
      // - lacks a reference allele
      //
      // then ignore this trio
      if (CollectionUtil.makeList(momGt, dadGt, kidGt)
          .stream()
          .anyMatch(
              gt ->
                  gt.isHetNonRef()
                      || Stream.concat(Stream.of(ctx.getReference()), gt.getAlleles().stream())
                          .anyMatch(a -> a.length() != 1 || a.isSymbolic()))) {
        continue;
      }

      // if between the trio there are more than 2 alleles including the reference, continue
      if (Stream.concat(
                  Collections.singleton(ctx.getReference()).stream(),
                  CollectionUtil.makeList(momGt, dadGt, kidGt)
                      .stream()
                      .flatMap(gt -> gt.getAlleles().stream()))
              .collect(Collectors.toSet())
              .size()
          > 2) continue;

      // Test to make sure:
      //   1) That the site is in fact variant in the trio
      //   2) that the offspring doesn't have a really wacky het allele balance
      if (!isVariant(momGt, dadGt, kidGt)) continue;
      if (kidGt.isHet()) {
        final int[] ad = kidGt.getAD();
        if (ad == null) continue;

        final List<Integer> adOfAlleles =
            kidGt
                .getAlleles()
                .stream()
                .map(a -> ad[ctx.getAlleleIndex(a)])
                .collect(Collectors.toList());
        final double minAlleleFraction =
            Math.min(adOfAlleles.get(0), adOfAlleles.get(1))
                / (double) (adOfAlleles.get(0) + adOfAlleles.get(1));
        if (minAlleleFraction < MIN_HET_FRACTION) continue;
      }

      ///////////////////////////////////////////////////////////////
      // Determine whether the offspring should be haploid at this
      // locus and which is the parental donor of the haploid genotype
      ///////////////////////////////////////////////////////////////
      boolean haploid = false;
      Genotype haploidParentalGenotype = null;

      if (FEMALE_CHROMS.contains(variantChrom) && trio.OFFSPRING_SEX != Sex.Unknown) {
        if (trio.OFFSPRING_SEX == Sex.Female) {
          // female
          haploid = false;
        } else if (isInPseudoAutosomalRegion(variantChrom, variantPos)) {
          // male but in PAR on X, so diploid
          haploid = false;
        } else {
          // male, out of PAR on X, haploid
          haploid = true;
          haploidParentalGenotype = momGt;
        }
      }

      // the PAR on the male chromosome should be masked so that reads
      // align to the female chromosomes instead, so there's no point
      // of worrying about that here.

      if (MALE_CHROMS.contains(variantChrom)) {
        if (trio.OFFSPRING_SEX == Sex.Male) {
          haploid = true;
          haploidParentalGenotype = dadGt;
        } else {
          continue;
        }
      }

      // We only want to look at sites where we have high enough confidence that the genotypes we
      // are looking at are interesting. We want to ensure that parents are always GQ >= MIN_GQ,
      // and that the kid is either GQ >= MIN_GQ or, in the case where the kid is het, that the
      // phred-scaled likelihood of being reference is >= MIN_GQ.
      if (haploid
          && (haploidParentalGenotype.isNoCall() || haploidParentalGenotype.getGQ() < MIN_GQ))
        continue;
      if (!haploid
          && (momGt.isNoCall()
              || momGt.getGQ() < MIN_GQ
              || dadGt.isNoCall()
              || dadGt.getGQ() < MIN_GQ)) continue;
      if (kidGt.isNoCall()) continue;
      if (momGt.isHomRef() && dadGt.isHomRef() && !kidGt.isHomRef()) {
        if (kidGt.getPL()[0] < MIN_GQ) continue;
      } else if (kidGt.getGQ() < MIN_GQ) continue;

      // Also filter on the DP for each of the samples - it's possible to miss hets when DP is too
      // low
      if (haploid && (kidGt.getDP() < MIN_DP || haploidParentalGenotype.getDP() < MIN_DP)) continue;
      if (!haploid && (kidGt.getDP() < MIN_DP || momGt.getDP() < MIN_DP || dadGt.getDP() < MIN_DP))
        continue;

      trio.NUM_VARIANT_SITES++;

      ///////////////////////////////////////////////////////////////
      // First test for haploid violations
      ///////////////////////////////////////////////////////////////
      MendelianViolation type = null;
      if (haploid) {
        if (kidGt.isHet()) continue; // Should not see heterozygous calls at haploid regions

        if (!haploidParentalGenotype.getAlleles().contains(kidGt.getAllele(0))) {
          if (kidGt.isHomRef()) {
            type = MendelianViolation.Haploid_Other;
            trio.NUM_HAPLOID_OTHER++;
          } else {
            type = MendelianViolation.Haploid_Denovo;
            trio.NUM_HAPLOID_DENOVO++;
          }
        }
      }
      ///////////////////////////////////////////////////////////////
      // Then test for diploid mendelian violations
      ///////////////////////////////////////////////////////////////
      else if (isMendelianViolation(momGt, dadGt, kidGt)) {
        if (momGt.isHomRef() && dadGt.isHomRef() && !kidGt.isHomRef()) {
          trio.NUM_DIPLOID_DENOVO++;
          type = MendelianViolation.Diploid_Denovo;
        } else if (momGt.isHomVar() && dadGt.isHomVar() && kidGt.isHet()) {
          trio.NUM_HOMVAR_HOMVAR_HET++;
          type = MendelianViolation.HomVar_HomVar_Het;
        } else if (kidGt.isHom()
            && ((momGt.isHomRef() && dadGt.isHomVar()) || (momGt.isHomVar() && dadGt.isHomRef()))) {
          trio.NUM_HOMREF_HOMVAR_HOM++;
          type = MendelianViolation.HomRef_HomVar_Hom;
        } else if (kidGt.isHom()
            && ((momGt.isHom() && dadGt.isHet()) || (momGt.isHet() && dadGt.isHom()))) {
          trio.NUM_HOM_HET_HOM++;
          type = MendelianViolation.Hom_Het_Hom;
        } else {
          trio.NUM_OTHER++;
          type = MendelianViolation.Other;
        }
      }

      // Output a record into the family's violation VCF
      if (type != null) {
        // Create a new Context subsetted to the three samples
        final VariantContextBuilder builder = new VariantContextBuilder(ctx);
        builder.genotypes(
            ctx.getGenotypes()
                .subsetToSamples(CollectionUtil.makeSet(trio.MOTHER, trio.FATHER, trio.OFFSPRING)));
        builder.attribute(MENDELIAN_VIOLATION_KEY, type.name());

        // Copy over some useful attributes from the full context
        if (ctx.hasAttribute(VCFConstants.ALLELE_COUNT_KEY))
          builder.attribute(ORIGINAL_AC, ctx.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
        if (ctx.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY))
          builder.attribute(ORIGINAL_AF, ctx.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY));
        if (ctx.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY))
          builder.attribute(ORIGINAL_AN, ctx.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));

        // Write out the variant record
        familyToViolations.get(trio.FAMILY_ID).add(builder.make());
      }
    }
  }