Example #1
  private byte[] generateHaplotype(
      final List<VariantContext> sourceVCs, final ReferenceContext refContext) {

    final StringBuilder sb = new StringBuilder();

    final int startPos = refContext.getWindow().getStart();
    int currentPos = startPos;
    final byte[] reference = refContext.getBases();

    for (final VariantContext vc : sourceVCs) {
      // add any missing reference context
      int vcStart = vc.getStart();
      final int refAlleleLength = vc.getReference().length();
      // for a deletion the padding base is part of the position (whereas for other
      // events it isn't), so start the alt substitution one base later
      if (refAlleleLength == vc.getEnd() - vc.getStart()) {
        vcStart++;
      }

      while (currentPos < vcStart) sb.append((char) reference[currentPos++ - startPos]);

      // add the alt allele
      sb.append(vc.getAlternateAllele(0).getBaseString());

      // skip the reference allele
      currentPos += refAlleleLength;
    }
    // add any missing reference context
    final int stopPos = refContext.getWindow().getStop();
    while (currentPos < stopPos) sb.append((char) reference[currentPos++ - startPos]);

    return sb.toString().getBytes();
  }
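Since the splice logic above is easy to get off by one, here is a minimal, dependency-free sketch of the same loop over a toy reference. ToyVariant and splice() are hypothetical stand-ins for the GATK types, not their API, and the deletion padding adjustment is omitted:

import java.util.Arrays;
import java.util.List;

final class HaplotypeSpliceSketch {
  // Hypothetical stand-in for VariantContext: 1-based start plus ref/alt alleles.
  static final class ToyVariant {
    final int start;
    final String ref, alt;
    ToyVariant(int start, String ref, String alt) { this.start = start; this.ref = ref; this.alt = alt; }
  }

  // Splices alt alleles into a reference window starting at startPos, mirroring
  // the loop in generateHaplotype() above.
  static String splice(String reference, int startPos, List<ToyVariant> vcs) {
    final StringBuilder sb = new StringBuilder();
    int currentPos = startPos;
    for (ToyVariant vc : vcs) {
      // add any missing reference context before the event
      while (currentPos < vc.start) sb.append(reference.charAt(currentPos++ - startPos));
      sb.append(vc.alt);               // add the alt allele
      currentPos += vc.ref.length();   // skip the replaced reference bases
    }
    // add any trailing reference context
    while (currentPos - startPos < reference.length())
      sb.append(reference.charAt(currentPos++ - startPos));
    return sb.toString();
  }

  public static void main(String[] args) {
    // reference ACGTACGT at positions 100..107, with a SNP T>G at position 103
    System.out.println(splice("ACGTACGT", 100, Arrays.asList(new ToyVariant(103, "T", "G"))));
    // prints ACGGACGT
  }
}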
Example #2
  private Collection<VariantContext> getVariantContexts(
      RefMetaDataTracker tracker, ReferenceContext ref) {

    List<Feature> features = tracker.getValues(variants, ref.getLocus());
    List<VariantContext> VCs = new ArrayList<VariantContext>(features.size());

    for (Feature record : features) {
      if (VariantContextAdaptors.canBeConvertedToVariantContext(record)) {
        // we need to special case the HapMap format because indels aren't handled correctly
        if (record instanceof RawHapMapFeature) {

          // is it an indel?
          RawHapMapFeature hapmap = (RawHapMapFeature) record;
          if (hapmap.getAlleles()[0].equals(RawHapMapFeature.NULL_ALLELE_STRING)
              || hapmap.getAlleles()[1].equals(RawHapMapFeature.NULL_ALLELE_STRING)) {
            // get the dbsnp object corresponding to this record (needed to help us distinguish
            // between insertions and deletions)
            VariantContext dbsnpVC = getDbsnp(hapmap.getName());
            if (dbsnpVC == null || dbsnpVC.isMixed()) continue;

            Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
            alleleMap.put(
                RawHapMapFeature.DELETION,
                Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
            alleleMap.put(
                RawHapMapFeature.INSERTION,
                Allele.create(
                    (char) ref.getBase() + ((RawHapMapFeature) record).getAlleles()[1],
                    !dbsnpVC.isSimpleInsertion()));
            hapmap.setActualAlleles(alleleMap);

            // also, use the correct positioning for insertions
            hapmap.updatePosition(dbsnpVC.getStart());

            if (hapmap.getStart() < ref.getWindow().getStart()) {
              logger.warn(
                  "Hapmap record at "
                      + ref.getLocus()
                      + " represents an indel too large to be converted; skipping...");
              continue;
            }
          }
        }

        // ok, we might actually be able to turn this record into a variant context
        VariantContext vc =
            VariantContextAdaptors.toVariantContext(variants.getName(), record, ref);

        // sometimes the track has odd stuff in it that can't be converted
        if (vc != null) {
          VCs.add(vc);
        }
      }
    }

    return VCs;
  }
Example #3
  private void resolveByHaplotype(final ReferenceContext refContext) {

    final byte[] source1Haplotype = generateHaplotype(sourceVCs1, refContext);
    final byte[] source2Haplotype = generateHaplotype(sourceVCs2, refContext);

    final SWPairwiseAlignment swConsensus1 =
        new SWPairwiseAlignment(
            refContext.getBases(), source1Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
    final SWPairwiseAlignment swConsensus2 =
        new SWPairwiseAlignment(
            refContext.getBases(), source2Haplotype, SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);

    // protect against SW failures
    if (swConsensus1.getCigar().toString().contains("S")
        || swConsensus1.getCigar().getReferenceLength() < 20
        || swConsensus2.getCigar().toString().contains("S")
        || swConsensus2.getCigar().getReferenceLength() < 20) {
      // TODO -- handle errors appropriately
      logger.debug("Bad SW alignment; aborting at " + refContext.getLocus());
      return;
    }

    // order results by start position
    final TreeMap<Integer, VariantContext> source1Map =
        new TreeMap<Integer, VariantContext>(
            HaplotypeCallerGenotypingEngine.generateVCsFromAlignment(
                new Haplotype(source1Haplotype, false, 0, swConsensus1.getCigar()),
                refContext.getBases(),
                refContext.getWindow(),
                source1));
    final TreeMap<Integer, VariantContext> source2Map =
        new TreeMap<Integer, VariantContext>(
            HaplotypeCallerGenotypingEngine.generateVCsFromAlignment(
                new Haplotype(source2Haplotype, false, 0, swConsensus2.getCigar()),
                refContext.getBases(),
                refContext.getWindow(),
                source2));
    if (source1Map.size() == 0 || source2Map.size() == 0) {
      // TODO -- handle errors appropriately
      logger.debug("No source alleles; aborting at " + refContext.getLocus());
      return;
    }

    // create lists and test for equality
    final List<VariantContext> source1Alleles = new ArrayList<VariantContext>(source1Map.values());
    final List<VariantContext> source2Alleles = new ArrayList<VariantContext>(source2Map.values());

    writeAndPurgeAllEqualVariants(source1Alleles, source2Alleles, SAME_BY_HAPLOTYPE_STATUS);
    if (source1Alleles.isEmpty()) {
      writeAll(source2Alleles, source2, null);
    } else if (source2Alleles.isEmpty()) {
      writeAll(source1Alleles, source1, null);
    } else {
      writeDifferences(source1Alleles, source2Alleles);
    }
  }
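The "protect against SW failures" guard reads more clearly as a predicate over a single alignment. A minimal sketch using only the Cigar calls already present above; the helper name isBadAlignment is hypothetical:

import htsjdk.samtools.Cigar; // net.sf.samtools.Cigar in older GATK builds

final class AlignmentGuards {
  // True when the haplotype was soft-clipped ("S" in the cigar string, i.e. it
  // did not align end to end) or spans fewer than 20 reference bases.
  static boolean isBadAlignment(final Cigar cigar) {
    return cigar.toString().contains("S") || cigar.getReferenceLength() < 20;
  }
}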
Example #4
  @Override
  public Long map(
      final RefMetaDataTracker tracker,
      final ReferenceContext ref,
      final AlignmentContext context) {
    GenomeLoc refLocus = ref.getLocus();

    // add any new intervals that overlap this reference locus, then process and
    // remove intervals in the map that no longer overlap the current locus
    addNewOverlappingIntervals(refLocus);
    outputFinishedIntervals(refLocus, ref.getBase());

    // at this point, all intervals in intervalMap overlap with this locus, so update all of them
    for (IntervalStratification intervalStratification : intervalMap.values())
      intervalStratification.addLocus(context, ref);

    return 1L;
  }
Example #5
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null || !BaseUtils.isRegularBase(ref.getBase())) return 0;

    Collection<VariantContext> contexts = getVariantContexts(tracker, ref);

    for (VariantContext vc : contexts) {
      VariantContextBuilder builder = new VariantContextBuilder(vc);

      // set the appropriate sample name if necessary
      if (sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName())) {
        Genotype g =
            new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make();
        builder.genotypes(g);
      }

      final VariantContext withID = variantOverlapAnnotator.annotateRsID(tracker, builder.make());
      writeRecord(withID, tracker, ref.getLocus());
    }

    return 1;
  }
Example #6
  private static double computeGCContent(ReferenceContext ref) {
    int gc = 0, at = 0;

    for (byte base : ref.getBases()) {
      int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
      if (baseIndex == BaseUtils.Base.G.ordinal() || baseIndex == BaseUtils.Base.C.ordinal()) {
        gc++;
      } else if (baseIndex == BaseUtils.Base.A.ordinal() || baseIndex == BaseUtils.Base.T.ordinal()) {
        at++;
      }
      // all other bases (e.g. Ns) are ignored
    }

    int sum = gc + at;
    return (100.0 * gc) / (sum == 0 ? 1 : sum);
  }
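As a sanity check on the arithmetic, a standalone rewrite without the BaseUtils dependency; computeGC is a hypothetical helper, not GATK code:

final class GCContentSketch {
  static double computeGC(byte[] bases) {
    int gc = 0, at = 0;
    for (byte b : bases) {
      switch (Character.toUpperCase((char) b)) {
        case 'G': case 'C': gc++; break;
        case 'A': case 'T': at++; break;
        default: break; // Ns and other ambiguity codes are ignored
      }
    }
    int sum = gc + at;
    return (100.0 * gc) / (sum == 0 ? 1 : sum);
  }

  public static void main(String[] args) {
    // ACGTGC has 4 G/C bases out of 6 regular bases: ~66.67% GC
    System.out.println(computeGC("ACGTGC".getBytes()));
  }
}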
Example #7
  @Override
  public CallableBaseState map(
      RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    CalledState state;

    if (BaseUtils.isNBase(ref.getBase())) {
      state = CalledState.REF_N;
    } else {
      // count up the depths of all bases and of QC+ bases
      int rawDepth = 0, QCDepth = 0, lowMAPQDepth = 0;
      for (PileupElement e : context.getBasePileup()) {
        rawDepth++;

        if (e.getMappingQual() <= maxLowMAPQ) lowMAPQDepth++;

        if (e.getMappingQual() >= minMappingQuality
            && (e.getQual() >= minBaseQuality || e.isDeletion())) {
          QCDepth++;
        }
      }

      // System.out.printf("%s rawdepth = %d QCDepth = %d lowMAPQ = %d%n", context.getLocation(),
      // rawDepth, QCDepth, lowMAPQDepth);
      if (rawDepth == 0) {
        state = CalledState.NO_COVERAGE;
      } else if (rawDepth >= minDepthLowMAPQ
          && MathUtils.ratio(lowMAPQDepth, rawDepth) >= maxLowMAPQFraction) {
        state = CalledState.POOR_MAPPING_QUALITY;
      } else if (QCDepth < minDepth) {
        state = CalledState.LOW_COVERAGE;
      } else if (rawDepth >= maxDepth && maxDepth != -1) {
        state = CalledState.EXCESSIVE_COVERAGE;
      } else {
        state = CalledState.CALLABLE;
      }
    }

    return new CallableBaseState(getToolkit().getGenomeLocParser(), context.getLocation(), state);
  }
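The decision cascade is a pure function of the three depth counters. A sketch with illustrative thresholds; the Thresholds holder and its values are assumptions, only the CalledState names come from the walker above:

final class CallableStateSketch {
  enum CalledState { REF_N, NO_COVERAGE, POOR_MAPPING_QUALITY, LOW_COVERAGE, EXCESSIVE_COVERAGE, CALLABLE }

  // Hypothetical parameter holder; the values are illustrative, not the tool's defaults.
  static final class Thresholds {
    int minDepth = 4, minDepthLowMAPQ = 10, maxDepth = -1;
    double maxLowMAPQFraction = 0.1;
  }

  static CalledState classify(int rawDepth, int qcDepth, int lowMAPQDepth, Thresholds t) {
    if (rawDepth == 0) return CalledState.NO_COVERAGE;
    // enough raw coverage, but too much of it is poorly mapped
    if (rawDepth >= t.minDepthLowMAPQ && (double) lowMAPQDepth / rawDepth >= t.maxLowMAPQFraction)
      return CalledState.POOR_MAPPING_QUALITY;
    if (qcDepth < t.minDepth) return CalledState.LOW_COVERAGE;
    if (t.maxDepth != -1 && rawDepth >= t.maxDepth) return CalledState.EXCESSIVE_COVERAGE;
    return CalledState.CALLABLE;
  }
}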
Example #8
  /**
   * For each site of interest, annotate based on the requested annotation types
   *
   * @param tracker the meta-data tracker
   * @param ref the reference base
   * @param context the context for the given locus
   * @return 1 if the locus was successfully processed, 0 otherwise
   */
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null) return 0;

    // get the variant contexts for all the variants at the location
    Collection<VariantContext> VCs =
        tracker.getValues(variantCollection.variants, context.getLocation());
    if (VCs.isEmpty()) return 0;

    Collection<VariantContext> annotatedVCs = VCs;

    // if the reference base is not ambiguous, we can annotate
    Map<String, AlignmentContext> stratifiedContexts;
    if (BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) {
      stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
      annotatedVCs = new ArrayList<>(VCs.size());
      for (VariantContext vc : VCs)
        annotatedVCs.add(engine.annotateContext(tracker, ref, stratifiedContexts, vc));
    }

    for (VariantContext annotatedVC : annotatedVCs) vcfWriter.add(annotatedVC);

    return 1;
  }
Example #9
  @Override
  public Map<String, Object> annotate(
      final RefMetaDataTracker tracker,
      final AnnotatorCompatible walker,
      final ReferenceContext ref,
      final Map<String, AlignmentContext> stratifiedContexts,
      final VariantContext vc,
      final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {

    // Cannot annotate if the initialization conditions failed
    if (!canAnnotate) return null;

    RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();

    // Get only SnpEff records that start at this locus, not merely span it:
    List<VariantContext> snpEffRecords = tracker.getValues(snpEffRodBinding, ref.getLocus());

    // Within this set, look for a SnpEff record whose ref/alt alleles match the record to
    // annotate. If there is more than one such record, we only need to pick the first one,
    // since the biological effects will be the same across all such records:
    VariantContext matchingRecord = getMatchingSnpEffRecord(snpEffRecords, vc);
    if (matchingRecord == null) {
      return null;
    }

    // Parse the SnpEff INFO field annotation from the matching record into individual effect
    // objects:
    List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
    if (effects.isEmpty()) {
      return null;
    }

    // Add only annotations for one of the most biologically-significant effects from this set:
    SnpEffEffect mostSignificantEffect = getMostSignificantEffect(effects);
    return mostSignificantEffect.getAnnotations();
  }
Example #10
  public Event map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

    boolean hasIndel = false;
    boolean hasInsertion = false;
    boolean hasPointEvent = false;

    int furthestStopPos = -1;

    // look at the rods for indels or SNPs
    if (tracker != null) {
      for (VariantContext vc : tracker.getValues(known)) {
        switch (vc.getType()) {
          case INDEL:
            hasIndel = true;
            if (vc.isSimpleInsertion()) hasInsertion = true;
            break;
          case SNP:
            hasPointEvent = true;
            break;
          case MIXED:
            hasPointEvent = true;
            hasIndel = true;
            if (vc.isSimpleInsertion()) hasInsertion = true;
            break;
          default:
            break;
        }
        if (hasIndel) furthestStopPos = vc.getEnd();
      }
    }

    // look at the normal context to get deletions and positions with high entropy
    final ReadBackedPileup pileup = context.getBasePileup();

    int mismatchQualities = 0, totalQualities = 0;
    final byte refBase = ref.getBase();
    for (PileupElement p : pileup) {

      // check the ends of the reads to see how far they extend
      furthestStopPos = Math.max(furthestStopPos, p.getRead().getAlignmentEnd());

      // is it a deletion or insertion?
      if (p.isDeletion() || p.isBeforeInsertion()) {
        hasIndel = true;
        if (p.isBeforeInsertion()) hasInsertion = true;
      }

      // look for mismatches
      else if (lookForMismatchEntropy) {
        if (p.getBase() != refBase) mismatchQualities += p.getQual();
        totalQualities += p.getQual();
      }
    }

    // make sure we're supposed to look for high entropy
    if (lookForMismatchEntropy
        && pileup.getNumberOfElements() >= minReadsAtLocus
        && (double) mismatchQualities / (double) totalQualities >= mismatchThreshold)
      hasPointEvent = true;

    // return null if no event occurred
    if (!hasIndel && !hasPointEvent) return null;

    // return null if we didn't find any usable reads/rods associated with the event
    if (furthestStopPos == -1) return null;

    GenomeLoc eventLoc = context.getLocation();
    if (hasInsertion)
      eventLoc =
          getToolkit()
              .getGenomeLocParser()
              .createGenomeLoc(eventLoc.getContig(), eventLoc.getStart(), eventLoc.getStart() + 1);

    EVENT_TYPE eventType =
        (hasIndel
            ? (hasPointEvent ? EVENT_TYPE.BOTH : EVENT_TYPE.INDEL_EVENT)
            : EVENT_TYPE.POINT_EVENT);

    return new Event(eventLoc, furthestStopPos, eventType);
  }
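Finally, the mismatch-entropy test near the end of map() can be isolated as a pure predicate. A sketch whose parameter names mirror the locals above; the explicit totalQualities guard is an added safety assumption, not part of the original:

final class EntropySketch {
  // True when enough reads pile up and mismatching bases carry at least
  // `mismatchThreshold` of the total base quality observed at the locus.
  static boolean highMismatchEntropy(
      int mismatchQualities, int totalQualities, int reads,
      int minReadsAtLocus, double mismatchThreshold) {
    return reads >= minReadsAtLocus
        && totalQualities > 0
        && (double) mismatchQualities / totalQualities >= mismatchThreshold;
  }
}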