Пример #1
0
 /**
  * Builds the locus that corresponds to the original variant's locus after clipping.
  *
  * <p>The start is moved forward by {@code leftClip}, the end is moved back by {@code rightClip},
  * and the reference string is passed through {@code StringUtils.clip} with the same two amounts
  * (presumably dropping that many characters from each side - confirm against StringUtils).
  *
  * @param original variant whose locus is used as the starting point
  * @param leftClip amount clipped on the left
  * @param rightClip amount clipped on the right
  * @return a new locus on the same sequence with adjusted coordinates and reference
  */
 private static VariantLocus createLocus(
     final Variant original, final int leftClip, final int rightClip) {
   final VariantLocus oldLocus = original.getLocus();
   final String oldReference = oldLocus.getRefNts();

   // With nothing clipped on the left the preceding base is unchanged; otherwise it is the last
   // reference base that was clipped away.
   final char precedingNt;
   if (leftClip == 0) {
     precedingNt = oldLocus.getPreviousRefNt();
   } else {
     precedingNt = oldReference.charAt(leftClip - 1);
   }

   final String clippedReference = StringUtils.clip(oldReference, leftClip, rightClip);
   return new VariantLocus(
       oldLocus.getSequenceName(),
       oldLocus.getStart() + leftClip,
       oldLocus.getEnd() - rightClip,
       clippedReference,
       precedingNt);
 }
Пример #2
0
 /**
  * Clears the de novo flag on every sample that is marked as de novo but is rejected by the
  * supplied checker. The opposite correction (marking a sample as de novo) is never made.
  *
  * <p>If no sample needs correcting the input variant itself is returned. Otherwise a new variant
  * is built on the same locus with freshly constructed samples, and the remaining state is
  * transferred with {@code VariantSample.copy} and {@code Variant.copy}.
  *
  * <p>NOTE(review): earlier documentation said this method also deals with the somatic cause
  * (some variants no longer have one after splitting). Nothing here touches it directly, so that
  * presumably happens inside the copy helpers or the checker - confirm.
  *
  * @param checker the de novo checker for the current inheritance scenario
  * @param variant the variant to check
  * @return the corrected variant, or {@code variant} unchanged when nothing had to be corrected
  */
 public static Variant denovoCorrect(DenovoChecker checker, Variant variant) {
   final int sampleCount = variant.getNumberOfSamples();

   // Pass 1: collect the indexes of samples flagged de novo that the checker disagrees with.
   final Set<Integer> rejected = new HashSet<>();
   for (int idx = 0; idx < sampleCount; idx++) {
     final VariantSample candidate = variant.getSample(idx);
     if (candidate == null || candidate.isDeNovo() != VariantSample.DeNovoStatus.IS_DE_NOVO) {
       continue;
     }
     if (!checker.isDenovo(variant, idx)) {
       rejected.add(idx);
     }
   }
   if (rejected.isEmpty()) {
     return variant;
   }

   // Pass 2: rebuild every non-null sample with its corrected status.
   final VariantSample[] rebuilt = new VariantSample[sampleCount];
   for (int idx = 0; idx < rebuilt.length; idx++) {
     final VariantSample previous = variant.getSample(idx);
     if (previous == null) {
       continue;
     }
     final VariantSample.DeNovoStatus before = previous.isDeNovo();
     final VariantSample.DeNovoStatus after;
     if (before == VariantSample.DeNovoStatus.UNSPECIFIED) {
       after = VariantSample.DeNovoStatus.UNSPECIFIED;
     } else if (before == VariantSample.DeNovoStatus.IS_DE_NOVO && !rejected.contains(idx)) {
       after = VariantSample.DeNovoStatus.IS_DE_NOVO;
     } else {
       after = VariantSample.DeNovoStatus.NOT_DE_NOVO;
     }
     final VariantSample replacement =
         new VariantSample(
             previous.getPloidy(),
             previous.getName(),
             previous.isIdentity(),
             previous.getMeasure(),
             after,
             previous.getDeNovoPosterior());
     VariantSample.copy(previous, replacement);
     rebuilt[idx] = replacement;
   }

   final Variant corrected = new Variant(variant.getLocus(), rebuilt);
   Variant.copy(variant, corrected);
   return corrected;
 }
Пример #3
0
  /**
   * Splits a variant into separate variants, one per maximal run of columns in which the called
   * alleles and the reference do not all agree.
   *
   * <p>The original variant is returned as a single-element list when there are no called
   * alleles, when the alleles (including the reference) are not all the same length, or when
   * every column differs so that there is nowhere to split.
   *
   * <p>NOTE(review): if the alleles have non-zero length and agree in every column, no piece is
   * created and the returned list is empty - confirm callers expect that.
   *
   * @param original the variant to split
   * @param denovoCorrector checker applied to each piece via {@code denovoCorrect}; may be null,
   *     in which case the pieces are returned uncorrected
   * @return the pieces in left-to-right order, or a singleton list holding {@code original}
   */
  static List<Variant> split(final Variant original, DenovoChecker denovoCorrector) {

    // Gather the called alleles; without any there is nothing to split.
    final HashSet<String> alleleSet = extractCalledAlleles(original);
    if (alleleSet.isEmpty()) {
      return Collections.singletonList(original);
    }
    alleleSet.add(original.getLocus().getRefNts());

    // Splitting is column based, so it needs every allele to have the same length.
    final String[] alleles = alleleSet.toArray(new String[alleleSet.size()]);
    final String first = alleles[0];
    final int width = first.length();
    for (int a = 1; a < alleles.length; a++) {
      if (alleles[a].length() != width) {
        return Collections.singletonList(original);
      }
    }

    // differs[col] is true when some allele disagrees with the first one in that column; a
    // false entry marks a column where all alleles agree, i.e. a split point.
    final boolean[] differs = new boolean[width];
    boolean everyColumnDiffers = true;
    for (int col = 0; col < width; col++) {
      final char expected = first.charAt(col);
      for (int a = 1; a < alleles.length && !differs[col]; a++) {
        differs[col] = alleles[a].charAt(col) != expected;
      }
      everyColumnDiffers &= differs[col];
    }
    if (everyColumnDiffers) {
      // No split point anywhere (this also covers zero-length alleles).
      return Collections.singletonList(original);
    }

    // Emit one variant for each maximal run of differing columns.
    final List<Variant> pieces = new ArrayList<>();
    int pieceId = 0; // Unique identifier for each subcall
    int col = 0;
    while (col < width) {
      if (!differs[col]) {
        col++;
        continue;
      }
      int runEnd = col + 1;
      while (runEnd < width && differs[runEnd]) {
        runEnd++;
      }
      final Variant piece = createSplitVariant(original, col, width - runEnd, pieceId++);
      pieces.add(denovoCorrector == null ? piece : denovoCorrect(denovoCorrector, piece));
      // The column at runEnd is either past the end or known to agree, so it can be skipped.
      col = runEnd + 1;
    }
    return pieces;
  }
Пример #4
0
  /**
   * Trims a variant by clipping the prefix and suffix shared by the reference and every called
   * allele, adjusting the locus, the samples and the allele description to match.
   *
   * <p>The original variant is returned untouched when the reference is empty, when there are no
   * called alleles, or when neither side can be clipped.
   *
   * @param original the original call
   * @return trimmed variant (possibly original if no change made)
   */
  static Variant trim(final Variant original) {

    // An empty reference means a pure insertion: every base is inserted, nothing can be trimmed.
    final String reference = original.getLocus().getRefNts();
    if (reference.isEmpty()) {
      return original;
    }

    // No called alleles, nothing to trim against.
    final HashSet<String> alleleSet = extractCalledAlleles(original);
    if (alleleSet.isEmpty()) {
      return original;
    }

    // The (non-empty) reference takes part in the comparison as well.
    alleleSet.add(reference);
    final String[] candidates = alleleSet.toArray(new String[alleleSet.size()]);

    // The right clip is determined first and then handed to the prefix computation
    // (presumably so the two clips cannot overlap - confirm against StringUtils).
    final int rightClip = StringUtils.longestSuffix(candidates);
    final int leftClip = StringUtils.longestPrefix(rightClip, candidates);
    if (leftClip == 0 && rightClip == 0) {
      return original;
    }

    // Clipped locus; its reference can end up empty when the trimmed call is a pure insertion.
    final VariantLocus trimmedLocus = createLocus(original, leftClip, rightClip);

    final int sampleCount = original.getNumberOfSamples();
    final VariantSample[] trimmedSamples;
    if (sampleCount <= 0) {
      trimmedSamples = new VariantSample[0];
    } else {
      // Use the description of the last sample that has a real (non-"none") one.
      Description description = DescriptionNone.SINGLETON;
      for (int s = 0; s < sampleCount; s++) {
        final VariantSample sample = original.getSample(s);
        if (sample == null) {
          continue;
        }
        final Description candidate = sample.getStats().counts().getDescription();
        if (!(candidate instanceof DescriptionNone)) {
          description = candidate;
        }
      }

      // Clip every allele name; allele indexes are unchanged, so the map is the identity.
      final int alleleCount = description.size();
      final String[] clippedNames = new String[alleleCount];
      final int[] identityMap = new int[alleleCount];
      for (int a = 0; a < alleleCount; a++) {
        clippedNames[a] = StringUtils.clip(description.name(a), leftClip, rightClip);
        identityMap[a] = a;
      }
      final Description trimmedDescription = new DescriptionCommon(clippedNames);
      trimmedSamples =
          createVariants(original, leftClip, rightClip, trimmedDescription, identityMap);
    }

    // Assemble the trimmed variant from the new locus and samples.
    final Variant trimmed = new Variant(trimmedLocus, trimmedSamples);
    Variant.copy(original, trimmed);
    // The possible cause has to be set after the copy.
    trimmed.setPossibleCause(createVariantName(leftClip, rightClip, original.getPossibleCause()));
    trimmed.setTrimmed();
    return trimmed;
  }