public void testRemoveMulti() throws Exception {
    // Runs the command with --remove-samples while keeping only the AN and AC info fields and
    // the YEA filter, then compares the output (minus run-specific header lines) to the
    // stored expectation "vcfsubset-multi.vcf".
    try (TestDirectory dir = new TestDirectory()) {
      final File gzTarget = new File(dir, "vcf.vcf.gz");
      final File input =
          FileHelper.resourceToGzFile("com/rtg/vcf/resources/vcfsubset.vcf", gzTarget);
      final File output = new File(dir, "out.vcf");

      checkMainInitOk(
          "-i", input.getPath(),
          "-o", output.getPath(),
          "--remove-samples",
          "--keep-info", "AN",
          "--keep-info", "AC",
          "--keep-filter", "YEA",
          "-Z");

      // The ##RUN-ID and ##CL header lines are stripped before comparing
      // (presumably because they differ from run to run -- confirm).
      final String raw = FileHelper.fileToString(output);
      final String withoutRunId = StringUtils.grepMinusV(raw, "^##RUN-ID");
      final String normalized = StringUtils.grepMinusV(withoutRunId, "^##CL");
      mNano.check("vcfsubset-multi.vcf", normalized);
    }
  }
  public void testExplosion() throws Exception {
    // Removing the GT and DS format fields is expected to emit a warning that one record was
    // skipped; the first non-header record of the output must then start with "X\t60052".
    try (TestDirectory dir = new TestDirectory()) {
      final File gzTarget = new File(dir, "vcf.vcf.gz");
      final File input =
          FileHelper.resourceToGzFile("com/rtg/vcf/resources/vcfsubset.vcf", gzTarget);
      final File output = new File(dir, "out.vcf");

      final String expectedWarning =
          "Records skipped due to invalid or incompatible sample fields: 1" + StringUtils.LS;
      final String actualWarning =
          checkMainInitWarn(
              "-i", input.getPath(),
              "-o", output.getPath(),
              "--remove-format", "GT",
              "--remove-format", "DS",
              "-Z");
      assertEquals(expectedWarning, actualWarning);

      // Drop every header line (those starting with '#') before inspecting the records.
      final String records = StringUtils.grepMinusV(FileHelper.fileToString(output), "^#");
      assertTrue(records.startsWith("X\t60052"));
    }
  }
Beispiel #3
0
 /**
  * Clips a variant name, which is either a single allele or two alleles joined by
  * {@code VariantUtils.COLON}. Each allele is clipped independently via
  * {@code StringUtils.clip} and, in the two-allele case, re-joined with the same separator.
  *
  * @param leftClip left clip amount handed to {@code StringUtils.clip}
  * @param rightClip right clip amount handed to {@code StringUtils.clip}
  * @param oldName name to clip, may be null
  * @return the clipped name, or null when {@code oldName} is null
  */
 private static String createVariantName(
     final int leftClip, final int rightClip, final String oldName) {
   if (oldName == null) {
     return null;
   }
   final int separator = oldName.indexOf(VariantUtils.COLON);
   if (separator < 0) {
     // No separator present: the whole name is a single allele.
     return StringUtils.clip(oldName, leftClip, rightClip);
   }
   // At most one separator is expected in a name.
   assert oldName.indexOf(VariantUtils.COLON, separator + 1) == -1;
   final String first = StringUtils.clip(oldName.substring(0, separator), leftClip, rightClip);
   final String second = StringUtils.clip(oldName.substring(separator + 1), leftClip, rightClip);
   return first + VariantUtils.COLON + second;
 }
Beispiel #4
0
 /**
  * Builds the locus for a clipped copy of a variant: the start moves right by
  * {@code leftClip}, the end moves left by {@code rightClip}, and the reference bases are
  * clipped to match.
  *
  * @param original variant supplying the locus to adjust
  * @param leftClip amount removed from the left of the reference
  * @param rightClip amount removed from the right of the reference
  * @return a new locus on the same sequence with adjusted coordinates and reference
  */
 private static VariantLocus createLocus(
     final Variant original, final int leftClip, final int rightClip) {
   final VariantLocus source = original.getLocus();
   final String refNts = source.getRefNts();

   // The base preceding the new start is the last clipped reference base, unless nothing
   // was clipped on the left, in which case the previous base is unchanged.
   final char previousNt;
   if (leftClip == 0) {
     previousNt = source.getPreviousRefNt();
   } else {
     previousNt = refNts.charAt(leftClip - 1);
   }

   final String clippedRef = StringUtils.clip(refNts, leftClip, rightClip);
   final int clippedStart = source.getStart() + leftClip;
   final int clippedEnd = source.getEnd() - rightClip;
   return new VariantLocus(
       source.getSequenceName(), clippedStart, clippedEnd, clippedRef, previousNt);
 }
Beispiel #5
0
 /**
  * Collects the distinct allele strings named by the samples of a variant. Null samples and
  * samples reporting {@code isIdentity()} contribute nothing; every other sample's name is
  * split on {@code VariantUtils.COLON} and each part is added.
  *
  * <p>The Description cannot be used for this, as it includes hypotheses that weren't
  * necessarily called.
  *
  * @param variant variant whose samples are inspected
  * @return set of called allele strings, empty when no sample contributes
  */
 private static HashSet<String> extractCalledAlleles(final Variant variant) {
   final HashSet<String> called = new HashSet<>();
   final int sampleCount = variant.getNumberOfSamples();
   for (int idx = 0; idx < sampleCount; idx++) {
     final VariantSample sample = variant.getSample(idx);
     if (sample == null || sample.isIdentity()) {
       continue;
     }
     Collections.addAll(called, StringUtils.split(sample.getName(), VariantUtils.COLON));
   }
   return called;
 }
 /**
  * Appends a memory usage report to the supplied builder: a header line with the number of
  * sequences, whatever {@code mData.infoString} writes, one line each for the names and
  * name suffixes when those are present, and a final line with the total byte count.
  *
  * @param sb builder receiving the report
  */
 void infoString(final StringBuilder sb) {
   sb.append("Memory Usage: ");
   sb.append(mNumberSequences);
   sb.append(" sequences");
   sb.append(LS);

   // The data component writes its own lines and reports the bytes it accounts for.
   long runningTotal = mData.infoString(sb);

   if (mNames != null) {
     sb.append("\t\t").append(StringUtils.commas(mNames.bytes()));
     sb.append("\t").append(StringUtils.commas(mNames.length()));
     sb.append("\tNames").append(LS);
     runningTotal += mNames.bytes();
   }

   if (mNameSuffixes != null) {
     sb.append("\t\t").append(StringUtils.commas(mNameSuffixes.bytes()));
     sb.append("\t").append(StringUtils.commas(mNameSuffixes.length()));
     sb.append("\tSuffixes").append(LS);
     runningTotal += mNameSuffixes.bytes();
   }

   sb.append("\t\t");
   sb.append(StringUtils.commas(runningTotal));
   sb.append("\t\tTotal bytes");
   sb.append(LS);
 }
  public void testKeepSamples() throws Exception {
    // Keeps only samples HG00096 and HG00100, then compares the output (minus run-specific
    // header lines) to the stored expectation "vcfsubset-keepsamples.vcf".
    try (TestDirectory dir = new TestDirectory()) {
      final File gzTarget = new File(dir, "vcf.vcf.gz");
      final File input =
          FileHelper.resourceToGzFile("com/rtg/vcf/resources/vcfsubset.vcf", gzTarget);
      final File output = new File(dir, "out.vcf");

      checkMainInitOk(
          "-i", input.getPath(),
          "-o", output.getPath(),
          "--keep-sample", "HG00096",
          "--keep-sample", "HG00100",
          "-Z");

      // The ##RUN-ID and ##CL header lines are stripped before comparing
      // (presumably because they differ from run to run -- confirm).
      final String raw = FileHelper.fileToString(output);
      final String withoutRunId = StringUtils.grepMinusV(raw, "^##RUN-ID");
      final String normalized = StringUtils.grepMinusV(withoutRunId, "^##CL");
      mNano.check("vcfsubset-keepsamples.vcf", normalized);
    }
  }
 /**
  * Appends a textual dump of this model: a header line carrying the contamination value,
  * followed by one line per sub-hypothesis. Each line holds the left-padded hypothesis name
  * and then, for every second sub-hypothesis index, the formatted {@code poss2Ln} value of
  * the posterior stored at the combined code of the two indices.
  *
  * @param sb builder receiving the dump
  */
 @Override
 public void toString(final StringBuilder sb) {
   final FormatReal realFormat = new FormatReal(4, 3);
   sb.append("Contaminated Cancer Model")
       .append(" contamination=")
       .append(realFormat.format(mContamination))
       .append(LS);

   final int nameWidth = hypotheses().nameLength();
   final int subCount = ((HypothesesCancer) hypotheses()).subHypotheses().size();
   for (int row = 0; row < subCount; row++) {
     sb.append(StringUtils.padLeft(hypotheses().name(row), nameWidth));
     for (int col = 0; col < subCount; col++) {
       // Posteriors are indexed by the combined code of the (row, col) pair.
       final int combined = hypotheses().code().code(row, col);
       sb.append(realFormat.format(arithmetic().poss2Ln(mPosteriors[combined])));
     }
     sb.append(LS);
   }
 }
Beispiel #9
0
 /**
  * Re-keys a sample's genotype likelihoods after clipping. Every allele in each genotype
  * key is clipped; genotypes that become identical after clipping are merged by combining
  * their likelihoods with {@code LogApproximatePossibility.SINGLETON.add}.
  *
  * @param leftClip left clip amount handed to {@code StringUtils.clip}
  * @param rightClip right clip amount handed to {@code StringUtils.clip}
  * @param sample sample supplying the original likelihoods
  * @return likelihoods keyed by clipped genotypes, or null when the sample has none
  */
 private static Map<Set<String>, Double> newGenotypeLikelihoods(
     int leftClip, int rightClip, VariantSample sample) {
   final Map<Set<String>, Double> source = sample.getGenotypeLikelihoods();
   if (source == null) {
     return null;
   }

   final Map<Set<String>, Double> clippedLikelihoods = new HashMap<>();
   for (final Map.Entry<Set<String>, Double> genotype : source.entrySet()) {
     final Set<String> clippedKey = new HashSet<>();
     for (final String allele : genotype.getKey()) {
       clippedKey.add(StringUtils.clip(allele, leftClip, rightClip));
     }

     // Start from the arithmetic's zero when this clipped genotype has not been seen yet.
     final Double previous = clippedLikelihoods.get(clippedKey);
     final double accumulated;
     if (previous == null) {
       accumulated = LogApproximatePossibility.SINGLETON.zero();
     } else {
       accumulated = previous;
     }
     clippedLikelihoods.put(
         clippedKey, LogApproximatePossibility.SINGLETON.add(accumulated, genotype.getValue()));
   }
   return clippedLikelihoods;
 }
Beispiel #10
0
  /**
   * Creates one piece of a split variant by clipping the original's locus, description,
   * samples and possible cause, then tagging the result with the given split id.
   *
   * @param original variant being split
   * @param start left clip amount applied to the locus and every allele
   * @param end right clip amount applied to the locus and every allele
   * @param id split id recorded on the result
   * @return the new variant; all other attributes are copied over by {@code Variant.copy}
   */
  private static Variant createSplitVariant(
      final Variant original, final int start, final int end, final int id) {
    final VariantLocus splitLocus = createLocus(original, start, end);

    final VariantSample[] splitSamples;
    final int sampleCount = original.getNumberOfSamples();
    if (sampleCount <= 0) {
      splitSamples = new VariantSample[0];
    } else {
      // Use the description of the last non-null sample that has a real description.
      Description sourceDescription = DescriptionNone.SINGLETON;
      for (int s = 0; s < sampleCount; s++) {
        final VariantSample sample = original.getSample(s);
        if (sample == null) {
          continue;
        }
        final Description candidate = sample.getStats().counts().getDescription();
        if (!(candidate instanceof DescriptionNone)) {
          sourceDescription = candidate;
        }
      }

      // Old alleles that clip to the same string share one new allele index; new indices are
      // handed out in order of first appearance.
      final int alleleCount = sourceDescription.size();
      final LinkedHashMap<String, Integer> clippedIndex = new LinkedHashMap<>();
      final int[] alleleMap = new int[alleleCount];
      for (int a = 0; a < alleleCount; a++) {
        final String clipped = StringUtils.clip(sourceDescription.name(a), start, end);
        Integer target = clippedIndex.get(clipped);
        if (target == null) {
          target = clippedIndex.size();
          clippedIndex.put(clipped, target);
        }
        alleleMap[a] = target;
      }

      final String[] clippedAlleles = clippedIndex.keySet().toArray(new String[0]);
      final Description splitDescription = new DescriptionCommon(clippedAlleles);
      splitSamples = createVariants(original, start, end, splitDescription, alleleMap);
    }

    final Variant split = new Variant(splitLocus, splitSamples);
    Variant.copy(original, split);
    // Set after the copy, in the same order as the original code.
    split.setPossibleCause(createVariantName(start, end, original.getPossibleCause()));
    split.setSplitId(id);
    return split;
  }
Beispiel #11
0
  /**
   * Trims a variant by stripping the prefix and suffix shared by the reference and every
   * called allele, shifting the position information to match.
   *
   * <p>The original instance is returned unchanged when the reference is empty, when no
   * alleles were called, or when there is nothing in common to strip.
   *
   * @param original the original call
   * @return trimmed variant (possibly original if no change made)
   */
  static Variant trim(final Variant original) {
    final String ref = original.getLocus().getRefNts();
    if (ref.isEmpty()) {
      // A pure insertion has no reference bases, so there is nothing that could be trimmed.
      return original;
    }

    // Gather the called alleles; with none there is nothing to compare against.
    final HashSet<String> candidates = extractCalledAlleles(original);
    if (candidates.isEmpty()) {
      return original;
    }

    // The (non-empty) reference takes part in the comparison as well.
    assert !ref.isEmpty();
    candidates.add(ref);
    final String[] candidateArray = candidates.toArray(new String[0]);

    // The suffix is measured first and the result is fed into the prefix computation
    // (presumably so that the two clips cannot overlap -- confirm against StringUtils).
    final int rightClip = StringUtils.longestSuffix(candidateArray);
    final int leftClip = StringUtils.longestPrefix(rightClip, candidateArray);
    if (rightClip == 0 && leftClip == 0) {
      return original;
    }

    // The reference of the new locus will be "" when the trimmed call is a pure insertion.
    final VariantLocus trimmedLocus = createLocus(original, leftClip, rightClip);

    final VariantSample[] trimmedSamples;
    final int sampleCount = original.getNumberOfSamples();
    if (sampleCount <= 0) {
      trimmedSamples = new VariantSample[0];
    } else {
      // Use the description of the last non-null sample that has a real description.
      Description sourceDescription = DescriptionNone.SINGLETON;
      for (int s = 0; s < sampleCount; s++) {
        final VariantSample sample = original.getSample(s);
        if (sample == null) {
          continue;
        }
        final Description candidate = sample.getStats().counts().getDescription();
        if (!(candidate instanceof DescriptionNone)) {
          sourceDescription = candidate;
        }
      }

      // Clip every allele name; allele indices stay the same (identity mapping).
      final int alleleCount = sourceDescription.size();
      final String[] clippedAlleles = new String[alleleCount];
      final int[] alleleMap = new int[alleleCount];
      for (int a = 0; a < alleleCount; a++) {
        clippedAlleles[a] = StringUtils.clip(sourceDescription.name(a), leftClip, rightClip);
        alleleMap[a] = a;
      }

      final Description trimmedDescription = new DescriptionCommon(clippedAlleles);
      trimmedSamples =
          createVariants(original, leftClip, rightClip, trimmedDescription, alleleMap);
    }

    // Assemble the result from the trimmed locus and samples.
    final Variant trimmed = new Variant(trimmedLocus, trimmedSamples);
    Variant.copy(original, trimmed);
    // This must be done after the copy
    trimmed.setPossibleCause(createVariantName(leftClip, rightClip, original.getPossibleCause()));
    trimmed.setTrimmed();
    return trimmed;
  }