/**
 * Runs the command with {@code --remove-samples}, two {@code --keep-info} values and one
 * {@code --keep-filter} value, then compares the output (with its {@code ##RUN-ID} and
 * {@code ##CL} lines stripped) against the stored {@code vcfsubset-multi.vcf} expectation.
 *
 * @throws Exception if the command or any file handling fails
 */
public void testRemoveMulti() throws Exception {
  try (TestDirectory dir = new TestDirectory()) {
    final File input =
        FileHelper.resourceToGzFile(
            "com/rtg/vcf/resources/vcfsubset.vcf", new File(dir, "vcf.vcf.gz"));
    final File output = new File(dir, "out.vcf");

    checkMainInitOk(
        "-i", input.getPath(),
        "-o", output.getPath(),
        "--remove-samples",
        "--keep-info", "AN",
        "--keep-info", "AC",
        "--keep-filter", "YEA",
        "-Z");

    // Strip the header lines that differ from run to run before comparing.
    final String raw = FileHelper.fileToString(output);
    final String withoutRunId = StringUtils.grepMinusV(raw, "^##RUN-ID");
    final String comparable = StringUtils.grepMinusV(withoutRunId, "^##CL");
    mNano.check("vcfsubset-multi.vcf", comparable);
  }
}
/**
 * Removes the {@code GT} and {@code DS} format fields and checks both the emitted warning about
 * a skipped record and that the first non-header output line starts at {@code X 60052}.
 *
 * @throws Exception if the command or any file handling fails
 */
public void testExplosion() throws Exception {
  try (TestDirectory dir = new TestDirectory()) {
    final File input =
        FileHelper.resourceToGzFile(
            "com/rtg/vcf/resources/vcfsubset.vcf", new File(dir, "vcf.vcf.gz"));
    final File output = new File(dir, "out.vcf");

    final String warning =
        checkMainInitWarn(
            "-i", input.getPath(),
            "-o", output.getPath(),
            "--remove-format", "GT",
            "--remove-format", "DS",
            "-Z");
    assertEquals(
        "Records skipped due to invalid or incompatible sample fields: 1" + StringUtils.LS,
        warning);

    // Only the record lines matter here, so drop every header line first.
    final String records = StringUtils.grepMinusV(FileHelper.fileToString(output), "^#");
    assertTrue(records.startsWith("X\t60052"));
  }
}
/**
 * Builds the clipped form of a variant name. A name containing the colon separator is treated
 * as two alleles that are clipped independently and re-joined; any other name is clipped as a
 * single allele.
 *
 * @param leftClip left clip amount handed to {@code StringUtils.clip}
 * @param rightClip right clip amount handed to {@code StringUtils.clip}
 * @param oldName the name to clip, may be {@code null}
 * @return the clipped name, or {@code null} when {@code oldName} is {@code null}
 */
private static String createVariantName(
    final int leftClip, final int rightClip, final String oldName) {
  if (oldName == null) {
    return null;
  }
  final int separator = oldName.indexOf(VariantUtils.COLON);
  if (separator < 0) {
    // No separator at all: the whole name is one allele.
    return StringUtils.clip(oldName, leftClip, rightClip);
  }
  // A name is expected to hold at most one separator.
  assert oldName.indexOf(VariantUtils.COLON, separator + 1) == -1;
  final String first = StringUtils.clip(oldName.substring(0, separator), leftClip, rightClip);
  final String second = StringUtils.clip(oldName.substring(separator + 1), leftClip, rightClip);
  return first + VariantUtils.COLON + second;
}
/**
 * Derives a new locus from the locus of {@code original} by clipping its reference bases and
 * moving its start forward by {@code leftClip} and its end back by {@code rightClip}.
 *
 * @param original variant supplying the locus to adjust
 * @param leftClip amount added to the start position and clipped from the left of the reference
 * @param rightClip amount subtracted from the end position and clipped from the right
 * @return the adjusted locus
 */
private static VariantLocus createLocus(
    final Variant original, final int leftClip, final int rightClip) {
  final VariantLocus source = original.getLocus();
  final String sourceRef = source.getRefNts();

  // With a left clip, the base just before the new start is the last clipped reference base;
  // otherwise the previous base of the source locus still applies.
  final char previousNt;
  if (leftClip == 0) {
    previousNt = source.getPreviousRefNt();
  } else {
    previousNt = sourceRef.charAt(leftClip - 1);
  }

  final String clippedRef = StringUtils.clip(sourceRef, leftClip, rightClip);
  final int clippedStart = source.getStart() + leftClip;
  final int clippedEnd = source.getEnd() - rightClip;
  return new VariantLocus(
      source.getSequenceName(), clippedStart, clippedEnd, clippedRef, previousNt);
}
// Can't use Description, as that includes hypotheses that weren't necessarily called. private static HashSet<String> extractCalledAlleles(final Variant variant) { final HashSet<String> alleles = new HashSet<>(); for (int k = 0; k < variant.getNumberOfSamples(); k++) { final VariantSample vs = variant.getSample(k); if (vs != null) { if (!vs.isIdentity()) { Collections.addAll(alleles, StringUtils.split(vs.getName(), VariantUtils.COLON)); } } } return alleles; }
/**
 * Appends a memory usage report to {@code sb}: a heading with the number of sequences, whatever
 * {@code mData} reports, one line each for names and name suffixes when present, and finally a
 * total byte count.
 *
 * @param sb destination for the report
 */
void infoString(final StringBuilder sb) {
  sb.append("Memory Usage: ");
  sb.append(mNumberSequences);
  sb.append(" sequences");
  sb.append(LS);

  // mData writes its own lines and reports how many bytes it accounts for.
  long runningTotal = mData.infoString(sb);

  if (mNames != null) {
    sb.append("\t\t");
    sb.append(StringUtils.commas(mNames.bytes()));
    sb.append("\t");
    sb.append(StringUtils.commas(mNames.length()));
    sb.append("\tNames");
    sb.append(LS);
    runningTotal += mNames.bytes();
  }

  if (mNameSuffixes != null) {
    sb.append("\t\t");
    sb.append(StringUtils.commas(mNameSuffixes.bytes()));
    sb.append("\t");
    sb.append(StringUtils.commas(mNameSuffixes.length()));
    sb.append("\tSuffixes");
    sb.append(LS);
    runningTotal += mNameSuffixes.bytes();
  }

  sb.append("\t\t");
  sb.append(StringUtils.commas(runningTotal));
  sb.append("\t\tTotal bytes");
  sb.append(LS);
}
/**
 * Runs the command keeping only samples {@code HG00096} and {@code HG00100}, then compares the
 * output (with its {@code ##RUN-ID} and {@code ##CL} lines stripped) against the stored
 * {@code vcfsubset-keepsamples.vcf} expectation.
 *
 * @throws Exception if the command or any file handling fails
 */
public void testKeepSamples() throws Exception {
  try (TestDirectory dir = new TestDirectory()) {
    final File input =
        FileHelper.resourceToGzFile(
            "com/rtg/vcf/resources/vcfsubset.vcf", new File(dir, "vcf.vcf.gz"));
    final File output = new File(dir, "out.vcf");

    checkMainInitOk(
        "-i", input.getPath(),
        "-o", output.getPath(),
        "--keep-sample", "HG00096",
        "--keep-sample", "HG00100",
        "-Z");

    // Strip the header lines that differ from run to run before comparing.
    final String raw = FileHelper.fileToString(output);
    final String withoutRunId = StringUtils.grepMinusV(raw, "^##RUN-ID");
    final String comparable = StringUtils.grepMinusV(withoutRunId, "^##CL");
    mNano.check("vcfsubset-keepsamples.vcf", comparable);
  }
}
/**
 * Appends a textual dump of this model: a heading line carrying the contamination value,
 * followed by one line per sub-hypothesis holding its left-padded name and the formatted
 * {@code poss2Ln} value of the posterior for each (row, column) code.
 *
 * @param sb destination for the dump
 */
@Override
public void toString(final StringBuilder sb) {
  final FormatReal formatter = new FormatReal(4, 3);
  sb.append("Contaminated Cancer Model")
      .append(" contamination=")
      .append(formatter.format(mContamination))
      .append(LS);

  final int nameWidth = hypotheses().nameLength();
  final int dimension = ((HypothesesCancer) hypotheses()).subHypotheses().size();
  for (int row = 0; row < dimension; row++) {
    sb.append(StringUtils.padLeft(hypotheses().name(row), nameWidth));
    for (int col = 0; col < dimension; col++) {
      // Each (row, col) pair is translated into an index into the posterior array.
      final int index = hypotheses().code().code(row, col);
      sb.append(formatter.format(arithmetic().poss2Ln(mPosteriors[index])));
    }
    sb.append(LS);
  }
}
/**
 * Rebuilds the genotype likelihood map of a sample with every allele clipped. Distinct original
 * genotypes can clip to the same allele set; in that case their values are combined using
 * {@code LogApproximatePossibility.SINGLETON.add}.
 *
 * @param leftClip left clip amount handed to {@code StringUtils.clip}
 * @param rightClip right clip amount handed to {@code StringUtils.clip}
 * @param sample sample supplying the original likelihoods
 * @return the re-keyed likelihoods, or {@code null} when the sample has none
 */
private static Map<Set<String>, Double> newGenotypeLikelihoods(
    int leftClip, int rightClip, VariantSample sample) {
  final Map<Set<String>, Double> source = sample.getGenotypeLikelihoods();
  if (source == null) {
    return null;
  }

  final Map<Set<String>, Double> clippedLikelihoods = new HashMap<>();
  for (Map.Entry<Set<String>, Double> genotype : source.entrySet()) {
    final Set<String> clippedKey = new HashSet<>();
    for (String allele : genotype.getKey()) {
      clippedKey.add(StringUtils.clip(allele, leftClip, rightClip));
    }

    // Start from zero for a new key, otherwise accumulate onto what is already stored.
    final Double stored = clippedLikelihoods.get(clippedKey);
    final double accumulated;
    if (stored == null) {
      accumulated = LogApproximatePossibility.SINGLETON.zero();
    } else {
      accumulated = stored;
    }
    clippedLikelihoods.put(
        clippedKey, LogApproximatePossibility.SINGLETON.add(accumulated, genotype.getValue()));
  }
  return clippedLikelihoods;
}
private static Variant createSplitVariant( final Variant original, final int start, final int end, final int id) { final VariantLocus newLocus = createLocus(original, start, end); final VariantSample[] newSamples; if (original.getNumberOfSamples() > 0) { // trim description Description oldDescription = DescriptionNone.SINGLETON; for (int i = 0; i < original.getNumberOfSamples(); i++) { if (original.getSample(i) != null && !(original.getSample(i).getStats().counts().getDescription() instanceof DescriptionNone)) { oldDescription = original.getSample(i).getStats().counts().getDescription(); } } // Incrementally build up mapping of old alleles to new alleles final LinkedHashMap<String, Integer> alleles = new LinkedHashMap<>(); final int[] alleleMap = new int[oldDescription.size()]; for (int i = 0; i < oldDescription.size(); i++) { final String clipped = StringUtils.clip(oldDescription.name(i), start, end); Integer newPos = alleles.get(clipped); if (newPos == null) { newPos = alleles.size(); } alleles.put(clipped, newPos); alleleMap[i] = newPos; } final Description newDescription = new DescriptionCommon(alleles.keySet().toArray(new String[alleles.size()])); newSamples = createVariants(original, start, end, newDescription, alleleMap); } else { newSamples = new VariantSample[0]; } final Variant result = new Variant(newLocus, newSamples); Variant.copy(original, result); result.setPossibleCause(createVariantName(start, end, original.getPossibleCause())); result.setSplitId(id); return result; }
/** * Trim a variant by removing common prefix and suffix from each call and reference and adjusting * all the position information accordingly. * * @param original the original call * @return trimmed variant (possibly original if no change made) */ static Variant trim(final Variant original) { final String ref = original.getLocus().getRefNts(); if (ref.length() == 0) { // Cannot possibly trim if we are inserting, all bases are to be inserted return original; } // Compute set of called alleles and exit if there are none final HashSet<String> catSet = extractCalledAlleles(original); if (catSet.size() == 0) { return original; } // Include actual reference sequence (this will not be "") assert ref.length() > 0; catSet.add(ref); // Compute maximal clip positions based on set of alleles final String[] cats = catSet.toArray(new String[catSet.size()]); final int rightClip = StringUtils.longestSuffix(cats); final int leftClip = StringUtils.longestPrefix(rightClip, cats); // final int leftClip = StringUtils.longestPrefix(cats); // final int rightClip = StringUtils.longestSuffix(cats, leftClip); // Quick exit if no trimming is possible if (leftClip == 0 && rightClip == 0) { return original; } // Create new locus, will be "" for case of pure insertion final VariantLocus newLocus = createLocus(original, leftClip, rightClip); final VariantSample[] newSamples; if (original.getNumberOfSamples() > 0) { // trim description Description oldDescription = DescriptionNone.SINGLETON; for (int i = 0; i < original.getNumberOfSamples(); i++) { if (original.getSample(i) != null && !(original.getSample(i).getStats().counts().getDescription() instanceof DescriptionNone)) { oldDescription = original.getSample(i).getStats().counts().getDescription(); } } final String[] alleles = new String[oldDescription.size()]; final int[] alleleMap = new int[oldDescription.size()]; for (int i = 0; i < oldDescription.size(); i++) { final String clipped = StringUtils.clip(oldDescription.name(i), leftClip, 
rightClip); alleles[i] = clipped; alleleMap[i] = i; } final Description newDescription = new DescriptionCommon(alleles); // Create new trimmed samples newSamples = createVariants(original, leftClip, rightClip, newDescription, alleleMap); } else { newSamples = new VariantSample[0]; } // Create new variant from the new samples final Variant result = new Variant(newLocus, newSamples); Variant.copy(original, result); // This must be done after the copy result.setPossibleCause(createVariantName(leftClip, rightClip, original.getPossibleCause())); result.setTrimmed(); // System.err.println(original); // System.err.println(result); return result; }