private static Type typeOfBiallelicVariant(Allele ref, Allele allele) { if (ref.isSymbolic()) throw new IllegalStateException( "Unexpected error: encountered a record with a symbolic reference allele"); if (allele.isSymbolic()) return Type.SYMBOLIC; if (ref.length() == allele.length()) { if (allele.length() == 1) return Type.SNP; else return Type.MNP; } // Important note: previously we were checking that one allele is the prefix of the other. // However, that's not an // appropriate check as can be seen from the following example: // REF = CTTA and ALT = C,CT,CA // This should be assigned the INDEL type but was being marked as a MIXED type because of the // prefix check. // In truth, it should be absolutely impossible to return a MIXED type from this method because // it simply // performs a pairwise comparison of a single alternate allele against the reference allele // (whereas the MIXED type // is reserved for cases of multiple alternate alleles of different types). Therefore, if we've // reached this point // in the code (so we're not a SNP, MNP, or symbolic allele), we absolutely must be an INDEL. return Type.INDEL; // old incorrect logic: // if (oneIsPrefixOfOther(ref, allele)) // return Type.INDEL; // else // return Type.MIXED; }
/**
 * Picks the longest reference allele among the supplied variant contexts.
 *
 * <p>Contexts are visited in list order. A reference that is strictly longer than the current
 * choice replaces it; a reference of the same length must be equal to the current choice.
 * Shorter references are accepted without further comparison.
 *
 * @param VCs the variant contexts to inspect
 * @return the longest reference allele seen, or {@code null} if {@code VCs} is empty
 * @throws UserException.BadInput if two references of equal length differ
 */
private static Allele determineReferenceAllele(List<VariantContext> VCs) {
  Allele longestRef = null;

  for (VariantContext vc : VCs) {
    final Allele candidate = vc.getReference();

    // First reference seen, or a strictly longer one: adopt it and move on.
    if (longestRef == null || longestRef.length() < candidate.length()) {
      longestRef = candidate;
      continue;
    }

    final boolean sameLengthButDifferent =
        longestRef.length() == candidate.length() && !longestRef.equals(candidate);
    if (sameLengthButDifferent) {
      throw new UserException.BadInput(
          String.format(
              "The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s",
              vc.getChr(), vc.getStart(), longestRef, candidate));
    }
  }

  return longestRef;
}
private static AlleleMapper resolveIncompatibleAlleles( Allele refAllele, VariantContext vc, Set<Allele> allAlleles) { if (refAllele.equals(vc.getReference())) return new AlleleMapper(vc); else { // we really need to do some work. The refAllele is the longest reference allele seen at this // start site. So imagine it is: // // refAllele: ACGTGA // myRef: ACGT // myAlt: - // // We need to remap all of the alleles in vc to include the extra GA so that // myRef => refAllele and myAlt => GA // Allele myRef = vc.getReference(); if (refAllele.length() <= myRef.length()) throw new ReviewedStingException( "BUG: myRef=" + myRef + " is longer than refAllele=" + refAllele); byte[] extraBases = Arrays.copyOfRange(refAllele.getBases(), myRef.length(), refAllele.length()); // System.out.printf("Remapping allele at %s%n", vc); // System.out.printf("ref %s%n", refAllele); // System.out.printf("myref %s%n", myRef ); // System.out.printf("extrabases %s%n", new String(extraBases)); Map<Allele, Allele> map = new HashMap<Allele, Allele>(); for (Allele a : vc.getAlleles()) { if (a.isReference()) map.put(a, refAllele); else { Allele extended = Allele.extend(a, extraBases); for (Allele b : allAlleles) if (extended.equals(b)) extended = b; // System.out.printf(" Extending %s => %s%n", a, extended); map.put(a, extended); } } // debugging // System.out.printf("mapping %s%n", map); return new AlleleMapper(map); } }
/**
 * Reports, for each alternate allele, how its length differs from the reference allele.
 *
 * <p>Each entry is {@code alternate.length() - reference.length()}, listed in the order of
 * {@link #getAlternateAlleles()}; an alternate longer than the reference therefore gives a
 * positive value and a shorter one a negative value.
 *
 * @return the list of length differences, or {@code null} if this record's type is neither
 *     {@code Type.INDEL} nor {@code Type.MIXED}
 */
public List<Integer> getIndelLengths() {
  final Type type = getType();
  final boolean mayContainIndels = type == Type.INDEL || type == Type.MIXED;
  if (!mayContainIndels) {
    return null;
  }

  final List<Integer> lengths = new ArrayList<Integer>();
  for (Allele alternate : getAlternateAlleles()) {
    final int difference = alternate.length() - getReference().length();
    lengths.add(difference);
  }
  return lengths;
}
private RepeatDetectorTest( boolean isTrueRepeat, String ref, String refAlleleString, String... altAlleleStrings) { super(RepeatDetectorTest.class); this.ref = "N" + ref; // add a dummy base for the event here this.isTrueRepeat = isTrueRepeat; List<Allele> alleles = new LinkedList<Allele>(); final Allele refAllele = Allele.create(refAlleleString, true); alleles.add(refAllele); for (final String altString : altAlleleStrings) { final Allele alt = Allele.create(altString, false); alleles.add(alt); } VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, 1 + refAllele.length(), alleles); this.vc = builder.make(); }
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) { // see if we need to trim common reference base from all alleles boolean trimVC; // We need to trim common reference base from all alleles in all genotypes if a ref base is // common to all alleles Allele refAllele = inputVC.getReference(); if (!inputVC.isVariant()) trimVC = false; else if (refAllele.isNull()) trimVC = false; else { trimVC = (AbstractVCFCodec.computeForwardClipping( new ArrayList<Allele>(inputVC.getAlternateAlleles()), inputVC.getReference().getDisplayString()) > 0); } // nothing to do if we don't need to trim bases if (trimVC) { List<Allele> alleles = new ArrayList<Allele>(); GenotypesContext genotypes = GenotypesContext.create(); // set the reference base for indels in the attributes Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes()); Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>(); for (Allele a : inputVC.getAlleles()) { if (a.isSymbolic()) { alleles.add(a); originalToTrimmedAlleleMap.put(a, a); } else { // get bases for current allele and create a new one with trimmed bases byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length()); Allele trimmedAllele = Allele.create(newBases, a.isReference()); alleles.add(trimmedAllele); originalToTrimmedAlleleMap.put(a, trimmedAllele); } } // detect case where we're trimming bases but resulting vc doesn't have any null allele. 
In // that case, we keep original representation // example: mixed records such as {TA*,TGA,TG} boolean hasNullAlleles = false; for (Allele a : originalToTrimmedAlleleMap.values()) { if (a.isNull()) hasNullAlleles = true; if (a.isReference()) refAllele = a; } if (!hasNullAlleles) return inputVC; // now we can recreate new genotypes with trimmed alleles for (final Genotype genotype : inputVC.getGenotypes()) { List<Allele> originalAlleles = genotype.getAlleles(); List<Allele> trimmedAlleles = new ArrayList<Allele>(); for (Allele a : originalAlleles) { if (a.isCalled()) trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); else trimmedAlleles.add(Allele.NO_CALL); } genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles)); } final VariantContextBuilder builder = new VariantContextBuilder(inputVC); return builder .alleles(alleles) .genotypes(genotypes) .attributes(attributes) .referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])) .make(); } return inputVC; }