/** * Is this single change a LOF? * * <p>Criteria: 1) Core splice sites acceptors or donors (only CORE ones) 2) Stop gained (if this * happens at the last part of the protein, we assume it has no effect) 3) Frame shifts * * @param changeEffect * @return */ protected boolean isNmd(ChangeEffect changeEffect) { Transcript tr = changeEffect.getTranscript(); if (tr == null) throw new RuntimeException("Transcript not found for change:\n\t" + changeEffect); // Only one exon? Nothing to do (there is no exon-exon junction) if (tr.numChilds() <= 1) return false; // Find last valid NMD position int lastNmdPos = lastNmdPos(tr); if (lastNmdPos < 0) return false; // No valid 'lastNmdPos'? => There is no NMD event. // Does this change affect the region 'before' this last NMD position? => It is assumed to be // NMD Variant seqChange = changeEffect.getSeqChange(); boolean nmd; if (tr.isStrandPlus()) nmd = seqChange.getStart() <= lastNmdPos; else nmd = lastNmdPos <= seqChange.getEnd(); // Update sets and counters if (nmd) { transcriptsNmd.add( changeEffect .getTranscript()); // Unique transcripts affected (WARNING: null will be added) genesNmd.add(changeEffect.getGene()); // Unique genes affected (WARNING: null will be added) nmdCount++; } return nmd; }
/** * Predict the effect of a seqChange * * @param seqChange : Sequence change * @param seqChangeRef : Before analyzing results, we have to change markers using seqChangerRef * to create a new reference 'on the fly' */ public ChangeEffects seqChangeEffect(Variant seqChange, Variant seqChangeRef) { ChangeEffects changeEffects = new ChangeEffects(seqChange, seqChangeRef); // --- // Chromosome missing? // --- if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(seqChange)) { changeEffects.addErrorWarning(ErrorWarningType.ERROR_CHROMOSOME_NOT_FOUND); return changeEffects; } // --- // Check that this is not a huge deletion. // Huge deletions would crash the rest of the algorithm, so we need to stop them here. // --- if (seqChange.isDel() && (seqChange.size() > HUGE_DELETION_SIZE_THRESHOLD)) { // Get chromosome String chromoName = seqChange.getChromosomeName(); Chromosome chr = genome.getChromosome(chromoName); if (chr.size() > 0) { double ratio = seqChange.size() / ((double) chr.size()); if (ratio > HUGE_DELETION_RATIO_THRESHOLD) { changeEffects.add(chr, EffectType.CHROMOSOME_LARGE_DELETION, ""); return changeEffects; } } } // --- // Query interval tree: Which intervals does seqChange intersect? // --- Markers intersects = query(seqChange); // Show all results boolean hitChromo = false, hitSomething = false; if (intersects.size() > 0) { for (Marker marker : intersects) { if (marker instanceof Chromosome) hitChromo = true; // Do we hit any chromosome? else { // Analyze all markers marker.seqChangeEffect(seqChange, changeEffects, seqChangeRef); hitSomething = true; } } } // Any errors or intergenic (i.e. did not hit any gene) if (!hitChromo) { if (Config.get().isErrorChromoHit()) changeEffects.addErrorWarning(ErrorWarningType.ERROR_OUT_OF_CHROMOSOME_RANGE); } else if (!hitSomething) { if (Config.get().isOnlyRegulation()) changeEffects.setEffectType(EffectType.NONE); else changeEffects.setEffectType(EffectType.INTERGENIC); } return changeEffects; }
/**
 * Net amino acid change (only for InDels).
 *
 * <p>For an insertion the alternative amino acid sequence is the longer one, for a deletion the
 * reference sequence is. The shorter sequence is stripped from the longer one when it is a
 * prefix or, failing that, a suffix of it; what remains is the net change. For any other variant
 * type both sequences are empty and the result is an empty string.
 *
 * @return the upper-cased amino acids that were inserted or deleted; the whole longer sequence
 *     if the shorter one is neither a prefix nor a suffix of it
 */
public String getAaNetChange() {
  String aaLong = "", aaShort = "";

  if (variant.isIns()) {
    aaShort = getAaRef().toUpperCase();
    aaLong = getAaAlt().toUpperCase();
  } else if (variant.isDel()) {
    aaLong = getAaRef().toUpperCase();
    aaShort = getAaAlt().toUpperCase();
  }

  // Shared prefix: the net change is whatever follows it
  if (aaLong.startsWith(aaShort)) {
    return aaLong.substring(aaShort.length());
  }

  // Shared suffix: the net change is whatever precedes it.
  // The check must be against 'aaShort': testing 'aaLong' against itself is always true, which
  // produced wrong results (or an out-of-bounds substring) whenever there was no shared suffix.
  if (aaLong.endsWith(aaShort)) {
    return aaLong.substring(0, aaLong.length() - aaShort.length());
  }

  return aaLong;
}
@Override public int compareTo(VariantEffect varEffOther) { // Sort by impact int comp = getEffectImpact().compareTo(varEffOther.getEffectImpact()); if (comp != 0) return comp; // Sort by effect comp = getEffectType().compareTo(varEffOther.getEffectType()); if (comp != 0) return comp; // Sort by: Is canonical transcript? Transcript trThis = getTranscript(); Transcript trOther = varEffOther.getTranscript(); if (trThis != null && trOther != null) { comp = (trOther.isCanonical() ? 1 : 0) - (trThis.isCanonical() ? 1 : 0); } if (comp != 0) return comp; // Sort by genomic coordinate of affected 'marker' if ((trThis != null) && (trOther != null)) comp = trThis.compareToPos(trOther); if (comp != 0) return comp; // Compare IDs if ((trThis != null) && (trOther != null)) comp = trThis.getId().compareTo(trOther.getId()); if (comp != 0) return comp; // Compare by marker if ((getMarker() != null) && (varEffOther.getMarker() != null)) comp = getMarker().compareToPos(varEffOther.getMarker()); if (comp != 0) return comp; // Sort by variant (most of the time this is equal) return variant.compareTo(varEffOther.getVariant()); }
public static void main(String[] args) { Timer.showStdErr("Start"); // Create an input file iterator String inputFile = Gpr.HOME + "/snpEff/ins.txt"; VariantFileIterator seqChangeFileIterator = new SeqChangeTxtFileIterator(inputFile); // --- // Iterate over input file // --- for (Variant seqChange : seqChangeFileIterator) { System.out.println(seqChange + "\t" + seqChange.getChangeType()); } // Zzz zzz = new Zzz(null); // zzz.load("testHg3771Chr1"); // zzz.run(); // Timer.showStdErr("End"); }
/**
 * Return the functional class of this effect (i.e. NONSENSE, MISSENSE, SILENT or NONE).
 *
 * <p>Only SNPs are classified: an amino acid change is NONSENSE when the alternative codon is a
 * stop codon in the marker's codon table and MISSENSE otherwise; a codon change that keeps the
 * amino acid is SILENT. Everything else is NONE.
 */
public FunctionalClass getFunctionalClass() {
  if (!variant.isSnp()) {
    return FunctionalClass.NONE;
  }

  // Same amino acid: silent if the codon changed, nothing otherwise
  if (aaAlt.equals(aaRef)) {
    return codonsAlt.equals(codonsRef) ? FunctionalClass.NONE : FunctionalClass.SILENT;
  }

  // Amino acid changed: does the new codon stop translation?
  CodonTable codonTable = marker.codonTable();
  return codonTable.isStop(codonsAlt) ? FunctionalClass.NONSENSE : FunctionalClass.MISSENSE;
}
/** Return impact of this effect */ public EffectImpact getEffectImpact() { if (effectImpact == null) { if ((variant != null) && (!variant.isVariant())) { // Not a change? => Modifier effectImpact = EffectImpact.MODIFIER; } else { // Get efefct's type highest impact effectImpact = EffectImpact.MODIFIER; for (EffectImpact eimp : effectImpacts) if (eimp.compareTo(effectImpact) < 0) effectImpact = eimp; } } return effectImpact; }
/** * Is this deletion a LOF? * * <p>Criteria: 1) First (coding) exon deleted 2) More than 50% of coding sequence deleted * * @param changeEffect * @return */ protected boolean isLofDeletion(ChangeEffect changeEffect) { Transcript tr = changeEffect.getTranscript(); if (tr == null) throw new RuntimeException("Transcript not found for change:\n\t" + changeEffect); // --- // Criteria: // 1) First (coding) exon deleted // --- if (changeEffect.getEffectType() == EffectType.EXON_DELETED) { Variant seqChange = changeEffect.getSeqChange(); if (seqChange == null) throw new RuntimeException("Cannot retrieve 'seqChange' from EXON_DELETED effect!"); if (seqChange.includes(tr.getFirstCodingExon())) return true; } // --- // Criteria: // 2) More than 50% of coding sequence deleted // --- // Find coding part of the transcript (i.e. no UTRs) Variant seqChange = changeEffect.getSeqChange(); int cdsStart = tr.isStrandPlus() ? tr.getCdsStart() : tr.getCdsEnd(); int cdsEnd = tr.isStrandPlus() ? tr.getCdsEnd() : tr.getCdsStart(); Marker coding = new Marker(seqChange.getChromosome(), cdsStart, cdsEnd, 1, ""); // Create an interval intersecting the CDS and the deletion int start = Math.max(cdsStart, seqChange.getStart()); int end = Math.min(cdsEnd, seqChange.getEnd()); if (start >= end) return false; // No intersections with coding part of the exon? => not LOF Marker codingDeleted = new Marker(seqChange.getChromosome(), start, end, 1, ""); // Count: // - number of coding bases deleted // - number of coding bases int codingBasesDeleted = 0, codingBases = 0; for (Exon exon : tr) { codingBasesDeleted += codingDeleted.intersectSize(exon); codingBases += coding.intersectSize(exon); } // More than a threshold? => It is a LOF double percDeleted = codingBasesDeleted / ((double) codingBases); return (percDeleted > deleteProteinCodingBases); }
/** Annotate a VCF entry */ public boolean annotate(Variant variant, Map<String, String> info) { if (verbose) Gpr.showMark(++countVariants, SHOW_EVERY); // Find in database Collection<DbNsfpEntry> dbEntries = dbNsfp.query(variant); if (dbEntries == null || dbEntries.isEmpty()) return false; // Add all INFO fields that refer to this allele boolean annotated = false; for (String fieldKey : fieldsToAdd.keySet()) { // Are there any values to annotate? String infoValue = getVcfInfo(dbEntries, fieldKey); // Missing or empty? if (annotateEmpty) { if (infoValue.isEmpty()) infoValue = "."; } else if (isDbNsfpValueEmpty(infoValue)) { infoValue = null; } // Add annotations if (infoValue != null) { String oldInfo = info.get(fieldKey); if (oldInfo == null) oldInfo = ""; info.put(fieldKey, oldInfo + (oldInfo.isEmpty() ? "" : ",") + infoValue); annotated = true; } } // Show progress if (annotated) { countAnnotated++; if (debug) Gpr.debug("Annotated: " + variant.toStr()); } return annotated; }
public String toString(boolean useSeqOntology, boolean useHgvs) { // Get data to show String geneId = "", geneName = "", bioType = "", transcriptId = "", exonId = "", customId = ""; int exonRank = -1; if (marker != null) { // Gene Id, name and biotype Gene gene = getGene(); Transcript tr = getTranscript(); // CDS size info if (gene != null) { geneId = gene.getId(); geneName = gene.getGeneName(); bioType = getBiotype(); } // Update trId if (tr != null) transcriptId = tr.getId(); // Exon rank information Exon exon = getExon(); if (exon != null) { exonId = exon.getId(); exonRank = exon.getRank(); } // Regulation if (isRegulation()) bioType = ((Regulation) marker).getCellType(); } // Add seqChage's ID if (!variant.getId().isEmpty()) customId += variant.getId(); // Add custom markers if ((marker != null) && (marker instanceof Custom)) customId += (customId.isEmpty() ? "" : ";") + marker.getId(); // CDS length int cdsSize = getCdsLength(); String errWarn = error + (error.isEmpty() ? "" : "|") + warning; String aaChange = ""; if (useHgvs) aaChange = getHgvs(); else aaChange = ((aaRef.length() + aaAlt.length()) > 0 ? aaRef + "/" + aaAlt : ""); return errWarn // + "\t" + geneId // + "\t" + geneName // + "\t" + bioType // + "\t" + transcriptId // + "\t" + exonId // + "\t" + (exonRank >= 0 ? exonRank : "") // + "\t" + effect(false, false, false, useSeqOntology) // + "\t" + aaChange // + "\t" + ((codonsRef.length() + codonsAlt.length()) > 0 ? codonsRef + "/" + codonsAlt : "") // + "\t" + (codonNum >= 0 ? (codonNum + 1) : "") // + "\t" + (codonDegeneracy >= 0 ? codonDegeneracy + "" : "") // + "\t" + (cdsSize >= 0 ? cdsSize : "") // + "\t" + (codonsAroundOld.length() > 0 ? codonsAroundOld + " / " + codonsAroundNew : "") // + "\t" + (aasAroundOld.length() > 0 ? aasAroundOld + " / " + aasAroundNew : "") // + "\t" + customId // ; }
/**
 * Get the genotype string of the underlying variant.
 *
 * @return the variant's genotype, or an empty string when there is no variant
 */
public String getGenotype() {
  return (variant != null) ? variant.getGenotype() : "";
}
/** Codon change string (if it's not too long) */ public String getCodonChangeMax() { if (variant.size() > MAX_CODON_SEQUENCE_LEN) return ""; // Cap length in order not to make VCF files grow too much if (codonsRef.isEmpty() && codonsAlt.isEmpty()) return ""; return codonsRef + "/" + codonsAlt; }
void analyze(int i, int pos, String ref, String mnp) { String codons = codons(); Variant seqChange = new Variant(chromosome, pos, ref + "", mnp + "", ""); // --- // Calculate effects // --- VariantEffects effects = snpEffectPredictor.variantEffect(seqChange); // Show VariantEffect effect = null; if (effects.size() > 1) { // Usually there is only one effect for (VariantEffect ce : effects) { if ((ce.getEffectType() != EffectType.SPLICE_SITE_ACCEPTOR) // && (ce.getEffectType() != EffectType.SPLICE_SITE_DONOR) // && (ce.getEffectType() != EffectType.INTRON) // && (ce.getEffectType() != EffectType.INTERGENIC) // ) // effect = ce; } } else effect = effects.get(); if (effect != null) { String effStr = effect.effect(true, true, true, false); if (codons.length() > 1) { String codonsExp[] = codons.split("/"); boolean error = (!codonsExp[0].toUpperCase().equals(effect.getCodonsRef().toUpperCase()) // || !codonsExp[1].toUpperCase().equals(effect.getCodonsAlt().toUpperCase())); if (error || debug) { Gpr.debug( "Fatal error:" // + "\n\tPos : " + pos // + "\n\tSeqChange : " + seqChange + (seqChange.isStrandPlus() ? "+" : "-") // + "\n\tCodon (exp) : " + codons // + "\n\tCodon (pred) : " + effect.getCodonsRef().toUpperCase() + "/" + effect.getCodonsAlt().toUpperCase() // + "\n\tEffect (pred) : " + effStr // + "\n\tEffect (pred) : " + effect // + "\n\tGene : " + gene // + "\n\tChromo : " + chromoSequence // ); } /** Error? Dump so we can debug... */ if (error) { System.err.println("Error. Dumping data"); Save save = new Save(); save.snpEffectPredictor = snpEffectPredictor; save.chromoSequence = chromoSequence; save.chromoNewSequence = chromoNewSequence; save.ref = ref; save.pos = pos; save.mnp = mnp; String outFile = "/tmp/sep_" + i + "_" + pos + ".bin"; Gpr.toFileSerialize(outFile, save); throw new RuntimeException("Codons do not match!\n\tData dumped: '" + outFile + "'"); } } } }