/** * Is this single change a LOF? * * <p>Criteria: 1) Core splice sites acceptors or donors (only CORE ones) 2) Stop gained (if this * happens at the last part of the protein, we assume it has no effect) 3) Frame shifts * * @param changeEffect * @return */ protected boolean isNmd(ChangeEffect changeEffect) { Transcript tr = changeEffect.getTranscript(); if (tr == null) throw new RuntimeException("Transcript not found for change:\n\t" + changeEffect); // Only one exon? Nothing to do (there is no exon-exon junction) if (tr.numChilds() <= 1) return false; // Find last valid NMD position int lastNmdPos = lastNmdPos(tr); if (lastNmdPos < 0) return false; // No valid 'lastNmdPos'? => There is no NMD event. // Does this change affect the region 'before' this last NMD position? => It is assumed to be // NMD Variant seqChange = changeEffect.getSeqChange(); boolean nmd; if (tr.isStrandPlus()) nmd = seqChange.getStart() <= lastNmdPos; else nmd = lastNmdPos <= seqChange.getEnd(); // Update sets and counters if (nmd) { transcriptsNmd.add( changeEffect .getTranscript()); // Unique transcripts affected (WARNING: null will be added) genesNmd.add(changeEffect.getGene()); // Unique genes affected (WARNING: null will be added) nmdCount++; } return nmd; }
/** * Is this deletion a LOF? * * <p>Criteria: 1) First (coding) exon deleted 2) More than 50% of coding sequence deleted * * @param changeEffect * @return */ protected boolean isLofDeletion(ChangeEffect changeEffect) { Transcript tr = changeEffect.getTranscript(); if (tr == null) throw new RuntimeException("Transcript not found for change:\n\t" + changeEffect); // --- // Criteria: // 1) First (coding) exon deleted // --- if (changeEffect.getEffectType() == EffectType.EXON_DELETED) { Variant seqChange = changeEffect.getSeqChange(); if (seqChange == null) throw new RuntimeException("Cannot retrieve 'seqChange' from EXON_DELETED effect!"); if (seqChange.includes(tr.getFirstCodingExon())) return true; } // --- // Criteria: // 2) More than 50% of coding sequence deleted // --- // Find coding part of the transcript (i.e. no UTRs) Variant seqChange = changeEffect.getSeqChange(); int cdsStart = tr.isStrandPlus() ? tr.getCdsStart() : tr.getCdsEnd(); int cdsEnd = tr.isStrandPlus() ? tr.getCdsEnd() : tr.getCdsStart(); Marker coding = new Marker(seqChange.getChromosome(), cdsStart, cdsEnd, 1, ""); // Create an interval intersecting the CDS and the deletion int start = Math.max(cdsStart, seqChange.getStart()); int end = Math.min(cdsEnd, seqChange.getEnd()); if (start >= end) return false; // No intersections with coding part of the exon? => not LOF Marker codingDeleted = new Marker(seqChange.getChromosome(), start, end, 1, ""); // Count: // - number of coding bases deleted // - number of coding bases int codingBasesDeleted = 0, codingBases = 0; for (Exon exon : tr) { codingBasesDeleted += codingDeleted.intersectSize(exon); codingBases += coding.intersectSize(exon); } // More than a threshold? => It is a LOF double percDeleted = codingBasesDeleted / ((double) codingBases); return (percDeleted > deleteProteinCodingBases); }
/** * Is this single change a LOF? * * <p>Criteria: 1) Core splice sites acceptors or donors (only CORE ones) 2) Stop gained (if this * happens at the last part of the protein, we assume it has no effect) 3) Frame shifts * * @param changeEffect * @return */ protected boolean isLof(ChangeEffect changeEffect) { // Not a sequence change? => Not LOF if ((changeEffect.getSeqChange() != null) && (!changeEffect.getSeqChange().isVariant())) return false; // Is this change affecting a protein coding gene? Gene gene = changeEffect.getGene(); Transcript tr = changeEffect.getTranscript(); if ((gene == null) // No gene affected? || (tr == null) // No transcript affected? || (!gene.isProteinCoding() && !config.isTreatAllAsProteinCoding()) // Not a protein coding gene? || (!tr.isProteinCoding() && !config.isTreatAllAsProteinCoding()) // Not a protein coding transcript? ) return false; // --- // Is this variant a LOF? // --- boolean lof = false; // Frame shifts if (changeEffect.getEffectType() == EffectType.FRAME_SHIFT) { // It is assumed that even with a protein coding change at the last 5% of the protein, the // protein could still be functional. double perc = percentCds(changeEffect); lof |= (ignoreProteinCodingBefore <= perc) && (perc <= ignoreProteinCodingAfter); } // Deletion? Is another method to check if (changeEffect.getSeqChange().isDel()) lof |= isLofDeletion(changeEffect); // The following effect types can be considered LOF switch (changeEffect.getEffectType()) { case SPLICE_SITE_ACCEPTOR: case SPLICE_SITE_DONOR: // Core splice sites are considered LOF if ((changeEffect.getMarker() != null) && (changeEffect.getMarker() instanceof SpliceSite)) { // Get splice site marker and check if it is 'core' SpliceSite spliceSite = (SpliceSite) changeEffect.getMarker(); if (spliceSite.intersectsCoreSpliceSite(changeEffect.getSeqChange())) lof = true; // Does it intersect the CORE splice site? } break; case STOP_GAINED: lof |= isNmd(changeEffect); break; case RARE_AMINO_ACID: case START_LOST: // This one is not in the referenced papers, but we assume that RARE AA and START_LOSS // changes are damaging. lof = true; break; default: // All others are not considered LOF break; } // Update sets if (lof) { transcriptsLof.add( changeEffect .getTranscript()); // Unique transcripts affected (WARNING: null will be added) genesLof.add(changeEffect.getGene()); // Unique genes affected (WARNING: null will be added) } return lof; }