/** * Find closest gene to this marker * * <p>In case more than one 'closest' gene is found (e.g. two or more genes at the same distance). * The following rules apply: * * <p>i) If many genes have the same 'closest distance', coding genes are preferred. * * <p>ii) If more than one coding gene has the same 'closet distance', a random gene is returned. * * @param inputInterval */ public Gene queryClosestGene(Marker inputInterval) { int initialExtension = 1000; String chrName = inputInterval.getChromosomeName(); Chromosome chr = genome.getChromosome(chrName); if (chr == null) return null; if (chr.size() > 0) { // Extend interval to capture 'close' genes for (int extend = initialExtension; extend < chr.size(); extend *= 2) { int start = Math.max(inputInterval.getStart() - extend, 0); int end = inputInterval.getEnd() + extend; Marker extended = new Marker(chr, start, end, 1, ""); // Find all genes that intersect with the interval Markers markers = query(extended); Markers genes = new Markers(); int minDist = Integer.MAX_VALUE; for (Marker m : markers) { if (m instanceof Gene) { int dist = m.distance(inputInterval); if (dist < minDist) { genes.add(m); minDist = dist; } } } // Found something? if (genes.size() > 0) { // Find a gene having distance 'minDist'. Prefer coding genes Gene minDistGene = null; for (Marker m : genes) { int dist = m.distance(inputInterval); if (dist == minDist) { Gene gene = (Gene) m; if (minDistGene == null) minDistGene = gene; else if (!minDistGene.isProteinCoding() && gene.isProteinCoding()) minDistGene = gene; } } return minDistGene; } } } // Nothing found return null; }
/** * Add into to a hash * * @param hits * @param marker * @param hit2add * @param showGeneDetails * @param compareTemplate */ void regionsAddHit( HashSet<String> hits, Marker hit2add, Marker marker, boolean showGeneDetails, boolean compareTemplate) { String hitStr = hit2add.getClass().getSimpleName(); if (compareTemplate) { Gene gene = (Gene) hit2add.findParent(Gene.class); if (gene != null) hitStr += (hit2add.isStrandPlus() == marker.isStrandPlus()) ? "_TEMPLATE_STRAND" : "_NON_TEMPLATE_STRAND"; } if (showGeneDetails && (hit2add instanceof Gene)) { Gene gene = (Gene) hit2add; hitStr += "[" + gene.getBioType() + ", " + gene.getGeneName() + ", " + (gene.isProteinCoding() ? "protein" : "not-protein") + "]"; } hits.add(hitStr); // Add marker name to the list }
/**
 * Get biotype.
 *
 * <p>Resolution order: no gene gives an empty string; a transcript, when present, supplies its
 * own biotype; otherwise, if the gene's genome has coding information, the result is "coding" or
 * "non-coding" depending on the gene; in any other case the result is an empty string.
 *
 * @return the biotype as described above
 */
public String getBiotype() {
  Gene gene = getGene();
  if (gene == null) return "";

  // The transcript's biotype takes precedence
  Transcript transcript = getTranscript();
  if (transcript != null) return transcript.getBioType();

  // Fall back to gene level coding information, if the genome has any
  if (!gene.getGenome().hasCodingInfo()) return "";
  return gene.isProteinCoding() ? "coding" : "non-coding";
}
/** * Is this single change a LOF? * * <p>Criteria: 1) Core splice sites acceptors or donors (only CORE ones) 2) Stop gained (if this * happens at the last part of the protein, we assume it has no effect) 3) Frame shifts * * @param changeEffect * @return */ protected boolean isLof(ChangeEffect changeEffect) { // Not a sequence change? => Not LOF if ((changeEffect.getSeqChange() != null) && (!changeEffect.getSeqChange().isVariant())) return false; // Is this change affecting a protein coding gene? Gene gene = changeEffect.getGene(); Transcript tr = changeEffect.getTranscript(); if ((gene == null) // No gene affected? || (tr == null) // No transcript affected? || (!gene.isProteinCoding() && !config.isTreatAllAsProteinCoding()) // Not a protein coding gene? || (!tr.isProteinCoding() && !config.isTreatAllAsProteinCoding()) // Not a protein coding transcript? ) return false; // --- // Is this variant a LOF? // --- boolean lof = false; // Frame shifts if (changeEffect.getEffectType() == EffectType.FRAME_SHIFT) { // It is assumed that even with a protein coding change at the last 5% of the protein, the // protein could still be functional. double perc = percentCds(changeEffect); lof |= (ignoreProteinCodingBefore <= perc) && (perc <= ignoreProteinCodingAfter); } // Deletion? Is another method to check if (changeEffect.getSeqChange().isDel()) lof |= isLofDeletion(changeEffect); // The following effect types can be considered LOF switch (changeEffect.getEffectType()) { case SPLICE_SITE_ACCEPTOR: case SPLICE_SITE_DONOR: // Core splice sites are considered LOF if ((changeEffect.getMarker() != null) && (changeEffect.getMarker() instanceof SpliceSite)) { // Get splice site marker and check if it is 'core' SpliceSite spliceSite = (SpliceSite) changeEffect.getMarker(); if (spliceSite.intersectsCoreSpliceSite(changeEffect.getSeqChange())) lof = true; // Does it intersect the CORE splice site? 
} break; case STOP_GAINED: lof |= isNmd(changeEffect); break; case RARE_AMINO_ACID: case START_LOST: // This one is not in the referenced papers, but we assume that RARE AA and START_LOSS // changes are damaging. lof = true; break; default: // All others are not considered LOF break; } // Update sets if (lof) { transcriptsLof.add( changeEffect .getTranscript()); // Unique transcripts affected (WARNING: null will be added) genesLof.add(changeEffect.getGene()); // Unique genes affected (WARNING: null will be added) } return lof; }