/** * Predict the effect of a seqChange * * @param seqChange : Sequence change * @param seqChangeRef : Before analyzing results, we have to change markers using seqChangerRef * to create a new reference 'on the fly' */ public ChangeEffects seqChangeEffect(Variant seqChange, Variant seqChangeRef) { ChangeEffects changeEffects = new ChangeEffects(seqChange, seqChangeRef); // --- // Chromosome missing? // --- if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(seqChange)) { changeEffects.addErrorWarning(ErrorWarningType.ERROR_CHROMOSOME_NOT_FOUND); return changeEffects; } // --- // Check that this is not a huge deletion. // Huge deletions would crash the rest of the algorithm, so we need to stop them here. // --- if (seqChange.isDel() && (seqChange.size() > HUGE_DELETION_SIZE_THRESHOLD)) { // Get chromosome String chromoName = seqChange.getChromosomeName(); Chromosome chr = genome.getChromosome(chromoName); if (chr.size() > 0) { double ratio = seqChange.size() / ((double) chr.size()); if (ratio > HUGE_DELETION_RATIO_THRESHOLD) { changeEffects.add(chr, EffectType.CHROMOSOME_LARGE_DELETION, ""); return changeEffects; } } } // --- // Query interval tree: Which intervals does seqChange intersect? // --- Markers intersects = query(seqChange); // Show all results boolean hitChromo = false, hitSomething = false; if (intersects.size() > 0) { for (Marker marker : intersects) { if (marker instanceof Chromosome) hitChromo = true; // Do we hit any chromosome? else { // Analyze all markers marker.seqChangeEffect(seqChange, changeEffects, seqChangeRef); hitSomething = true; } } } // Any errors or intergenic (i.e. did not hit any gene) if (!hitChromo) { if (Config.get().isErrorChromoHit()) changeEffects.addErrorWarning(ErrorWarningType.ERROR_OUT_OF_CHROMOSOME_RANGE); } else if (!hitSomething) { if (Config.get().isOnlyRegulation()) changeEffects.setEffectType(EffectType.NONE); else changeEffects.setEffectType(EffectType.INTERGENIC); } return changeEffects; }
/** * Find closest gene to this marker * * <p>In case more than one 'closest' gene is found (e.g. two or more genes at the same distance). * The following rules apply: * * <p>i) If many genes have the same 'closest distance', coding genes are preferred. * * <p>ii) If more than one coding gene has the same 'closet distance', a random gene is returned. * * @param inputInterval */ public Gene queryClosestGene(Marker inputInterval) { int initialExtension = 1000; String chrName = inputInterval.getChromosomeName(); Chromosome chr = genome.getChromosome(chrName); if (chr == null) return null; if (chr.size() > 0) { // Extend interval to capture 'close' genes for (int extend = initialExtension; extend < chr.size(); extend *= 2) { int start = Math.max(inputInterval.getStart() - extend, 0); int end = inputInterval.getEnd() + extend; Marker extended = new Marker(chr, start, end, 1, ""); // Find all genes that intersect with the interval Markers markers = query(extended); Markers genes = new Markers(); int minDist = Integer.MAX_VALUE; for (Marker m : markers) { if (m instanceof Gene) { int dist = m.distance(inputInterval); if (dist < minDist) { genes.add(m); minDist = dist; } } } // Found something? if (genes.size() > 0) { // Find a gene having distance 'minDist'. Prefer coding genes Gene minDistGene = null; for (Marker m : genes) { int dist = m.distance(inputInterval); if (dist == minDist) { Gene gene = (Gene) m; if (minDistGene == null) minDistGene = gene; else if (!minDistGene.isProteinCoding() && gene.isProteinCoding()) minDistGene = gene; } } return minDistGene; } } } // Nothing found return null; }
/** * Count number of bases, for a given chromosome and marker type * * @param mtype * @param chr * @param markers * @return */ void countBases(String mtype, Chromosome chr, Markers markers) { String chrName = chr.getChromosomeName(); if (verbose) System.err.print(" " + chrName); // Initialize byte busy[] = new byte[chr.size()]; for (int i = 0; i < busy.length; i++) busy[i] = 0; for (Marker m : markers) { // Same marker type & same chromo? Count bases if (m.getChromosomeName().equals(chrName) && markerTypes.isType(m, mtype)) { for (int i = m.getStart(); i <= m.getEnd(); i++) busy[i] = 1; } } int latest = 0; for (int i = 0; i < busy.length; i++) { // Transition? Count another marker if ((i > 0) && (busy[i] != 0) && (busy[i - 1] == 0)) { if ((i - latest) <= readLength) countBases.inc(mtype, i - latest); // Intervals are less than one read away? Unify them else countMarkers.inc(mtype); } // Base busy? Count another base if (busy[i] != 0) { countBases.inc(mtype); latest = i; } } }
/** Count bases covered for each marker type */ public void countBases() { // --- // Add all markers // --- Markers markers = new Markers(); markers.add(snpEffectPredictor.getMarkers()); for (Gene gene : snpEffectPredictor.getGenome().getGenes()) { markers.add(gene); markers.add(gene.markers()); } for (Chromosome chr : snpEffectPredictor.getGenome()) markers.add(chr); // --- // Calculate raw counts // --- for (Marker m : markers) { String mtype = markerTypes.getType(m); String msubtype = markerTypes.getSubType(m); rawCountMarkers.inc(mtype); rawCountBases.inc(mtype, m.size()); // Count sub-types (if any) if (msubtype != null) { rawCountMarkers.inc(msubtype); rawCountBases.inc(msubtype, m.size()); } } // --- // Count number of bases for each marker type (overlap and join) // --- for (String mtype : rawCountMarkers.keysSorted()) { if (mtype.equals(Chromosome.class.getSimpleName())) continue; // We calculate chromosomes later (it's faster) if (verbose) System.err.print(mtype + ":"); if (countMarkers.get(mtype) == 0) { for (Chromosome chr : snpEffectPredictor.getGenome()) countBases(mtype, chr, markers); } if (verbose) System.err.println(""); } // Show chromosomes length String mtype = Chromosome.class.getSimpleName(); for (Chromosome chr : snpEffectPredictor.getGenome()) { countBases.inc(mtype, chr.size()); countMarkers.inc(mtype); } }
/** * Is the chromosome missing in this marker? * * @param marker * @return */ boolean isChromosomeMissing(Marker marker) { // Missing chromosome in marker? if (marker.getChromosome() == null) return true; // Missing chromosome in genome? String chrName = marker.getChromosomeName(); Chromosome chr = genome.getChromosome(chrName); if (chr == null) return true; // Chromosome length is 1 or less? if (chr.size() < 1) return true; // Tree not found in interval forest? if (!intervalForest.hasTree(chrName)) return true; // OK, we have the chromosome return false; }