/** * Count number of bases, for a given chromosome and marker type * * @param mtype * @param chr * @param markers * @return */ void countBases(String mtype, Chromosome chr, Markers markers) { String chrName = chr.getChromosomeName(); if (verbose) System.err.print(" " + chrName); // Initialize byte busy[] = new byte[chr.size()]; for (int i = 0; i < busy.length; i++) busy[i] = 0; for (Marker m : markers) { // Same marker type & same chromo? Count bases if (m.getChromosomeName().equals(chrName) && markerTypes.isType(m, mtype)) { for (int i = m.getStart(); i <= m.getEnd(); i++) busy[i] = 1; } } int latest = 0; for (int i = 0; i < busy.length; i++) { // Transition? Count another marker if ((i > 0) && (busy[i] != 0) && (busy[i - 1] == 0)) { if ((i - latest) <= readLength) countBases.inc(mtype, i - latest); // Intervals are less than one read away? Unify them else countMarkers.inc(mtype); } // Base busy? Count another base if (busy[i] != 0) { countBases.inc(mtype); latest = i; } } }
/** * Find closest gene to this marker * * <p>In case more than one 'closest' gene is found (e.g. two or more genes at the same distance). * The following rules apply: * * <p>i) If many genes have the same 'closest distance', coding genes are preferred. * * <p>ii) If more than one coding gene has the same 'closet distance', a random gene is returned. * * @param inputInterval */ public Gene queryClosestGene(Marker inputInterval) { int initialExtension = 1000; String chrName = inputInterval.getChromosomeName(); Chromosome chr = genome.getChromosome(chrName); if (chr == null) return null; if (chr.size() > 0) { // Extend interval to capture 'close' genes for (int extend = initialExtension; extend < chr.size(); extend *= 2) { int start = Math.max(inputInterval.getStart() - extend, 0); int end = inputInterval.getEnd() + extend; Marker extended = new Marker(chr, start, end, 1, ""); // Find all genes that intersect with the interval Markers markers = query(extended); Markers genes = new Markers(); int minDist = Integer.MAX_VALUE; for (Marker m : markers) { if (m instanceof Gene) { int dist = m.distance(inputInterval); if (dist < minDist) { genes.add(m); minDist = dist; } } } // Found something? if (genes.size() > 0) { // Find a gene having distance 'minDist'. Prefer coding genes Gene minDistGene = null; for (Marker m : genes) { int dist = m.distance(inputInterval); if (dist == minDist) { Gene gene = (Gene) m; if (minDistGene == null) minDistGene = gene; else if (!minDistGene.isProteinCoding() && gene.isProteinCoding()) minDistGene = gene; } } return minDistGene; } } } // Nothing found return null; }
/** * Is the chromosome missing in this marker? * * @param marker * @return */ boolean isChromosomeMissing(Marker marker) { // Missing chromosome in marker? if (marker.getChromosome() == null) return true; // Missing chromosome in genome? String chrName = marker.getChromosomeName(); Chromosome chr = genome.getChromosome(chrName); if (chr == null) return true; // Chromosome length is 1 or less? if (chr.size() < 1) return true; // Tree not found in interval forest? if (!intervalForest.hasTree(chrName)) return true; // OK, we have the chromosome return false; }