/** * Return a collection of intervals that intersect 'marker' Query resulting genes, transcripts and * exons to get ALL types of intervals possible * * @return */ public Markers queryDeep(Marker marker) { if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(marker)) throw new RuntimeEOFException("Chromosome missing for marker: " + marker); boolean hitChromo = false; Markers hits = new Markers(); Markers intersects = query(marker); if (intersects.size() > 0) { for (Marker m : intersects) { hits.add(m); if (m instanceof Chromosome) { hitChromo = true; // OK (we have to hit a chromosome, otherwise it's an error } else if (m instanceof Gene) { // Analyze Genes Gene gene = (Gene) m; hits.addAll(gene.query(marker)); } } } if (!hitChromo && Config.get().isErrorChromoHit()) throw new RuntimeException("ERROR: Out of chromosome range. " + marker); return hits; }
/** * Predict the effect of a seqChange * * @param seqChange : Sequence change * @param seqChangeRef : Before analyzing results, we have to change markers using seqChangerRef * to create a new reference 'on the fly' */ public ChangeEffects seqChangeEffect(Variant seqChange, Variant seqChangeRef) { ChangeEffects changeEffects = new ChangeEffects(seqChange, seqChangeRef); // --- // Chromosome missing? // --- if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(seqChange)) { changeEffects.addErrorWarning(ErrorWarningType.ERROR_CHROMOSOME_NOT_FOUND); return changeEffects; } // --- // Check that this is not a huge deletion. // Huge deletions would crash the rest of the algorithm, so we need to stop them here. // --- if (seqChange.isDel() && (seqChange.size() > HUGE_DELETION_SIZE_THRESHOLD)) { // Get chromosome String chromoName = seqChange.getChromosomeName(); Chromosome chr = genome.getChromosome(chromoName); if (chr.size() > 0) { double ratio = seqChange.size() / ((double) chr.size()); if (ratio > HUGE_DELETION_RATIO_THRESHOLD) { changeEffects.add(chr, EffectType.CHROMOSOME_LARGE_DELETION, ""); return changeEffects; } } } // --- // Query interval tree: Which intervals does seqChange intersect? // --- Markers intersects = query(seqChange); // Show all results boolean hitChromo = false, hitSomething = false; if (intersects.size() > 0) { for (Marker marker : intersects) { if (marker instanceof Chromosome) hitChromo = true; // Do we hit any chromosome? else { // Analyze all markers marker.seqChangeEffect(seqChange, changeEffects, seqChangeRef); hitSomething = true; } } } // Any errors or intergenic (i.e. did not hit any gene) if (!hitChromo) { if (Config.get().isErrorChromoHit()) changeEffects.addErrorWarning(ErrorWarningType.ERROR_OUT_OF_CHROMOSOME_RANGE); } else if (!hitSomething) { if (Config.get().isOnlyRegulation()) changeEffects.setEffectType(EffectType.NONE); else changeEffects.setEffectType(EffectType.INTERGENIC); } return changeEffects; }
/** Count bases covered for each marker type */ public void countBases() { // --- // Add all markers // --- Markers markers = new Markers(); markers.add(snpEffectPredictor.getMarkers()); for (Gene gene : snpEffectPredictor.getGenome().getGenes()) { markers.add(gene); markers.add(gene.markers()); } for (Chromosome chr : snpEffectPredictor.getGenome()) markers.add(chr); // --- // Calculate raw counts // --- for (Marker m : markers) { String mtype = markerTypes.getType(m); String msubtype = markerTypes.getSubType(m); rawCountMarkers.inc(mtype); rawCountBases.inc(mtype, m.size()); // Count sub-types (if any) if (msubtype != null) { rawCountMarkers.inc(msubtype); rawCountBases.inc(msubtype, m.size()); } } // --- // Count number of bases for each marker type (overlap and join) // --- for (String mtype : rawCountMarkers.keysSorted()) { if (mtype.equals(Chromosome.class.getSimpleName())) continue; // We calculate chromosomes later (it's faster) if (verbose) System.err.print(mtype + ":"); if (countMarkers.get(mtype) == 0) { for (Chromosome chr : snpEffectPredictor.getGenome()) countBases(mtype, chr, markers); } if (verbose) System.err.println(""); } // Show chromosomes length String mtype = Chromosome.class.getSimpleName(); for (Chromosome chr : snpEffectPredictor.getGenome()) { countBases.inc(mtype, chr.size()); countMarkers.inc(mtype); } }
/** * Find closest gene to this marker * * <p>In case more than one 'closest' gene is found (e.g. two or more genes at the same distance). * The following rules apply: * * <p>i) If many genes have the same 'closest distance', coding genes are preferred. * * <p>ii) If more than one coding gene has the same 'closet distance', a random gene is returned. * * @param inputInterval */ public Gene queryClosestGene(Marker inputInterval) { int initialExtension = 1000; String chrName = inputInterval.getChromosomeName(); Chromosome chr = genome.getChromosome(chrName); if (chr == null) return null; if (chr.size() > 0) { // Extend interval to capture 'close' genes for (int extend = initialExtension; extend < chr.size(); extend *= 2) { int start = Math.max(inputInterval.getStart() - extend, 0); int end = inputInterval.getEnd() + extend; Marker extended = new Marker(chr, start, end, 1, ""); // Find all genes that intersect with the interval Markers markers = query(extended); Markers genes = new Markers(); int minDist = Integer.MAX_VALUE; for (Marker m : markers) { if (m instanceof Gene) { int dist = m.distance(inputInterval); if (dist < minDist) { genes.add(m); minDist = dist; } } } // Found something? if (genes.size() > 0) { // Find a gene having distance 'minDist'. Prefer coding genes Gene minDistGene = null; for (Marker m : genes) { int dist = m.distance(inputInterval); if (dist == minDist) { Gene gene = (Gene) m; if (minDistGene == null) minDistGene = gene; else if (!minDistGene.isProteinCoding() && gene.isProteinCoding()) minDistGene = gene; } } return minDistGene; } } } // Nothing found return null; }
/** Save nextprot markers */ void save() { String nextProtBinFile = config.getDirDataVersion() + "/nextProt.bin"; if (verbose) Timer.showStdErr("Saving database to file '" + nextProtBinFile + "'"); // Add chromosomes HashSet<Chromosome> chromos = new HashSet<Chromosome>(); for (Marker m : markers) chromos.add(m.getChromosome()); // Create a set of all markers to be saved Markers markersToSave = new Markers(); markersToSave.add(genome); for (Chromosome chr : chromos) markersToSave.add(chr); for (Marker m : markers) markersToSave.add(m); // Save MarkerSerializer markerSerializer = new MarkerSerializer(); markerSerializer.save(nextProtBinFile, markersToSave); }
/** * Create (and add) up-down stream, splice sites, intergenic, etc * * @return */ public Markers createGenomicRegions() { Markers markers = new Markers(); // Add up-down stream intervals for (Marker upDownStream : genome.getGenes().createUpDownStream(upDownStreamLength)) markers.add(upDownStream); // Add splice site intervals for (Marker spliceSite : genome .getGenes() .createSpliceSites( spliceSiteSize, spliceRegionExonSize, spliceRegionIntronMin, spliceRegionIntronMax)) markers.add(spliceSite); // Intergenic markers for (Intergenic intergenic : genome.getGenes().createIntergenic()) markers.add(intergenic); return markers; }
/** Create interval trees (forest) */ public void buildForest() { intervalForest = new IntervalForest(); // Add all chromosomes to forest if (useChromosomes) { for (Chromosome chr : genome) intervalForest.add(chr); } // Add all genes to forest for (Gene gene : genome.getGenes()) intervalForest.add(gene); // --- // Create (and add) up-down stream, splice sites, intergenic, etc // --- markers.add(createGenomicRegions()); // Add all 'markers' to forest (includes custom intervals) intervalForest.add(markers); // Build interval forest intervalForest.build(); }
/** * Parse a protein node * * @param node */ void parseAnnotation(Node ann, String geneId, String category) { // Description Node descr = findOneNode(ann, NODE_NAME_DESCRIPTION, null, null, null); String description = getText(descr); if (description == null) description = ""; else if (description.indexOf(';') > 0) description = description.substring(0, description.indexOf(';')); // Cut after semicolon // Controlled vocabulary Node cv = findOneNode(ann, NODE_NAME_CVNAME, null, null, null); String contrVoc = getText(cv); if (contrVoc == null) contrVoc = ""; contrVoc.indexOf(';'); String cvs[] = contrVoc.split(";", 2); String contrVoc2 = ""; if (cvs.length > 1) { contrVoc = cvs[0]; contrVoc2 = cvs[1]; } // Search annotations List<Node> posNodes = findNodes(ann, NODE_NAME_POSITION, null, null, null); for (Node pos : posNodes) { // Get first & last position String first = getAttribute(pos, ATTR_NAME_FIRST); String last = getAttribute(pos, ATTR_NAME_LAST); int aaStart = Gpr.parseIntSafe(first) - 1; int aaEnd = Gpr.parseIntSafe(last) - 1; int len = aaEnd - aaStart + 1; // Get ID Node isoAnn = pos.getParentNode().getParentNode(); String isoformRef = getAttribute(isoAnn, ATTR_NAME_ISOFORM_REF); // Find sequence String sequence = sequenceByUniqueName.get(isoformRef); String subSeq = ""; if ((sequence != null) && (aaStart >= 0) && (aaEnd >= aaStart)) subSeq = sequence.substring(aaStart, aaEnd + 1); // Check transcript TranscriptData trData = transcriptData(isoformRef, aaStart, aaEnd, sequence, subSeq); // Create nextProt markers if (trData.ok && (len > 0)) { if (debug) System.out.println( geneId // + "\t" + isoformRef // + "\t" + trData.tr.getId() // + "\t" + category // + "\t" + description // + "\t" + contrVoc // + "\t" + contrVoc2 // + "\t" + first // + "\t" + last // + "\t" + len // + "\t" + trData.chrName // + "\t" + trData.chrPosStart // + "\t" + trData.chrPosEnd // + "\t" + subSeq // + "\t" + trData.codon // + "\t" + trData.aa // ); // Create marker String id = 
key(category, contrVoc, description); NextProt nextProt = new NextProt(trData.tr, trData.chrPosStart, trData.chrPosEnd, id); markers.add(nextProt); // if (subSeq.length() == 1) countAaSequence(category, contrVoc, description, subSeq); countAaSequence(category, contrVoc, description, subSeq); } } }
/** * Name of the regions hit by a marker * * @param marker * @param showGeneDetails * @param compareTemplate * @param id : Only use genes or transcripts matching this ID * @return */ public Set<String> regions( Marker marker, boolean showGeneDetails, boolean compareTemplate, String id) { if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(marker)) throw new RuntimeEOFException("Chromosome missing for marker: " + marker); boolean hitChromo = false; HashSet<String> hits = new HashSet<String>(); Markers intersects = query(marker); if (intersects.size() > 0) { for (Marker markerInt : intersects) { if (markerInt instanceof Chromosome) { hitChromo = true; // OK (we have to hit a chromosome, otherwise it's an error hits.add(markerInt.getClass().getSimpleName()); // Add marker name to the list } else if (markerInt instanceof Gene) { // Analyze Genes Gene gene = (Gene) markerInt; regionsAddHit(hits, gene, marker, showGeneDetails, compareTemplate); // For all transcripts... for (Transcript tr : gene) { if ((id == null) || gene.getId().equals(id) || tr.getId().equals(id)) { // Mathes ID? (...or no ID to match) // Does it intersect this transcript? if (tr.intersects(marker)) { regionsAddHit(hits, tr, marker, showGeneDetails, compareTemplate); // Does it intersect a UTR? for (Utr utr : tr.getUtrs()) if (utr.intersects(marker)) regionsAddHit(hits, utr, marker, showGeneDetails, compareTemplate); // Does it intersect an exon? for (Exon ex : tr) if (ex.intersects(marker)) regionsAddHit(hits, ex, marker, showGeneDetails, compareTemplate); // Does it intersect an intron? for (Intron intron : tr.introns()) if (intron.intersects(marker)) regionsAddHit(hits, intron, marker, showGeneDetails, compareTemplate); } } } } else { // No ID to match? if (id == null) regionsAddHit(hits, markerInt, marker, showGeneDetails, compareTemplate); else { // Is ID from transcript? 
Transcript tr = (Transcript) markerInt.findParent(Transcript.class); if ((tr != null) && (tr.getId().equals(id))) { regionsAddHit( hits, markerInt, marker, showGeneDetails, compareTemplate); // Transcript ID matches => count } else { // Is ID from gene? Gene gene = (Gene) markerInt.findParent(Gene.class); if ((gene != null) && (gene.getId().equals(id))) regionsAddHit( hits, markerInt, marker, showGeneDetails, compareTemplate); // Gene ID matches => count } } } } } if (!hitChromo) throw new RuntimeException("ERROR: Out of chromosome range. " + marker); return hits; }
/**
 * Append every marker of the given collection to this object's markers, one at a time and in
 * iteration order.
 *
 * @param markersToAdd markers to append
 */
public void addAll(Markers markersToAdd) {
  for (Marker m : markersToAdd) {
    markers.add(m);
  }
}
/**
 * Add a single marker to this object's {@code markers} collection.
 *
 * <p>Note (from the original author): markers have to be added BEFORE building the interval
 * trees, which are built the first time the snpEffect(snp) method is called.
 *
 * @param marker marker to add
 */
public void add(Marker marker) { markers.add(marker); }