/** * Sample and calculate the probability of hitting each type of marker (marker.class). Creates * 'numReads' reads of size 'readLen' and count how many of them hit each marker type. */ CountByType randomSampling(int readLen, int numReads) { CountByType countReads = new CountByType(); RandMarker randMarker = new RandMarker(snpEffectPredictor.getGenome()); for (int i = 0; i < numReads; i++) { // Random read Marker read = randMarker.rand(readLen); // Where does it hit? Markers regions = snpEffectPredictor.queryDeep(read); HashSet<String> doneRegion = new HashSet<String>(); for (Marker m : regions) { String mtype = markerTypes.getType(m); String msubtype = markerTypes.getSubType(m); if (!doneRegion.contains(mtype)) { countReads.inc(mtype); // Count reads doneRegion.add(mtype); // Do not count twice } if ((msubtype != null) && !doneRegion.contains(msubtype)) { countReads.inc(msubtype); // Count reads doneRegion.add(msubtype); // Do not count twice } } } return countReads; }
/** Build a genome from a GFF3 file and compare results to 'expected' results */ public SnpEffectPredictor buildAndCompare( String genome, String gff3File, String resultFile, boolean readSeqs, boolean createRandSequences) { String expectedResult = (resultFile == null ? "" : Gpr.readFile(resultFile).trim()); // Build Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE); SnpEffPredictorFactoryGff3 fgff3 = new SnpEffPredictorFactoryGff3(config); fgff3.setVerbose(verbose); fgff3.setFileName(gff3File); fgff3.setReadSequences(readSeqs); fgff3.setCreateRandSequences(createRandSequences); fgff3.setRandom( new Random( 20140410)); // Note: we want consistent results in our test cases, so we always // initialize the random generator in the same way SnpEffectPredictor sep = fgff3.create(); // Compare result String result = show(sep.getGenome()).trim(); if (verbose || !Gpr.noSpaces(expectedResult).equals(Gpr.noSpaces(result))) System.out.println("Result:\n----------\n" + result + "\n----------\n"); Assert.assertEquals(Gpr.noSpaces(expectedResult), Gpr.noSpaces(result)); return sep; }
/** Count bases covered for each marker type */ public void countBases() { // --- // Add all markers // --- Markers markers = new Markers(); markers.add(snpEffectPredictor.getMarkers()); for (Gene gene : snpEffectPredictor.getGenome().getGenes()) { markers.add(gene); markers.add(gene.markers()); } for (Chromosome chr : snpEffectPredictor.getGenome()) markers.add(chr); // --- // Calculate raw counts // --- for (Marker m : markers) { String mtype = markerTypes.getType(m); String msubtype = markerTypes.getSubType(m); rawCountMarkers.inc(mtype); rawCountBases.inc(mtype, m.size()); // Count sub-types (if any) if (msubtype != null) { rawCountMarkers.inc(msubtype); rawCountBases.inc(msubtype, m.size()); } } // --- // Count number of bases for each marker type (overlap and join) // --- for (String mtype : rawCountMarkers.keysSorted()) { if (mtype.equals(Chromosome.class.getSimpleName())) continue; // We calculate chromosomes later (it's faster) if (verbose) System.err.print(mtype + ":"); if (countMarkers.get(mtype) == 0) { for (Chromosome chr : snpEffectPredictor.getGenome()) countBases(mtype, chr, markers); } if (verbose) System.err.println(""); } // Show chromosomes length String mtype = Chromosome.class.getSimpleName(); for (Chromosome chr : snpEffectPredictor.getGenome()) { countBases.inc(mtype, chr.size()); countMarkers.inc(mtype); } }
public void testCase_05_PaeruPA14muccA() { String genome = "paeru.PA14"; String gff3File = "tests/paeru.PA14.muccA.gff"; String resultFile = "tests/paeru.PA14.muccA.txt"; SnpEffectPredictor sep = buildAndCompare(genome, gff3File, resultFile, true, false); // Make sure no splice site is added Gene gene = sep.getGenome().getGenes().iterator().next(); Transcript tr = gene.iterator().next(); List<SpliceSite> spliceSites = tr.createSpliceSites(SpliceSite.CORE_SPLICE_SITE_SIZE, 0, 0, 0); Assert.assertEquals(0, spliceSites.size()); }
/** * Run main analysis * * @param xmlFileName */ @Override public boolean run() { // Initialzie readConfig(); // Read config file if (verbose) Timer.showStdErr( "Reading database for genome version '" + genomeVer + "' from file '" + config.getFileSnpEffectPredictor() + "' (this might take a while)"); SnpEffectPredictor snpEffectPredictor = config.loadSnpEffectPredictor(); genome = config.getGenome(); if (verbose) Timer.showStdErr("done"); // Build transcript map for (Gene gene : snpEffectPredictor.getGenome().getGenes()) for (Transcript tr : gene) trById.put(tr.getId(), tr); // Parse all XML files in directory String files[] = (new File(xmlDirName)).list(); for (String xmlFileName : files) { if (xmlFileName.endsWith(".xml.gz") || xmlFileName.endsWith(".xml")) { String path = xmlDirName + "/" + xmlFileName; parse(path); } } // Show stats if (verbose) Timer.showStdErr( "Proteing sequences:" // + "\n\tMatch : " + proteinOk.size() // + "\n\tDifferences : " + proteinDifferences.size() // + "\n\tAA errros : " + aaErrors // ); analyzeSequenceConservation(); // Save database save(); if (verbose) Timer.showStdErr("Done!"); return true; }
/** * Build a genome from a GFF3 file and compare results to 'expected' results * * @param genome * @param gff3File * @param resultFile */ public SnpEffectPredictor buildAndCompare( String genome, String gff3File, String resultFile, boolean readSeqs, boolean createRandSequences) { String expectedResult = (resultFile == null ? "" : Gpr.readFile(resultFile).trim()); // Build Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE); SnpEffPredictorFactoryGff3 fgff3 = new SnpEffPredictorFactoryGff3(config); fgff3.setFileName(gff3File); fgff3.setReadSequences(readSeqs); fgff3.setCreateRandSequences(createRandSequences); SnpEffectPredictor sep = fgff3.create(); // Compare result String result = show(sep.getGenome()).trim(); System.out.println("Result:\n----------\n" + result + "\n----------\n"); Assert.assertEquals(Gpr.noSpaces(expectedResult), Gpr.noSpaces(result)); return sep; }