/** * Sample and calculate the probability of hitting each type of marker (marker.class). Creates * 'numReads' reads of size 'readLen' and count how many of them hit each marker type. */ CountByType randomSampling(int readLen, int numReads) { CountByType countReads = new CountByType(); RandMarker randMarker = new RandMarker(snpEffectPredictor.getGenome()); for (int i = 0; i < numReads; i++) { // Random read Marker read = randMarker.rand(readLen); // Where does it hit? Markers regions = snpEffectPredictor.queryDeep(read); HashSet<String> doneRegion = new HashSet<String>(); for (Marker m : regions) { String mtype = markerTypes.getType(m); String msubtype = markerTypes.getSubType(m); if (!doneRegion.contains(mtype)) { countReads.inc(mtype); // Count reads doneRegion.add(mtype); // Do not count twice } if ((msubtype != null) && !doneRegion.contains(msubtype)) { countReads.inc(msubtype); // Count reads doneRegion.add(msubtype); // Do not count twice } } } return countReads; }
/** Count bases covered for each marker type */ public void countBases() { // --- // Add all markers // --- Markers markers = new Markers(); markers.add(snpEffectPredictor.getMarkers()); for (Gene gene : snpEffectPredictor.getGenome().getGenes()) { markers.add(gene); markers.add(gene.markers()); } for (Chromosome chr : snpEffectPredictor.getGenome()) markers.add(chr); // --- // Calculate raw counts // --- for (Marker m : markers) { String mtype = markerTypes.getType(m); String msubtype = markerTypes.getSubType(m); rawCountMarkers.inc(mtype); rawCountBases.inc(mtype, m.size()); // Count sub-types (if any) if (msubtype != null) { rawCountMarkers.inc(msubtype); rawCountBases.inc(msubtype, m.size()); } } // --- // Count number of bases for each marker type (overlap and join) // --- for (String mtype : rawCountMarkers.keysSorted()) { if (mtype.equals(Chromosome.class.getSimpleName())) continue; // We calculate chromosomes later (it's faster) if (verbose) System.err.print(mtype + ":"); if (countMarkers.get(mtype) == 0) { for (Chromosome chr : snpEffectPredictor.getGenome()) countBases(mtype, chr, markers); } if (verbose) System.err.println(""); } // Show chromosomes length String mtype = Chromosome.class.getSimpleName(); for (Chromosome chr : snpEffectPredictor.getGenome()) { countBases.inc(mtype, chr.size()); countMarkers.inc(mtype); } }
/** * Count number of bases, for a given chromosome and marker type * * @param mtype * @param chr * @param markers * @return */ void countBases(String mtype, Chromosome chr, Markers markers) { String chrName = chr.getChromosomeName(); if (verbose) System.err.print(" " + chrName); // Initialize byte busy[] = new byte[chr.size()]; for (int i = 0; i < busy.length; i++) busy[i] = 0; for (Marker m : markers) { // Same marker type & same chromo? Count bases if (m.getChromosomeName().equals(chrName) && markerTypes.isType(m, mtype)) { for (int i = m.getStart(); i <= m.getEnd(); i++) busy[i] = 1; } } int latest = 0; for (int i = 0; i < busy.length; i++) { // Transition? Count another marker if ((i > 0) && (busy[i] != 0) && (busy[i - 1] == 0)) { if ((i - latest) <= readLength) countBases.inc(mtype, i - latest); // Intervals are less than one read away? Unify them else countMarkers.inc(mtype); } // Base busy? Count another base if (busy[i] != 0) { countBases.inc(mtype); latest = i; } } }
/**
 * Save the model to a tab-separated text file.
 *
 * Writes a header line, calls probabilities(), and then emits one row per marker type
 * returned by markerTypes.markerTypesClass(), holding the type name, its base count and
 * its marker count.
 *
 * @param fileName path of the file to write
 */
public void save(String fileName) {
    StringBuilder table = new StringBuilder("marker_type\tsize\tcount\tbinomial_p\n");

    probabilities();

    // NOTE(review): the header names four columns, but each row below carries only three
    // (no 'binomial_p' value is written) -- confirm whether a column is missing.
    for (String mtype : markerTypes.markerTypesClass()) {
        table.append(mtype)
                .append("\t")
                .append(countBases.get(mtype))
                .append("\t")
                .append(countMarkers.get(mtype))
                .append("\n");
    }

    Gpr.toFile(fileName, table.toString());
}