/** Calculate probabilities */ void probabilities() { // Already done, nothing to do if (prob != null) return; // Get total length and count for chromosomes (chromosome size is total genome length) String chrType = Chromosome.class.getSimpleName(); long chrSize = countBases.get(chrType); long chrCount = countMarkers.get(chrType); if (chrCount <= 0) return; // Zero length genome? Forgot to count bases? // Correct readLength int readLength = this.readLength; if (readLength < 1) readLength = 1; // Probabilities for each marker prob = new CountByType(); for (String mtype : countMarkers.keysSorted()) { long size = countBases.get(mtype); long count = countMarkers.get(mtype); // Calculate and cap probability value double p = ((double) (size + (readLength - 1) * count)) / ((double) (chrSize - (readLength - 1) * chrCount)); p = Math.min(1.0, p); p = Math.max(0.0, p); prob.setScore(mtype, p); } }
/**
 * Save model to file as a tab-separated table.
 *
 * <p>The first line is the header {@code marker_type, size, count, binomial_p}; one row follows
 * for each type returned by {@code markerTypes.markerTypesClass()}. Every row carries all four
 * columns announced by the header; {@code binomial_p} is written as {@code NaN} when
 * probabilities could not be calculated.
 *
 * @param fileName name of the file to write (handed to {@code Gpr.toFile})
 */
public void save(String fileName) {
  StringBuilder sb = new StringBuilder();
  sb.append("marker_type\tsize\tcount\tbinomial_p\n");

  probabilities();
  for (String mtype : markerTypes.markerTypesClass()) {
    // probabilities() leaves 'prob' unset when no chromosomes have been counted.
    // NOTE(review): assumes prob.getScore() tolerates types that are absent from 'prob' -- confirm
    String p = (prob != null) ? String.valueOf(prob.getScore(mtype)) : "NaN";

    sb.append(mtype)
        .append('\t')
        .append(countBases.get(mtype))
        .append('\t')
        .append(countMarkers.get(mtype))
        .append('\t')
        .append(p)
        .append('\n');
  }

  Gpr.toFile(fileName, sb.toString());
}
/** Count bases covered for each marker type */ public void countBases() { // --- // Add all markers // --- Markers markers = new Markers(); markers.add(snpEffectPredictor.getMarkers()); for (Gene gene : snpEffectPredictor.getGenome().getGenes()) { markers.add(gene); markers.add(gene.markers()); } for (Chromosome chr : snpEffectPredictor.getGenome()) markers.add(chr); // --- // Calculate raw counts // --- for (Marker m : markers) { String mtype = markerTypes.getType(m); String msubtype = markerTypes.getSubType(m); rawCountMarkers.inc(mtype); rawCountBases.inc(mtype, m.size()); // Count sub-types (if any) if (msubtype != null) { rawCountMarkers.inc(msubtype); rawCountBases.inc(msubtype, m.size()); } } // --- // Count number of bases for each marker type (overlap and join) // --- for (String mtype : rawCountMarkers.keysSorted()) { if (mtype.equals(Chromosome.class.getSimpleName())) continue; // We calculate chromosomes later (it's faster) if (verbose) System.err.print(mtype + ":"); if (countMarkers.get(mtype) == 0) { for (Chromosome chr : snpEffectPredictor.getGenome()) countBases(mtype, chr, markers); } if (verbose) System.err.println(""); } // Show chromosomes length String mtype = Chromosome.class.getSimpleName(); for (Chromosome chr : snpEffectPredictor.getGenome()) { countBases.inc(mtype, chr.size()); countMarkers.inc(mtype); } }
/**
 * Sample and calculate the probability of hitting each type of marker (marker.class).
 *
 * <p>Each iteration creates 'numReads' reads of size 'readLen' and counts how many of them hit
 * each marker type; iterating 'iterations' times yields a distribution. Results go to standard
 * output as a tab-separated table: a header line ("Iteration" followed by the sorted raw marker
 * types) and then one line per iteration with the iteration number and the count for each type.
 *
 * @param iterations number of sampling rounds to run
 * @param readLen read size passed to each sampling round
 * @param numReads number of reads passed to each sampling round
 */
public void randomSampling(int iterations, int readLen, int numReads) {
  // Header line
  StringBuilder header = new StringBuilder("Iteration");
  for (String type : rawCountMarkers.keysSorted()) {
    header.append("\t").append(type);
  }
  System.out.println(header.toString());

  // One line per sampling round
  int round = 0;
  while (round < iterations) {
    CountByType hits = randomSampling(readLen, numReads);

    StringBuilder row = new StringBuilder();
    row.append(round);
    for (String type : rawCountMarkers.keysSorted()) {
      row.append("\t").append(hits.get(type));
    }
    System.out.println(row.toString());

    round++;
  }
}
/**
 * Tab-separated summary of the model.
 *
 * <p>The first line is the header
 * {@code marker_type, size, count, raw_size, raw_count, binomial_p}; one row follows for each
 * type in {@code countMarkers} (sorted keys). {@code binomial_p} is printed as {@code NaN} when
 * probabilities could not be calculated, instead of failing with a NullPointerException.
 */
@Override
public String toString() {
  StringBuilder sb = new StringBuilder();
  sb.append("marker_type\tsize\tcount\traw_size\traw_count\tbinomial_p\n");

  probabilities();
  for (String mtype : countMarkers.keysSorted()) {
    // probabilities() leaves 'prob' unset when the chromosome count is not positive
    String p = (prob != null) ? String.valueOf(prob.getScore(mtype)) : "NaN";

    sb.append(mtype)
        .append('\t')
        .append(countBases.get(mtype))
        .append('\t')
        .append(countMarkers.get(mtype))
        .append('\t')
        .append(rawCountBases.get(mtype))
        .append('\t')
        .append(rawCountMarkers.get(mtype))
        .append('\t')
        .append(p)
        .append('\n');
  }

  return sb.toString();
}