  /**
   * Sample and calculate the probability of hitting each type of marker (marker.class). Creates
   * 'numReads' reads of size 'readLen' and counts how many of them hit each marker type.
   */
  CountByType randomSampling(int readLen, int numReads) {
    CountByType countReads = new CountByType();
    RandMarker randMarker = new RandMarker(snpEffectPredictor.getGenome());

    for (int i = 0; i < numReads; i++) {
      // Random read
      Marker read = randMarker.rand(readLen);

      // Where does it hit?
      Markers regions = snpEffectPredictor.queryDeep(read);
      HashSet<String> doneRegion = new HashSet<String>();
      for (Marker m : regions) {
        String mtype = markerTypes.getType(m);
        String msubtype = markerTypes.getSubType(m);

        if (!doneRegion.contains(mtype)) {
          countReads.inc(mtype); // Count reads
          doneRegion.add(mtype); // Do not count twice

        if ((msubtype != null) && !doneRegion.contains(msubtype)) {
          countReads.inc(msubtype); // Count reads
          doneRegion.add(msubtype); // Do not count twice

    return countReads;
  /**
   * Count number of bases, for a given chromosome and marker type
   *
   * @param mtype marker type to count
   * @param chr chromosome to scan
   * @param markers markers to take into account
   */
  void countBases(String mtype, Chromosome chr, Markers markers) {
    String chrName = chr.getChromosomeName();
    if (verbose) System.err.print(" " + chrName);

    // Initialize
    byte busy[] = new byte[chr.size()];
    for (int i = 0; i < busy.length; i++) busy[i] = 0;

    for (Marker m : markers) {
      // Same marker type & same chromo? Count bases
      if (m.getChromosomeName().equals(chrName) && markerTypes.isType(m, mtype)) {
        for (int i = m.getStart(); i <= m.getEnd(); i++) busy[i] = 1;

    int latest = 0;
    for (int i = 0; i < busy.length; i++) {
      // Transition? Count another marker
      if ((i > 0) && (busy[i] != 0) && (busy[i - 1] == 0)) {
        if ((i - latest) <= readLength)
          countBases.inc(mtype, i - latest); // Intervals are less than one read away? Unify them
        else countMarkers.inc(mtype);

      // Base busy? Count another base
      if (busy[i] != 0) {
        latest = i;
  /**
   * Save model to file
   *
   * @param fileName path of the file to write
   */
  public void save(String fileName) {
    StringBuilder sb = new StringBuilder();

    for (String mtype : markerTypes.markerTypesClass())
      sb.append(mtype + "\t" + countBases.get(mtype) + "\t" + countMarkers.get(mtype) + "\n");

    Gpr.toFile(fileName, sb.toString());
  /**
   * Sample and calculate the probability of hitting each type of marker (marker.class). Creates
   * 'numReads' reads of size 'readLen' and counts how many of them hit each marker type.
   * Iterates 'iterations' times to obtain a distribution.
   */
  public void randomSampling(int iterations, int readLen, int numReads) {
    for (String type : rawCountMarkers.keysSorted()) System.out.print("\t" + type);

    for (int it = 0; it < iterations; it++) {
      CountByType count = randomSampling(readLen, numReads);
      for (String type : rawCountMarkers.keysSorted()) System.out.print("\t" + count.get(type));
  /**
   * Load data from a file
   *
   * @param fileName path of the file to read
   */
  public void load(String fileName) {

    boolean header = true;
    for (String line : Gpr.readFile(fileName).split("\n")) {
      if (header) {
        header = false;

      // Split line and parse data
      String recs[] = line.split("\t");

      String mtype = recs[0];
      countBases.inc(mtype, Gpr.parseIntSafe(recs[1]));
      countMarkers.inc(mtype, Gpr.parseIntSafe(recs[2]));
  /** Count bases covered for each marker type */
  public void countBases() {
    // Collects markers from the genome, accumulates raw (per-marker) sizes, then counts bases per
    // marker type through countBases(mtype, chr, markers).
    // NOTE(review): as shown here, several blocks below are opened but never closed and the gene
    // loop has no statements; confirm against the complete file before relying on this text.

    // ---
    // Add all markers
    // ---
    Markers markers = new Markers();
    // NOTE(review): nothing is added to 'markers' for the genes in the lines shown here.
    for (Gene gene : snpEffectPredictor.getGenome().getGenes()) {

    // Every chromosome of the genome is added as a marker as well
    for (Chromosome chr : snpEffectPredictor.getGenome()) markers.add(chr);

    // ---
    // Calculate raw counts
    // ---
    // Raw counts add up marker sizes one marker at a time, without merging overlaps
    for (Marker m : markers) {
      String mtype = markerTypes.getType(m);
      String msubtype = markerTypes.getSubType(m);

      // NOTE(review): only 'rawCountBases' is updated in this loop, yet the loop further down
      // iterates the keys of 'rawCountMarkers' -- confirm where that counter is filled.
      rawCountBases.inc(mtype, m.size());

      // Count sub-types (if any)
      if (msubtype != null) {
        rawCountBases.inc(msubtype, m.size());

    // ---
    // Count number of bases for each marker type (overlap and join)
    // ---
    for (String mtype : rawCountMarkers.keysSorted()) {
      if (mtype.equals(Chromosome.class.getSimpleName()))
        continue; // We calculate chromosomes later (it's faster)

      if (verbose) System.err.print(mtype + ":");

      // Scan the chromosomes only when no marker of this type has been counted so far
      if (countMarkers.get(mtype) == 0) {
        for (Chromosome chr : snpEffectPredictor.getGenome()) countBases(mtype, chr, markers);

      if (verbose) System.err.println("");

    // Show chromosomes length
    // The chromosome type gets its base count directly from the chromosome sizes.
    // NOTE(review): only 'countBases' is updated here, while probabilities() returns early unless
    // 'countMarkers' holds a positive count for the chromosome type -- confirm.
    String mtype = Chromosome.class.getSimpleName();
    for (Chromosome chr : snpEffectPredictor.getGenome()) {
      countBases.inc(mtype, chr.size());
  /** Calculate probabilities */
  void probabilities() {
    // Already done, nothing to do
    if (prob != null) return;

    // Get total length and count for chromosomes (chromosome size is total genome length)
    String chrType = Chromosome.class.getSimpleName();
    long chrSize = countBases.get(chrType);
    long chrCount = countMarkers.get(chrType);
    if (chrCount <= 0) return; // Zero length genome? Forgot to count bases?

    // Correct readLength
    int readLength = this.readLength;
    if (readLength < 1) readLength = 1;

    // Probabilities for each marker
    prob = new CountByType();
    for (String mtype : countMarkers.keysSorted()) {
      long size = countBases.get(mtype);
      long count = countMarkers.get(mtype);

      // Calculate and cap probability value
      double p =
          ((double) (size + (readLength - 1) * count))
              / ((double) (chrSize - (readLength - 1) * chrCount));
      p = Math.min(1.0, p);
      p = Math.max(0.0, p);

      prob.setScore(mtype, p);
  public String toString() {
    StringBuilder sb = new StringBuilder();

    for (String mtype : countMarkers.keysSorted())
              + "\t"
              + countBases.get(mtype)
              + "\t"
              + countMarkers.get(mtype)
              + "\t"
              + rawCountBases.get(mtype)
              + "\t"
              + rawCountMarkers.get(mtype)
              + "\t"
              + prob.getScore(mtype)
              + "\n");

    return sb.toString();