/**
   * Count number of bases, for a given chromosome and marker type
   *
   * @param mtype
   * @param chr
   * @param markers
   * @return
   */
  void countBases(String mtype, Chromosome chr, Markers markers) {
    String chrName = chr.getChromosomeName();
    if (verbose) System.err.print(" " + chrName);

    // Initialize
    byte busy[] = new byte[chr.size()];
    for (int i = 0; i < busy.length; i++) busy[i] = 0;

    for (Marker m : markers) {
      // Same marker type & same chromo? Count bases
      if (m.getChromosomeName().equals(chrName) && markerTypes.isType(m, mtype)) {
        for (int i = m.getStart(); i <= m.getEnd(); i++) busy[i] = 1;
      }
    }

    int latest = 0;
    for (int i = 0; i < busy.length; i++) {
      // Transition? Count another marker
      if ((i > 0) && (busy[i] != 0) && (busy[i - 1] == 0)) {
        if ((i - latest) <= readLength)
          countBases.inc(mtype, i - latest); // Intervals are less than one read away? Unify them
        else countMarkers.inc(mtype);
      }

      // Base busy? Count another base
      if (busy[i] != 0) {
        countBases.inc(mtype);
        latest = i;
      }
    }
  }
Пример #2
0
  /**
   * Predict the effect of a seqChange
   *
   * @param seqChange : Sequence change
   * @param seqChangeRef : Before analyzing results, we have to change markers using seqChangerRef
   *     to create a new reference 'on the fly'
   */
  public ChangeEffects seqChangeEffect(Variant seqChange, Variant seqChangeRef) {
    ChangeEffects changeEffects = new ChangeEffects(seqChange, seqChangeRef);

    // ---
    // Chromosome missing?
    // ---
    if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(seqChange)) {
      changeEffects.addErrorWarning(ErrorWarningType.ERROR_CHROMOSOME_NOT_FOUND);
      return changeEffects;
    }

    // ---
    // Check that this is not a huge deletion.
    // Huge deletions would crash the rest of the algorithm, so we need to stop them here.
    // ---
    if (seqChange.isDel() && (seqChange.size() > HUGE_DELETION_SIZE_THRESHOLD)) {
      // Get chromosome
      String chromoName = seqChange.getChromosomeName();
      Chromosome chr = genome.getChromosome(chromoName);

      if (chr.size() > 0) {
        double ratio = seqChange.size() / ((double) chr.size());
        if (ratio > HUGE_DELETION_RATIO_THRESHOLD) {
          changeEffects.add(chr, EffectType.CHROMOSOME_LARGE_DELETION, "");
          return changeEffects;
        }
      }
    }

    // ---
    // Query interval tree: Which intervals does seqChange intersect?
    // ---
    Markers intersects = query(seqChange);

    // Show all results
    boolean hitChromo = false, hitSomething = false;
    if (intersects.size() > 0) {
      for (Marker marker : intersects) {
        if (marker instanceof Chromosome) hitChromo = true; // Do we hit any chromosome?
        else { // Analyze all markers
          marker.seqChangeEffect(seqChange, changeEffects, seqChangeRef);
          hitSomething = true;
        }
      }
    }

    // Any errors or intergenic (i.e. did not hit any gene)
    if (!hitChromo) {
      if (Config.get().isErrorChromoHit())
        changeEffects.addErrorWarning(ErrorWarningType.ERROR_OUT_OF_CHROMOSOME_RANGE);
    } else if (!hitSomething) {
      if (Config.get().isOnlyRegulation()) changeEffects.setEffectType(EffectType.NONE);
      else changeEffects.setEffectType(EffectType.INTERGENIC);
    }

    return changeEffects;
  }
  /** Count bases covered for each marker type */
  public void countBases() {
    // ---
    // Add all markers
    // ---
    Markers markers = new Markers();
    markers.add(snpEffectPredictor.getMarkers());
    for (Gene gene : snpEffectPredictor.getGenome().getGenes()) {
      markers.add(gene);
      markers.add(gene.markers());
    }

    for (Chromosome chr : snpEffectPredictor.getGenome()) markers.add(chr);

    // ---
    // Calculate raw counts
    // ---
    for (Marker m : markers) {
      String mtype = markerTypes.getType(m);
      String msubtype = markerTypes.getSubType(m);

      rawCountMarkers.inc(mtype);
      rawCountBases.inc(mtype, m.size());

      // Count sub-types (if any)
      if (msubtype != null) {
        rawCountMarkers.inc(msubtype);
        rawCountBases.inc(msubtype, m.size());
      }
    }

    // ---
    // Count number of bases for each marker type (overlap and join)
    // ---
    for (String mtype : rawCountMarkers.keysSorted()) {
      if (mtype.equals(Chromosome.class.getSimpleName()))
        continue; // We calculate chromosomes later (it's faster)

      if (verbose) System.err.print(mtype + ":");

      if (countMarkers.get(mtype) == 0) {
        for (Chromosome chr : snpEffectPredictor.getGenome()) countBases(mtype, chr, markers);
      }

      if (verbose) System.err.println("");
    }

    // Show chromosomes length
    String mtype = Chromosome.class.getSimpleName();
    for (Chromosome chr : snpEffectPredictor.getGenome()) {
      countBases.inc(mtype, chr.size());
      countMarkers.inc(mtype);
    }
  }
Пример #4
0
  /**
   * Find closest gene to this marker
   *
   * <p>In case more than one 'closest' gene is found (e.g. two or more genes at the same distance).
   * The following rules apply:
   *
   * <p>i) If many genes have the same 'closest distance', coding genes are preferred.
   *
   * <p>ii) If more than one coding gene has the same 'closet distance', a random gene is returned.
   *
   * @param inputInterval
   */
  public Gene queryClosestGene(Marker inputInterval) {
    int initialExtension = 1000;

    String chrName = inputInterval.getChromosomeName();
    Chromosome chr = genome.getChromosome(chrName);
    if (chr == null) return null;

    if (chr.size() > 0) {
      // Extend interval to capture 'close' genes
      for (int extend = initialExtension; extend < chr.size(); extend *= 2) {
        int start = Math.max(inputInterval.getStart() - extend, 0);
        int end = inputInterval.getEnd() + extend;
        Marker extended = new Marker(chr, start, end, 1, "");

        // Find all genes that intersect with the interval
        Markers markers = query(extended);
        Markers genes = new Markers();
        int minDist = Integer.MAX_VALUE;
        for (Marker m : markers) {
          if (m instanceof Gene) {
            int dist = m.distance(inputInterval);
            if (dist < minDist) {
              genes.add(m);
              minDist = dist;
            }
          }
        }

        // Found something?
        if (genes.size() > 0) {
          // Find a gene having distance 'minDist'. Prefer coding genes
          Gene minDistGene = null;

          for (Marker m : genes) {
            int dist = m.distance(inputInterval);
            if (dist == minDist) {
              Gene gene = (Gene) m;
              if (minDistGene == null) minDistGene = gene;
              else if (!minDistGene.isProteinCoding() && gene.isProteinCoding()) minDistGene = gene;
            }
          }

          return minDistGene;
        }
      }
    }

    // Nothing found
    return null;
  }
Пример #5
0
  /**
   * Is the chromosome missing in this marker?
   *
   * @param marker
   * @return
   */
  boolean isChromosomeMissing(Marker marker) {
    // Missing chromosome in marker?
    if (marker.getChromosome() == null) return true;

    // Missing chromosome in genome?
    String chrName = marker.getChromosomeName();
    Chromosome chr = genome.getChromosome(chrName);
    if (chr == null) return true;

    // Chromosome length is 1 or less?
    if (chr.size() < 1) return true;

    // Tree not found in interval forest?
    if (!intervalForest.hasTree(chrName)) return true;

    // OK, we have the chromosome
    return false;
  }