/**
   * Find the gene closest to this marker.
   *
   * <p>The search window around {@code inputInterval} starts at 1000 bases on each side and is
   * doubled until at least one gene is found, or until the extension has reached the chromosome's
   * size without finding any gene.
   *
   * <p>In case more than one 'closest' gene is found (i.e. two or more genes at the same minimum
   * distance), the following rules apply:
   *
   * <p>i) If many genes have the same 'closest distance', protein coding genes are preferred.
   *
   * <p>ii) If more than one candidate remains after rule (i), the first one encountered in the
   * query result is returned.
   *
   * @param inputInterval marker to search around
   * @return the closest gene, or {@code null} if the marker's chromosome is not in the genome, the
   *     chromosome is empty, or no gene was found
   */
  public Gene queryClosestGene(Marker inputInterval) {
    int initialExtension = 1000;

    String chrName = inputInterval.getChromosomeName();
    Chromosome chr = genome.getChromosome(chrName);
    if (chr == null) return null;

    int chrSize = chr.size();
    if (chrSize <= 0) return null;

    // Extend interval to capture 'close' genes.
    // 'extend' is a long so that doubling it (and adding it to coordinates) cannot overflow.
    for (long extend = initialExtension; ; extend *= 2) {
      int start = (int) Math.max((long) inputInterval.getStart() - extend, 0L);
      int end = (int) Math.min((long) inputInterval.getEnd() + extend, (long) Integer.MAX_VALUE);
      Marker extended = new Marker(chr, start, end, 1, "");

      // Scan all genes that intersect the extended interval, keeping the closest one.
      // Ties on distance are resolved in favor of protein coding genes.
      Gene minDistGene = null;
      int minDist = Integer.MAX_VALUE;
      for (Marker m : query(extended)) {
        if (!(m instanceof Gene)) continue;

        Gene gene = (Gene) m;
        int dist = m.distance(inputInterval);
        if (dist < minDist) {
          // Strictly closer: always wins
          minDistGene = gene;
          minDist = dist;
        } else if ((dist == minDist)
            && (minDistGene != null)
            && !minDistGene.isProteinCoding()
            && gene.isProteinCoding()) {
          // Same distance: a coding gene replaces a non-coding one
          minDistGene = gene;
        }
      }

      // Found something?
      if (minDistGene != null) return minDistGene;

      // Stop only after a pass whose extension is at least the chromosome size;
      // this also guarantees one pass for chromosomes shorter than the initial extension.
      if (extend >= chrSize) break;
    }

    // Nothing found
    return null;
  }
  /**
   * Build a descriptive label for a hit and add it to a set of labels.
   *
   * <p>The label starts with the simple class name of {@code hit2add}. Optional suffixes are
   * appended in this order: a template / non-template strand tag, then gene details.
   *
   * @param hits set that receives the resulting label
   * @param hit2add marker that was hit; its class name is the base of the label
   * @param marker marker whose strand is compared against the strand of {@code hit2add}
   * @param showGeneDetails if true and {@code hit2add} is a {@link Gene}, append
   *     {@code [bioType, geneName, protein|not-protein]}
   * @param compareTemplate if true and {@code hit2add} has a {@link Gene} parent, append
   *     {@code _TEMPLATE_STRAND} when both markers are on the same strand, or
   *     {@code _NON_TEMPLATE_STRAND} otherwise
   */
  void regionsAddHit(
      HashSet<String> hits,
      Marker hit2add,
      Marker marker,
      boolean showGeneDetails,
      boolean compareTemplate) {
    String label = hit2add.getClass().getSimpleName();

    // Strand tag: only meaningful when the hit belongs to a gene
    if (compareTemplate) {
      Gene parentGene = (Gene) hit2add.findParent(Gene.class);
      if (parentGene != null) {
        boolean sameStrand = hit2add.isStrandPlus() == marker.isStrandPlus();
        if (sameStrand) {
          label += "_TEMPLATE_STRAND";
        } else {
          label += "_NON_TEMPLATE_STRAND";
        }
      }
    }

    // Gene details: only when the hit itself is a gene
    if (showGeneDetails && (hit2add instanceof Gene)) {
      Gene hitGene = (Gene) hit2add;
      String codingTag = hitGene.isProteinCoding() ? "protein" : "not-protein";
      label += "[" + hitGene.getBioType() + ", " + hitGene.getGeneName() + ", " + codingTag + "]";
    }

    hits.add(label);
  }
Beispiel #3
0
  /**
   * Get biotype.
   *
   * <p>Resolution order: the transcript's biotype when a transcript is available; otherwise
   * {@code "coding"} / {@code "non-coding"} derived from the gene when its genome has coding
   * information.
   *
   * @return the biotype, or an empty string when there is no gene or nothing could be determined
   */
  public String getBiotype() {
    Gene gene = getGene();
    if (gene == null) {
      return "";
    }

    // A transcript, when present, is the most specific source
    Transcript transcript = getTranscript();
    if (transcript != null) {
      return transcript.getBioType();
    }

    // Fall back to gene-level coding information, if the genome has any
    if (!gene.getGenome().hasCodingInfo()) {
      return "";
    }
    if (gene.isProteinCoding()) {
      return "coding";
    }
    return "non-coding";
  }
Beispiel #4
0
  /**
   * Is this single change a LOF (loss of function)?
   *
   * <p>Only changes affecting a protein coding gene and transcript are considered (unless the
   * configuration treats everything as protein coding). Criteria applied by this method:
   *
   * <ol>
   *   <li>Frame shifts whose CDS position (as a fraction, see {@code percentCds}) lies between
   *       {@code ignoreProteinCodingBefore} and {@code ignoreProteinCodingAfter}, inclusive.
   *   <li>Deletions for which {@code isLofDeletion} reports LOF.
   *   <li>Splice site acceptors or donors, only when the change intersects the CORE splice site.
   *   <li>Stop gained, when {@code isNmd} reports true for the change.
   *   <li>Rare amino acid and start lost changes.
   * </ol>
   *
   * <p>Side effect: when the change is a LOF, the affected transcript and gene are added to
   * {@code transcriptsLof} and {@code genesLof}.
   *
   * @param changeEffect the effect to evaluate
   * @return true if the change is considered a LOF
   */
  protected boolean isLof(ChangeEffect changeEffect) {
    // Not a sequence change? => Not LOF
    if ((changeEffect.getSeqChange() != null) && (!changeEffect.getSeqChange().isVariant()))
      return false;

    // Is this change affecting a protein coding gene?
    Gene gene = changeEffect.getGene();
    Transcript tr = changeEffect.getTranscript();
    if ((gene == null) // No gene affected?
        || (tr == null) // No transcript affected?
        || (!gene.isProteinCoding()
            && !config.isTreatAllAsProteinCoding()) // Not a protein coding gene?
        || (!tr.isProteinCoding()
            && !config.isTreatAllAsProteinCoding()) // Not a protein coding transcript?
    ) return false;

    // ---
    // Is this variant a LOF?
    // ---
    boolean lof = false;

    // Frame shifts
    if (changeEffect.getEffectType() == EffectType.FRAME_SHIFT) {
      // It is assumed that a protein coding change too close to the start or the end of the
      // protein could still leave it functional, so only changes inside the configured window
      // count.
      double perc = percentCds(changeEffect);
      lof |= (ignoreProteinCodingBefore <= perc) && (perc <= ignoreProteinCodingAfter);
    }

    // Deletion? Is another method to check.
    // The first guard above tolerates a missing sequence change, so it must not be
    // dereferenced unconditionally here.
    if ((changeEffect.getSeqChange() != null) && changeEffect.getSeqChange().isDel())
      lof |= isLofDeletion(changeEffect);

    // The following effect types can be considered LOF
    switch (changeEffect.getEffectType()) {
      case SPLICE_SITE_ACCEPTOR:
      case SPLICE_SITE_DONOR:
        // Core splice sites are considered LOF
        if ((changeEffect.getMarker() != null)
            && (changeEffect.getMarker() instanceof SpliceSite)) {
          // Get splice site marker and check if it is 'core'
          SpliceSite spliceSite = (SpliceSite) changeEffect.getMarker();
          if (spliceSite.intersectsCoreSpliceSite(changeEffect.getSeqChange()))
            lof = true; // Does it intersect the CORE splice site?
        }
        break;

      case STOP_GAINED:
        lof |= isNmd(changeEffect);
        break;

      case RARE_AMINO_ACID:
      case START_LOST:
        // This one is not in the referenced papers, but we assume that RARE AA and START_LOSS
        // changes are damaging.
        lof = true;
        break;

      default: // All others are not considered LOF
        break;
    }

    // Update sets ('tr' and 'gene' were checked to be non-null above)
    if (lof) {
      transcriptsLof.add(tr); // Unique transcripts affected
      genesLof.add(gene); // Unique genes affected
    }

    return lof;
  }