Beispiel #1
0
  /**
   * Create one synthetic variant line per intron and save them all to a file.
   *
   * <p>Only protein coding transcripts having at least two introns are used. For each of their
   * introns, a tab-separated line (reference 'A', alternative 'T') is built at a randomly chosen
   * position within the intron, printed to STDOUT, appended to 'sb' and finally written to
   * 'introns_test.vcf' under Gpr.HOME.
   *
   * @return Always true
   */
  @Override
  public boolean run() {
    for (Gene gene : config.getSnpEffectPredictor().getGenome().getGenes()) {
      for (Transcript transcript : gene) {
        // Skip non-coding transcripts and transcripts with fewer than two introns
        if (!transcript.isProteinCoding() || (transcript.introns().size() < 2)) continue;

        for (Intron intron : transcript.introns()) {
          // Random offset from the intron's start (always at least one base after it)
          int randomOffset = (int) (Math.random() * (intron.size() - 2));
          int pos = intron.getStart() + randomOffset + 1;

          String fixedColumns = intron.getChromosomeName() + "\t" + pos + "\t.\tA\tT\t.\t.\t";
          String info =
              "AC=1;GENE="
                  + gene.getGeneName()
                  + ";TR="
                  + transcript.getId()
                  + ";INTRON="
                  + intron.getRank();
          String line = fixedColumns + info;

          System.out.println(line);
          sb.append(line + "\n");
        }
      }
    }

    // Save everything collected so far
    Gpr.toFile(Gpr.HOME + "/introns_test.vcf", sb);

    return true;
  }
  /**
   * Create a 'standard' text representation of a genome.
   *
   * <p>The output has the genome version, followed by one line per chromosome, followed by every
   * gene (in sorted order) and one CDS line per transcript of that gene.
   *
   * @param genome Genome to show
   * @return Text representation of the genome
   */
  String show(Genome genome) {
    StringBuilder out = new StringBuilder();

    // Header: genome version
    out.append(genome.getVersion()).append("\n");

    // One line per chromosome
    for (Chromosome chromosome : genome) {
      out.append(chromosome).append("\n");
    }

    // Copy genes into a list, so that they can be sorted
    ArrayList<Gene> sortedGenes = new ArrayList<Gene>();
    for (Gene g : genome.getGenes()) {
      sortedGenes.add(g);
    }
    Collections.sort(sortedGenes);

    // Genes, each one followed by the CDSs of its transcripts
    for (Gene g : sortedGenes) {
      out.append(g);
      for (Transcript transcript : g.sortedStrand()) {
        out.append("\t\tCDS '")
            .append(transcript.getId())
            .append("': ")
            .append(transcript.cds())
            .append("\n");
      }
    }

    return out.toString();
  }
  /**
   * Build a label for a hit and add it to the set of hits.
   *
   * <p>The label starts with the simple class name of 'hit2add'. Optional suffixes are appended
   * depending on the flags.
   *
   * @param hits Set of labels collected so far (the new label is added here)
   * @param hit2add Marker that was hit (the one being labeled)
   * @param marker Query marker, only used to compare strands
   * @param showGeneDetails If true and 'hit2add' is a Gene, append "[bioType, geneName,
   *     protein|not-protein]"
   * @param compareTemplate If true and 'hit2add' has a parent Gene, append "_TEMPLATE_STRAND" when
   *     both markers are on the same strand, "_NON_TEMPLATE_STRAND" otherwise
   */
  void regionsAddHit(
      HashSet<String> hits,
      Marker hit2add,
      Marker marker,
      boolean showGeneDetails,
      boolean compareTemplate) {
    StringBuilder label = new StringBuilder(hit2add.getClass().getSimpleName());

    // Strand comparison (only for markers that belong to a gene)
    if (compareTemplate) {
      Gene parentGene = (Gene) hit2add.findParent(Gene.class);
      if (parentGene != null) {
        boolean sameStrand = hit2add.isStrandPlus() == marker.isStrandPlus();
        label.append(sameStrand ? "_TEMPLATE_STRAND" : "_NON_TEMPLATE_STRAND");
      }
    }

    // Gene details
    if (showGeneDetails && (hit2add instanceof Gene)) {
      Gene geneHit = (Gene) hit2add;
      label
          .append("[")
          .append(geneHit.getBioType())
          .append(", ")
          .append(geneHit.getGeneName())
          .append(", ")
          .append(geneHit.isProteinCoding() ? "protein" : "not-protein")
          .append("]");
    }

    hits.add(label.toString());
  }
  /**
   * Find all markers intersecting 'marker', looking inside genes as well.
   *
   * <p>Every marker returned by query(marker) is included in the result. For each Gene found, the
   * results of querying that gene with the same marker are added too.
   *
   * @param marker Marker to query
   * @return Markers intersecting 'marker'
   * @throws RuntimeEOFException If the marker's chromosome is missing and the configuration
   *     treats that as an error
   * @throws RuntimeException If no Chromosome was hit and the configuration treats that as an
   *     error
   */
  public Markers queryDeep(Marker marker) {
    if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(marker))
      throw new RuntimeEOFException("Chromosome missing for marker: " + marker);

    boolean chromosomeWasHit = false;
    Markers results = new Markers();

    for (Marker found : query(marker)) {
      results.add(found);

      if (found instanceof Chromosome) {
        // We are expected to hit a chromosome (see check below)
        chromosomeWasHit = true;
        continue;
      }

      // Genes: also add whatever the gene itself reports for this marker
      if (found instanceof Gene) results.addAll(((Gene) found).query(marker));
    }

    if (!chromosomeWasHit && Config.get().isErrorChromoHit())
      throw new RuntimeException("ERROR: Out of chromosome range. " + marker);

    return results;
  }
Beispiel #5
0
  /**
   * Get biotype.
   *
   * @return The transcript's biotype if there is a transcript. Otherwise "coding" or "non-coding"
   *     (according to the gene) if the genome has coding information. An empty string if there is
   *     no gene or no information available.
   */
  public String getBiotype() {
    Gene gene = getGene();
    if (gene == null) {
      return "";
    }

    // Transcript level information takes precedence
    Transcript transcript = getTranscript();
    if (transcript != null) {
      return transcript.getBioType();
    }

    // No coding information: nothing to report
    if (!gene.getGenome().hasCodingInfo()) {
      return "";
    }

    if (gene.isProteinCoding()) {
      return "coding";
    }
    return "non-coding";
  }
  /**
   * Count markers and bases covered for each marker type.
   *
   * <p>Steps: (1) collect all markers from the predictor, plus genes, gene sub-markers and
   * chromosomes; (2) update raw counters by type and sub-type; (3) for each non-chromosome type
   * whose marker count is still zero, delegate to countBases(type, chromosome, markers) for every
   * chromosome; (4) count chromosomes directly using their sizes.
   */
  public void countBases() {
    // Collect all markers
    Markers allMarkers = new Markers();
    allMarkers.add(snpEffectPredictor.getMarkers());
    for (Gene gene : snpEffectPredictor.getGenome().getGenes()) {
      allMarkers.add(gene);
      allMarkers.add(gene.markers());
    }

    for (Chromosome chromosome : snpEffectPredictor.getGenome()) {
      allMarkers.add(chromosome);
    }

    // Raw counts (by type and, if available, by sub-type)
    for (Marker marker : allMarkers) {
      String type = markerTypes.getType(marker);
      String subType = markerTypes.getSubType(marker);

      rawCountMarkers.inc(type);
      rawCountBases.inc(type, marker.size());

      if (subType == null) continue;
      rawCountMarkers.inc(subType);
      rawCountBases.inc(subType, marker.size());
    }

    // Bases for each marker type (chromosomes are handled separately, below)
    String chromosomeType = Chromosome.class.getSimpleName();
    for (String type : rawCountMarkers.keysSorted()) {
      if (type.equals(chromosomeType)) continue;

      if (verbose) System.err.print(type + ":");

      // Only types that have not been counted yet
      if (countMarkers.get(type) == 0) {
        for (Chromosome chromosome : snpEffectPredictor.getGenome()) {
          countBases(type, chromosome, allMarkers);
        }
      }

      if (verbose) System.err.println("");
    }

    // Chromosomes: just add their sizes
    for (Chromosome chromosome : snpEffectPredictor.getGenome()) {
      countBases.inc(chromosomeType, chromosome.size());
      countMarkers.inc(chromosomeType);
    }
  }
Beispiel #7
0
  /**
   * Build 'paeru.PA14' from a GFF file, compare to the expected result file, and check that no
   * splice sites are created for the first transcript of the first gene.
   */
  public void testCase_05_PaeruPA14muccA() {
    SnpEffectPredictor predictor =
        buildAndCompare(
            "paeru.PA14", "tests/paeru.PA14.muccA.gff", "tests/paeru.PA14.muccA.txt", true, false);

    // First transcript of the first gene
    Gene firstGene = predictor.getGenome().getGenes().iterator().next();
    Transcript firstTranscript = firstGene.iterator().next();

    // Make sure no splice site is added
    List<SpliceSite> created =
        firstTranscript.createSpliceSites(SpliceSite.CORE_SPLICE_SITE_SIZE, 0, 0, 0);
    Assert.assertEquals(0, created.size());
  }
  /**
   * Find closest gene to this marker
   *
   * <p>The search interval is extended around 'inputInterval' (starting at 1000 bases and doubling
   * each time) until at least one gene is found or the extension reaches the chromosome size.
   *
   * <p>In case more than one 'closest' gene is found (e.g. two or more genes at the same distance).
   * The following rules apply:
   *
   * <p>i) If many genes have the same 'closest distance', coding genes are preferred.
   *
   * <p>ii) If more than one coding gene has the same 'closest distance', the first one found is
   * returned.
   *
   * @param inputInterval Marker to find the closest gene to
   * @return The closest gene, or null if the chromosome is not found or no gene is found
   */
  public Gene queryClosestGene(Marker inputInterval) {
    int initialExtension = 1000;

    String chrName = inputInterval.getChromosomeName();
    Chromosome chr = genome.getChromosome(chrName);
    if (chr == null) return null;

    if (chr.size() > 0) {
      // Extend interval to capture 'close' genes
      // NOTE(review): chromosomes not longer than 'initialExtension' are never queried, because
      // the loop condition fails on the first iteration -- confirm this is intended
      for (int extend = initialExtension; extend < chr.size(); extend *= 2) {
        int start = Math.max(inputInterval.getStart() - extend, 0);
        int end = inputInterval.getEnd() + extend;
        Marker extended = new Marker(chr, start, end, 1, "");

        // Find all genes that intersect with the interval
        Markers markers = query(extended);
        Markers genes = new Markers();
        int minDist = Integer.MAX_VALUE;
        for (Marker m : markers) {
          if (m instanceof Gene) {
            int dist = m.distance(inputInterval);
            // Use '<=' (not '<'): genes tied at the minimum distance must be kept as
            // candidates, otherwise the 'prefer coding genes' rule below can never apply
            if (dist <= minDist) {
              genes.add(m);
              minDist = dist;
            }
          }
        }

        // Found something?
        if (genes.size() > 0) {
          // Find a gene having distance 'minDist'. Prefer coding genes
          Gene minDistGene = null;

          for (Marker m : genes) {
            // Candidates added before 'minDist' reached its final value are farther away
            int dist = m.distance(inputInterval);
            if (dist == minDist) {
              Gene gene = (Gene) m;
              if (minDistGene == null) minDistGene = gene;
              else if (!minDistGene.isProteinCoding() && gene.isProteinCoding()) minDistGene = gene;
            }
          }

          return minDistGene;
        }
      }
    }

    // Nothing found
    return null;
  }
Beispiel #9
0
  /**
   * What fraction of the transcripts in this gene are affected?
   *
   * <p>Note: The value returned is a ratio (affected / total), it is not multiplied by 100.
   *
   * @param gene Gene whose transcripts are analyzed
   * @param transcripts Set of affected transcripts
   * @return Number of the gene's transcripts found in 'transcripts' divided by the gene's number
   *     of children. Zero if 'gene' is null or has no children.
   */
  double percentOfTranscriptsAffected(Gene gene, HashSet<Transcript> transcripts) {
    if (gene == null) return 0;

    // No transcripts in this gene: avoid '0 / 0.0', which evaluates to NaN
    int total = gene.numChilds();
    if (total <= 0) return 0;

    // Count how many of this gene's transcripts are affected
    int countAffected = 0;
    for (Transcript tr : gene) {
      if (transcripts.contains(tr)) countAffected++;
    }

    return countAffected / ((double) total);
  }
Beispiel #10
0
  /**
   * Create a tab-separated, single line representation of this effect.
   *
   * <p>Columns (in order): errors and warnings, gene ID, gene name, biotype, transcript ID, exon
   * ID, exon rank, effect, amino acid change, codon change, codon number (shown plus one), codon
   * degeneracy, CDS size, codons around, amino acids around, custom ID. Columns without
   * information are left empty.
   *
   * @param useSeqOntology Passed along to effect(...)
   * @param useHgvs If true, the amino acid change column is the value of getHgvs(), otherwise it
   *     is "aaRef/aaAlt"
   * @return Tab-separated line
   */
  public String toString(boolean useSeqOntology, boolean useHgvs) {
    String geneId = "";
    String geneName = "";
    String bioType = "";
    String transcriptId = "";
    String exonId = "";
    int exonRank = -1;

    // Marker related information
    if (marker != null) {
      Gene gene = getGene();
      Transcript transcript = getTranscript();

      // Gene ID, name and biotype
      if (gene != null) {
        geneId = gene.getId();
        geneName = gene.getGeneName();
        bioType = getBiotype();
      }

      if (transcript != null) transcriptId = transcript.getId();

      // Exon ID and rank
      Exon exon = getExon();
      if (exon != null) {
        exonId = exon.getId();
        exonRank = exon.getRank();
      }

      // Regulation markers show the cell type in the 'biotype' column
      if (isRegulation()) bioType = ((Regulation) marker).getCellType();
    }

    // Custom ID: variant's ID, then custom marker's ID (separated by ';')
    String customId = "";
    if (!variant.getId().isEmpty()) customId += variant.getId();
    if (marker instanceof Custom) {
      customId += (customId.isEmpty() ? "" : ";") + marker.getId();
    }

    int cdsSize = getCdsLength();

    // Errors and warnings, separated by '|' when there is an error
    String errWarn = error + (error.isEmpty() ? "" : "|") + warning;

    // Amino acid change
    String aaChange;
    if (useHgvs) aaChange = getHgvs();
    else if ((aaRef.length() + aaAlt.length()) > 0) aaChange = aaRef + "/" + aaAlt;
    else aaChange = "";

    // Build the line, one column at a time
    StringBuilder line = new StringBuilder();
    line.append(errWarn).append("\t");
    line.append(geneId).append("\t");
    line.append(geneName).append("\t");
    line.append(bioType).append("\t");
    line.append(transcriptId).append("\t");
    line.append(exonId).append("\t");

    if (exonRank >= 0) line.append(exonRank);
    line.append("\t");

    line.append(effect(false, false, false, useSeqOntology)).append("\t");
    line.append(aaChange).append("\t");

    if ((codonsRef.length() + codonsAlt.length()) > 0) line.append(codonsRef + "/" + codonsAlt);
    line.append("\t");

    if (codonNum >= 0) line.append(codonNum + 1);
    line.append("\t");

    if (codonDegeneracy >= 0) line.append(codonDegeneracy + "");
    line.append("\t");

    if (cdsSize >= 0) line.append(cdsSize);
    line.append("\t");

    if (codonsAroundOld.length() > 0) line.append(codonsAroundOld + " / " + codonsAroundNew);
    line.append("\t");

    if (aasAroundOld.length() > 0) line.append(aasAroundOld + " / " + aasAroundNew);
    line.append("\t");

    line.append(customId);

    return line.toString();
  }
Beispiel #11
0
  /**
   * Is this single change a LOF?
   *
   * <p>Criteria: 1) Core splice sites acceptors or donors (only CORE ones) 2) Stop gained (decided
   * by isNmd) 3) Frame shifts (only within the configured percentage range of the CDS) 4)
   * Deletions (decided by isLofDeletion) 5) Rare amino acid and start lost
   *
   * <p>Side effect: When the change is a LOF, the transcript and gene are added to
   * 'transcriptsLof' and 'genesLof'.
   *
   * @param changeEffect Effect to analyze
   * @return true if the change is considered LOF. Always false if there is no gene or transcript,
   *     or if they are not protein coding (unless the configuration treats all as protein coding)
   */
  protected boolean isLof(ChangeEffect changeEffect) {
    // Not a sequence change? => Not LOF
    if ((changeEffect.getSeqChange() != null) && (!changeEffect.getSeqChange().isVariant()))
      return false;

    // Is this change affecting a protein coding gene?
    Gene gene = changeEffect.getGene();
    Transcript tr = changeEffect.getTranscript();
    if ((gene == null) // No gene affected?
        || (tr == null) // No transcript affected?
        || (!gene.isProteinCoding()
            && !config.isTreatAllAsProteinCoding()) // Not a protein coding gene?
        || (!tr.isProteinCoding()
            && !config.isTreatAllAsProteinCoding()) // Not a protein coding transcript?
    ) return false;

    // ---
    // Is this variant a LOF?
    // ---
    boolean lof = false;

    // Frame shifts
    if (changeEffect.getEffectType() == EffectType.FRAME_SHIFT) {
      // Only frame shifts within [ignoreProteinCodingBefore, ignoreProteinCodingAfter] count:
      // it is assumed that a change at the very end of the protein could leave it functional
      double perc = percentCds(changeEffect);
      lof |= (ignoreProteinCodingBefore <= perc) && (perc <= ignoreProteinCodingAfter);
    }

    // Deletion? Is another method to check
    // Note: The sequence change can be null (see first check), so guard before dereferencing
    if ((changeEffect.getSeqChange() != null) && changeEffect.getSeqChange().isDel())
      lof |= isLofDeletion(changeEffect);

    // The following effect types can be considered LOF
    switch (changeEffect.getEffectType()) {
      case SPLICE_SITE_ACCEPTOR:
      case SPLICE_SITE_DONOR:
        // Core splice sites are considered LOF
        if ((changeEffect.getMarker() != null)
            && (changeEffect.getMarker() instanceof SpliceSite)) {
          // Get splice site marker and check if it is 'core'
          SpliceSite spliceSite = (SpliceSite) changeEffect.getMarker();
          if (spliceSite.intersectsCoreSpliceSite(changeEffect.getSeqChange()))
            lof = true; // Does it intersect the CORE splice site?
        }
        break;

      case STOP_GAINED:
        lof |= isNmd(changeEffect);
        break;

      case RARE_AMINO_ACID:
      case START_LOST:
        // This one is not in the referenced papers, but we assume that RARE AA and START_LOSS
        // changes are damaging.
        lof = true;
        break;

      default: // All others are not considered LOF
        break;
    }

    // Update sets ('tr' and 'gene' are not null here, see check above)
    if (lof) {
      transcriptsLof.add(tr); // Unique transcripts affected
      genesLof.add(gene); // Unique genes affected
    }

    return lof;
  }
Beispiel #12
0
 /**
  * Keep only the transcripts whose IDs are in the set (applied to every gene in the genome)
  *
  * @param trIds : Transcript IDs to keep
  * @return : Sum of the values returned by each gene's keepTranscripts()
  */
 public int retainAllTranscripts(Set<String> trIds) {
   int removedCount = 0;
   for (Gene gene : genome.getGenes()) {
     removedCount += gene.keepTranscripts(trIds);
   }
   return removedCount;
 }
Beispiel #13
0
 /** Ask every gene in the genome to remove its non-canonical transcripts */
 public void removeNonCanonical() {
   for (Gene gene : genome.getGenes()) {
     gene.removeNonCanonical();
   }
 }
Beispiel #14
0
  /**
   * Names of the regions hit by a marker
   *
   * <p>Chromosomes are added by class name. Genes are always added and, for each of their
   * transcripts matching 'id' and intersecting the marker, the transcript and its intersecting
   * UTRs, exons and introns are added as well. Any other marker is added only when it matches
   * 'id' (through its parent transcript or gene). Labels are created by regionsAddHit().
   *
   * @param marker Marker to query
   * @param showGeneDetails Passed along to regionsAddHit()
   * @param compareTemplate Passed along to regionsAddHit()
   * @param id : Only use genes or transcripts matching this ID (null means 'match everything')
   * @return Set of region labels
   * @throws RuntimeEOFException If the marker's chromosome is missing and the configuration
   *     treats that as an error
   * @throws RuntimeException If no chromosome was hit
   */
  public Set<String> regions(
      Marker marker, boolean showGeneDetails, boolean compareTemplate, String id) {
    if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(marker))
      throw new RuntimeEOFException("Chromosome missing for marker: " + marker);

    boolean chromosomeWasHit = false;
    HashSet<String> hits = new HashSet<String>();

    for (Marker found : query(marker)) {
      // Chromosome: we have to hit one, otherwise it's an error
      if (found instanceof Chromosome) {
        chromosomeWasHit = true;
        hits.add(found.getClass().getSimpleName());
        continue;
      }

      // Gene: add the gene, then analyze its transcripts
      if (found instanceof Gene) {
        Gene gene = (Gene) found;
        regionsAddHit(hits, gene, marker, showGeneDetails, compareTemplate);

        for (Transcript transcript : gene) {
          // Matches ID? (...or no ID to match)
          boolean idMatches =
              (id == null) || gene.getId().equals(id) || transcript.getId().equals(id);
          if (!idMatches || !transcript.intersects(marker)) continue;

          regionsAddHit(hits, transcript, marker, showGeneDetails, compareTemplate);

          // UTRs
          for (Utr utr : transcript.getUtrs()) {
            if (utr.intersects(marker))
              regionsAddHit(hits, utr, marker, showGeneDetails, compareTemplate);
          }

          // Exons
          for (Exon exon : transcript) {
            if (exon.intersects(marker))
              regionsAddHit(hits, exon, marker, showGeneDetails, compareTemplate);
          }

          // Introns
          for (Intron intron : transcript.introns()) {
            if (intron.intersects(marker))
              regionsAddHit(hits, intron, marker, showGeneDetails, compareTemplate);
          }
        }
        continue;
      }

      // Any other marker: count it if there is no ID to match, or if the ID matches its
      // parent transcript or (failing that) its parent gene
      boolean accepted;
      if (id == null) {
        accepted = true;
      } else {
        Transcript parentTranscript = (Transcript) found.findParent(Transcript.class);
        if ((parentTranscript != null) && parentTranscript.getId().equals(id)) {
          accepted = true;
        } else {
          Gene parentGene = (Gene) found.findParent(Gene.class);
          accepted = (parentGene != null) && parentGene.getId().equals(id);
        }
      }

      if (accepted) regionsAddHit(hits, found, marker, showGeneDetails, compareTemplate);
    }

    if (!chromosomeWasHit) throw new RuntimeException("ERROR: Out of chromosome range. " + marker);
    return hits;
  }