Exemplo n.º 1
0
  /**
   * Collects the markers that intersect {@code marker}, descending into genes.
   *
   * <p>Every marker returned by {@code query(marker)} is part of the result. For each {@link Gene}
   * among them, the markers returned by that gene's own {@code query(marker)} are appended as
   * well.
   *
   * @param marker interval to look up
   * @return all markers found as described above
   * @throws RuntimeEOFException if the configuration treats missing chromosomes as errors and
   *     {@code isChromosomeMissing(marker)} is true
   * @throws RuntimeException if no {@link Chromosome} was among the intersected markers and the
   *     configuration treats that as an error
   */
  public Markers queryDeep(Marker marker) {
    if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(marker)) {
      throw new RuntimeEOFException("Chromosome missing for marker: " + marker);
    }

    Markers found = new Markers();
    boolean chromosomeSeen = false;

    for (Marker hit : query(marker)) {
      found.add(hit);

      if (hit instanceof Chromosome) {
        // A chromosome must be hit, otherwise the marker is out of range
        chromosomeSeen = true;
      } else if (hit instanceof Gene) {
        // Let the gene report whatever it contains that intersects the marker
        found.addAll(((Gene) hit).query(marker));
      }
    }

    if (chromosomeSeen || !Config.get().isErrorChromoHit()) {
      return found;
    }
    throw new RuntimeException("ERROR: Out of chromosome range. " + marker);
  }
Exemplo n.º 2
0
  /**
   * Predict the effect of a sequence change.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>If missing chromosomes are configured as errors and the chromosome is missing, an
   *       {@code ERROR_CHROMOSOME_NOT_FOUND} error is recorded and the result is returned.
   *   <li>If the change is a deletion larger than {@code HUGE_DELETION_SIZE_THRESHOLD} and it
   *       covers more than {@code HUGE_DELETION_RATIO_THRESHOLD} of the chromosome, a {@code
   *       CHROMOSOME_LARGE_DELETION} effect is recorded and the result is returned.
   *   <li>Otherwise every non-chromosome marker intersecting the change is asked for its effect.
   *   <li>If no chromosome was hit, {@code ERROR_OUT_OF_CHROMOSOME_RANGE} is recorded (when
   *       configured). If a chromosome was hit but nothing else, the effect type is set to {@code
   *       INTERGENIC} (or {@code NONE} in "only regulation" mode).
   * </ol>
   *
   * @param seqChange sequence change to analyze
   * @param seqChangeRef passed on to {@link ChangeEffects} and to each marker's {@code
   *     seqChangeEffect}; according to the original documentation it is used to create a new
   *     reference 'on the fly' before results are analyzed
   * @return the effects (and errors/warnings) collected for {@code seqChange}
   */
  public ChangeEffects seqChangeEffect(Variant seqChange, Variant seqChangeRef) {
    ChangeEffects changeEffects = new ChangeEffects(seqChange, seqChangeRef);

    // ---
    // Chromosome missing?
    // ---
    if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(seqChange)) {
      changeEffects.addErrorWarning(ErrorWarningType.ERROR_CHROMOSOME_NOT_FOUND);
      return changeEffects;
    }

    // ---
    // Check that this is not a huge deletion.
    // Huge deletions would crash the rest of the algorithm, so we need to stop them here.
    // ---
    if (seqChange.isDel() && (seqChange.size() > HUGE_DELETION_SIZE_THRESHOLD)) {
      // Get chromosome
      String chromoName = seqChange.getChromosomeName();
      Chromosome chr = genome.getChromosome(chromoName);

      // The chromosome can be unknown here when missing chromosomes are not configured as
      // errors; without the null check this branch would throw a NullPointerException.
      if ((chr != null) && (chr.size() > 0)) {
        double ratio = seqChange.size() / ((double) chr.size());
        if (ratio > HUGE_DELETION_RATIO_THRESHOLD) {
          changeEffects.add(chr, EffectType.CHROMOSOME_LARGE_DELETION, "");
          return changeEffects;
        }
      }
    }

    // ---
    // Query interval tree: Which intervals does seqChange intersect?
    // ---
    Markers intersects = query(seqChange);

    // Analyze all results
    boolean hitChromo = false;
    boolean hitSomething = false;
    for (Marker marker : intersects) {
      if (marker instanceof Chromosome) {
        hitChromo = true; // Do we hit any chromosome?
      } else {
        // Analyze all other markers
        marker.seqChangeEffect(seqChange, changeEffects, seqChangeRef);
        hitSomething = true;
      }
    }

    // Any errors or intergenic (i.e. did not hit any gene)
    if (!hitChromo) {
      if (Config.get().isErrorChromoHit()) {
        changeEffects.addErrorWarning(ErrorWarningType.ERROR_OUT_OF_CHROMOSOME_RANGE);
      }
    } else if (!hitSomething) {
      if (Config.get().isOnlyRegulation()) {
        changeEffects.setEffectType(EffectType.NONE);
      } else {
        changeEffects.setEffectType(EffectType.INTERGENIC);
      }
    }

    return changeEffects;
  }
  /**
   * Count markers and bases covered for each marker type.
   *
   * <p>Three passes are made: raw per-type (and per-sub-type) counts over all markers, then a
   * per-chromosome base count for every non-chromosome type that has no entry in {@code
   * countMarkers} yet, and finally the chromosomes themselves, which are counted directly from
   * their sizes.
   */
  public void countBases() {
    // ---
    // Gather everything: predictor markers, genes (plus their own markers) and chromosomes
    // ---
    Markers allMarkers = new Markers();
    allMarkers.add(snpEffectPredictor.getMarkers());
    for (Gene gene : snpEffectPredictor.getGenome().getGenes()) {
      allMarkers.add(gene);
      allMarkers.add(gene.markers());
    }

    for (Chromosome chromosome : snpEffectPredictor.getGenome()) {
      allMarkers.add(chromosome);
    }

    // ---
    // Raw counts, by type and (when present) by sub-type
    // ---
    for (Marker marker : allMarkers) {
      String type = markerTypes.getType(marker);
      String subType = markerTypes.getSubType(marker);

      rawCountMarkers.inc(type);
      rawCountBases.inc(type, marker.size());

      if (subType == null) {
        continue; // No sub-type to count
      }
      rawCountMarkers.inc(subType);
      rawCountBases.inc(subType, marker.size());
    }

    // ---
    // Bases per marker type (overlap and join), one chromosome at a time
    // ---
    String chromosomeType = Chromosome.class.getSimpleName();
    for (String type : rawCountMarkers.keysSorted()) {
      if (type.equals(chromosomeType)) {
        continue; // Chromosomes are handled below (it's faster)
      }

      if (verbose) {
        System.err.print(type + ":");
      }

      // Only count types that have not been counted so far
      if (countMarkers.get(type) == 0) {
        for (Chromosome chromosome : snpEffectPredictor.getGenome()) {
          countBases(type, chromosome, allMarkers);
        }
      }

      if (verbose) {
        System.err.println("");
      }
    }

    // Chromosomes: the base count is simply the chromosome length
    for (Chromosome chromosome : snpEffectPredictor.getGenome()) {
      countBases.inc(chromosomeType, chromosome.size());
      countMarkers.inc(chromosomeType);
    }
  }
Exemplo n.º 4
0
  /**
   * Find the gene closest to a marker.
   *
   * <p>The input interval is extended on both sides (starting at 1000 bases and doubling each
   * round) until at least one gene intersects the extended interval. The search stops once an
   * extension of at least the chromosome size has been queried without finding a gene.
   *
   * <p>In case more than one 'closest' gene is found (e.g. two or more genes at the same
   * distance), the following rules apply:
   *
   * <p>i) If many genes have the same 'closest distance', coding genes are preferred.
   *
   * <p>ii) If more than one coding gene has the same 'closest distance', the first one returned
   * by the query is used.
   *
   * @param inputInterval marker whose closest gene is wanted
   * @return the closest gene, or {@code null} if the chromosome is unknown, has a non-positive
   *     size, or no gene was found
   */
  public Gene queryClosestGene(Marker inputInterval) {
    int initialExtension = 1000;

    String chrName = inputInterval.getChromosomeName();
    Chromosome chr = genome.getChromosome(chrName);
    if (chr == null) return null;

    long chrSize = chr.size();
    if (chrSize <= 0) return null;

    // 'long' arithmetic: doubling the extension (or adding it to the end coordinate) must not
    // overflow on very large chromosomes
    long extend = initialExtension;
    while (true) {
      // Extend interval to capture 'close' genes (clamped to valid 'int' coordinates)
      int start = (int) Math.max(inputInterval.getStart() - extend, 0L);
      int end = (int) Math.min(inputInterval.getEnd() + extend, (long) Integer.MAX_VALUE);
      Marker extended = new Marker(chr, start, end, 1, "");

      // Among the genes intersecting the extended interval, keep the closest one.
      // On equal distance a protein coding gene replaces a non-coding one.
      Gene minDistGene = null;
      int minDist = Integer.MAX_VALUE;
      for (Marker m : query(extended)) {
        if (!(m instanceof Gene)) continue;

        Gene gene = (Gene) m;
        int dist = m.distance(inputInterval);
        if ((minDistGene == null) || (dist < minDist)) {
          minDistGene = gene;
          minDist = dist;
        } else if ((dist == minDist) && !minDistGene.isProteinCoding() && gene.isProteinCoding()) {
          minDistGene = gene;
        }
      }

      // Found something?
      if (minDistGene != null) return minDistGene;

      // Stop only after an extension of at least the chromosome size has been queried, so that
      // small chromosomes and genes far away from the interval are not skipped
      if (extend >= chrSize) break;
      extend *= 2;
    }

    // Nothing found
    return null;
  }
  /**
   * Save nextprot markers.
   *
   * <p>Writes the genome, every chromosome referenced by a marker, and the markers themselves to
   * {@code nextProt.bin} inside the directory given by {@code config.getDirDataVersion()}.
   */
  void save() {
    String outputFile = config.getDirDataVersion() + "/nextProt.bin";
    if (verbose) {
      Timer.showStdErr("Saving database to file '" + outputFile + "'");
    }

    // Distinct chromosomes referenced by the markers
    HashSet<Chromosome> referencedChromosomes = new HashSet<Chromosome>();
    for (Marker marker : markers) {
      referencedChromosomes.add(marker.getChromosome());
    }

    // Everything to be serialized: genome first, then chromosomes, then the markers
    Markers toSave = new Markers();
    toSave.add(genome);
    for (Chromosome chromosome : referencedChromosomes) {
      toSave.add(chromosome);
    }
    for (Marker marker : markers) {
      toSave.add(marker);
    }

    // Write to disk
    new MarkerSerializer().save(outputFile, toSave);
  }
Exemplo n.º 6
0
  /**
   * Create up/downstream, splice site and intergenic markers for the genes of the genome.
   *
   * <p>The markers are collected in that order; this method itself only returns them.
   *
   * @return a new {@link Markers} collection holding all created regions
   */
  public Markers createGenomicRegions() {
    Markers regions = new Markers();

    // Up-down stream intervals
    for (Marker region : genome.getGenes().createUpDownStream(upDownStreamLength)) {
      regions.add(region);
    }

    // Splice site intervals
    for (Marker region :
        genome
            .getGenes()
            .createSpliceSites(
                spliceSiteSize, spliceRegionExonSize, spliceRegionIntronMin, spliceRegionIntronMax)) {
      regions.add(region);
    }

    // Intergenic intervals
    for (Intergenic region : genome.getGenes().createIntergenic()) {
      regions.add(region);
    }

    return regions;
  }
Exemplo n.º 7
0
  /**
   * Create the interval trees (forest).
   *
   * <p>A fresh {@link IntervalForest} replaces the current one and is filled with the chromosomes
   * (only if {@code useChromosomes} is set), all genes, and all {@code markers}. Before that last
   * step the regions from {@code createGenomicRegions()} are appended to {@code markers}.
   */
  public void buildForest() {
    IntervalForest forest = new IntervalForest();
    intervalForest = forest;

    // Chromosomes (optional)
    if (useChromosomes) {
      for (Chromosome chromosome : genome) {
        forest.add(chromosome);
      }
    }

    // Genes
    for (Gene gene : genome.getGenes()) {
      forest.add(gene);
    }

    // Up-down stream, splice sites, intergenic, etc. become part of 'markers'
    markers.add(createGenomicRegions());

    // All 'markers' (includes custom intervals)
    forest.add(markers);

    // Everything is in place: build the trees
    forest.build();
  }
  /**
   * Parse an annotation node and create one NextProt marker per usable position.
   *
   * <p>The description is cut at the first semicolon (when it is not the first character). The
   * controlled vocabulary text is split at its first semicolon into two parts. For every position
   * node found under {@code ann}, the first/last attributes are shifted by -1 (presumably from
   * 1-based to 0-based coordinates; confirm against the input format), the matching sub-sequence
   * is extracted and checked by {@code transcriptData(...)}. If that check is OK and the length
   * is positive, a {@link NextProt} marker is added to {@code markers} and the sequence is
   * counted.
   *
   * @param ann annotation node to parse
   * @param geneId gene ID (only used in debug output)
   * @param category annotation category, used for the marker key and for counting
   */
  void parseAnnotation(Node ann, String geneId, String category) {
    // Description
    Node descr = findOneNode(ann, NODE_NAME_DESCRIPTION, null, null, null);
    String description = getText(descr);
    if (description == null) {
      description = "";
    } else {
      int semicolon = description.indexOf(';');
      if (semicolon > 0) description = description.substring(0, semicolon); // Cut after semicolon
    }

    // Controlled vocabulary: "first;rest" is split in two, anything else is kept as is
    Node cv = findOneNode(ann, NODE_NAME_CVNAME, null, null, null);
    String contrVoc = getText(cv);
    if (contrVoc == null) contrVoc = "";

    String[] cvs = contrVoc.split(";", 2);
    String contrVoc2 = "";
    if (cvs.length > 1) {
      contrVoc = cvs[0];
      contrVoc2 = cvs[1];
    }

    // Search annotations
    List<Node> posNodes = findNodes(ann, NODE_NAME_POSITION, null, null, null);
    for (Node pos : posNodes) {

      // Get first & last position
      String first = getAttribute(pos, ATTR_NAME_FIRST);
      String last = getAttribute(pos, ATTR_NAME_LAST);
      int aaStart = Gpr.parseIntSafe(first) - 1;
      int aaEnd = Gpr.parseIntSafe(last) - 1;
      int len = aaEnd - aaStart + 1;

      // Get ID: the isoform reference is an attribute of the position's grandparent node
      Node isoAnn = pos.getParentNode().getParentNode();
      String isoformRef = getAttribute(isoAnn, ATTR_NAME_ISOFORM_REF);

      // Find sequence
      String sequence = sequenceByUniqueName.get(isoformRef);
      String subSeq = "";
      if ((sequence != null) && (aaStart >= 0) && (aaEnd >= aaStart))
        subSeq = sequence.substring(aaStart, aaEnd + 1);

      // Check transcript
      TranscriptData trData = transcriptData(isoformRef, aaStart, aaEnd, sequence, subSeq);

      // Create nextProt markers
      if (trData.ok && (len > 0)) {
        if (debug)
          System.out.println(
              geneId //
                  + "\t"
                  + isoformRef //
                  + "\t"
                  + trData.tr.getId() //
                  + "\t"
                  + category //
                  + "\t"
                  + description //
                  + "\t"
                  + contrVoc //
                  + "\t"
                  + contrVoc2 //
                  + "\t"
                  + first //
                  + "\t"
                  + last //
                  + "\t"
                  + len //
                  + "\t"
                  + trData.chrName //
                  + "\t"
                  + trData.chrPosStart //
                  + "\t"
                  + trData.chrPosEnd //
                  + "\t"
                  + subSeq //
                  + "\t"
                  + trData.codon //
                  + "\t"
                  + trData.aa //
              );

        // Create marker
        String id = key(category, contrVoc, description);
        NextProt nextProt = new NextProt(trData.tr, trData.chrPosStart, trData.chrPosEnd, id);
        markers.add(nextProt);

        // Count the amino acid sequence for this (category, vocabulary, description) key
        countAaSequence(category, contrVoc, description, subSeq);
      }
    }
  }
Exemplo n.º 9
0
  /**
   * Names of the regions hit by a marker.
   *
   * <p>Chromosomes contribute their simple class name. Genes are always reported through {@code
   * regionsAddHit}; their transcripts (and each transcript's UTRs, exons and introns) are
   * reported when they intersect the marker and the ID filter matches. Any other kind of marker
   * is reported when the ID filter matches its parent transcript or parent gene.
   *
   * @param marker interval to look up
   * @param showGeneDetails passed through to {@code regionsAddHit}
   * @param compareTemplate passed through to {@code regionsAddHit}
   * @param id only use genes or transcripts matching this ID ({@code null} matches everything)
   * @return the set of region names collected
   * @throws RuntimeEOFException if the configuration treats missing chromosomes as errors and
   *     {@code isChromosomeMissing(marker)} is true
   * @throws RuntimeException if no {@link Chromosome} was among the intersected markers
   */
  public Set<String> regions(
      Marker marker, boolean showGeneDetails, boolean compareTemplate, String id) {
    if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(marker)) {
      throw new RuntimeEOFException("Chromosome missing for marker: " + marker);
    }

    boolean chromosomeSeen = false;
    HashSet<String> names = new HashSet<String>();

    for (Marker hit : query(marker)) {
      // --- Chromosome: we have to hit one, otherwise it's an error
      if (hit instanceof Chromosome) {
        chromosomeSeen = true;
        names.add(hit.getClass().getSimpleName());
        continue;
      }

      // --- Gene: report it, then look inside its transcripts
      if (hit instanceof Gene) {
        Gene gene = (Gene) hit;
        regionsAddHit(names, gene, marker, showGeneDetails, compareTemplate);

        for (Transcript tr : gene) {
          // Matches ID? (...or no ID to match)
          boolean idMatches = (id == null) || gene.getId().equals(id) || tr.getId().equals(id);
          if (!idMatches || !tr.intersects(marker)) {
            continue;
          }

          regionsAddHit(names, tr, marker, showGeneDetails, compareTemplate);

          // UTRs
          for (Utr utr : tr.getUtrs()) {
            if (utr.intersects(marker)) {
              regionsAddHit(names, utr, marker, showGeneDetails, compareTemplate);
            }
          }

          // Exons
          for (Exon exon : tr) {
            if (exon.intersects(marker)) {
              regionsAddHit(names, exon, marker, showGeneDetails, compareTemplate);
            }
          }

          // Introns
          for (Intron intron : tr.introns()) {
            if (intron.intersects(marker)) {
              regionsAddHit(names, intron, marker, showGeneDetails, compareTemplate);
            }
          }
        }
        continue;
      }

      // --- Anything else: count it if there is no ID filter, or if the ID is the one of its
      // parent transcript, or (failing that) of its parent gene
      boolean counted = (id == null);
      if (!counted) {
        Transcript parentTr = (Transcript) hit.findParent(Transcript.class);
        counted = (parentTr != null) && parentTr.getId().equals(id);
      }
      if (!counted) {
        Gene parentGene = (Gene) hit.findParent(Gene.class);
        counted = (parentGene != null) && parentGene.getId().equals(id);
      }
      if (counted) {
        regionsAddHit(names, hit, marker, showGeneDetails, compareTemplate);
      }
    }

    if (chromosomeSeen) {
      return names;
    }
    throw new RuntimeException("ERROR: Out of chromosome range. " + marker);
  }
Exemplo n.º 10
0
 /**
  * Add every marker of a collection to {@code markers}, one by one and in iteration order.
  *
  * @param markersToAdd markers to add
  */
 public void addAll(Markers markersToAdd) {
   for (Marker each : markersToAdd) {
     this.markers.add(each);
   }
 }
Exemplo n.º 11
0
 /**
  * Add a single marker to {@code markers}.
  *
  * <p>Note (kept from the original documentation): markers have to be added BEFORE the interval
  * trees are built, which is said to happen the first time the snpEffect(snp) method is called.
  *
  * @param marker marker to add
  */
 public void add(Marker marker) {
   this.markers.add(marker);
 }