/** Count markers and bases for each marker type: raw totals first, then per-type counts (overlap and join) */
  public void countBases() {
    // ---
    // Collect every marker: predictor markers, genes (plus each gene's own markers), chromosomes
    // ---
    Markers allMarkers = new Markers();
    allMarkers.add(snpEffectPredictor.getMarkers());
    for (Gene gene : snpEffectPredictor.getGenome().getGenes()) {
      allMarkers.add(gene);
      allMarkers.add(gene.markers());
    }

    for (Chromosome chromo : snpEffectPredictor.getGenome()) {
      allMarkers.add(chromo);
    }

    // ---
    // Raw counts: one marker and 'size()' bases per type (and per sub-type, when there is one)
    // ---
    for (Marker marker : allMarkers) {
      String type = markerTypes.getType(marker);
      String subType = markerTypes.getSubType(marker);

      rawCountMarkers.inc(type);
      rawCountBases.inc(type, marker.size());

      if (subType != null) {
        rawCountMarkers.inc(subType);
        rawCountBases.inc(subType, marker.size());
      }
    }

    // ---
    // Per-type base counts (overlap and join), chromosome by chromosome
    // ---
    String chromoType = Chromosome.class.getSimpleName();
    for (String type : rawCountMarkers.keysSorted()) {
      // Chromosomes are skipped here and counted below (it's faster)
      if (!type.equals(chromoType)) {
        if (verbose) System.err.print(type + ":");

        // Only types that have no count yet are processed
        if (countMarkers.get(type) == 0) {
          for (Chromosome chromo : snpEffectPredictor.getGenome()) {
            countBases(type, chromo, allMarkers);
          }
        }

        if (verbose) System.err.println("");
      }
    }

    // Chromosomes: each one contributes its full length and one marker
    for (Chromosome chromo : snpEffectPredictor.getGenome()) {
      countBases.inc(chromoType, chromo.size());
      countMarkers.inc(chromoType);
    }
  }
예제 #2
0
  /**
   * Collect the markers that intersect 'marker', also querying every gene that was hit.
   *
   * <p>Each marker returned by {@code query(marker)} is added to the result. When such a marker is
   * a gene, the markers returned by that gene's own {@code query(marker)} are added as well.
   *
   * @param marker Interval to look up
   * @return Markers from the top-level query plus the per-gene query results
   * @throws RuntimeEOFException If the configuration flags missing chromosomes as errors and the
   *     marker's chromosome is missing
   * @throws RuntimeException If no chromosome was hit and the configuration flags that as an error
   */
  public Markers queryDeep(Marker marker) {
    if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(marker)) {
      throw new RuntimeEOFException("Chromosome missing for marker: " + marker);
    }

    Markers hits = new Markers();
    boolean chromosomeHit = false;

    // An empty query result simply skips the loop
    for (Marker hit : query(marker)) {
      hits.add(hit);

      if (hit instanceof Chromosome) {
        // We are expected to hit a chromosome; not doing so may be reported as an error below
        chromosomeHit = true;
      } else if (hit instanceof Gene) {
        // Descend into the gene
        hits.addAll(((Gene) hit).query(marker));
      }
    }

    boolean outOfRangeError = !chromosomeHit && Config.get().isErrorChromoHit();
    if (outOfRangeError) {
      throw new RuntimeException("ERROR: Out of chromosome range. " + marker);
    }

    return hits;
  }
  /** Write the genome, the chromosomes referenced by the markers, and the markers to 'nextProt.bin' */
  void save() {
    String binFileName = config.getDirDataVersion() + "/nextProt.bin";
    if (verbose) {
      Timer.showStdErr("Saving database to file '" + binFileName + "'");
    }

    // Set of chromosomes referenced by the markers
    HashSet<Chromosome> chromosomes = new HashSet<Chromosome>();
    for (Marker marker : markers) {
      chromosomes.add(marker.getChromosome());
    }

    // Everything to be saved, in this order: genome, chromosomes, markers
    Markers toSave = new Markers();
    toSave.add(genome);
    for (Chromosome chromosome : chromosomes) {
      toSave.add(chromosome);
    }
    for (Marker marker : markers) {
      toSave.add(marker);
    }

    // Serialize to file
    MarkerSerializer serializer = new MarkerSerializer();
    serializer.save(binFileName, toSave);
  }
예제 #4
0
  /**
   * Create up-down stream, splice site and intergenic markers from the genome's genes.
   *
   * @return A new collection holding the created markers, in that order
   */
  public Markers createGenomicRegions() {
    Markers regions = new Markers();

    // Up-down stream intervals
    for (Marker upDown : genome.getGenes().createUpDownStream(upDownStreamLength)) {
      regions.add(upDown);
    }

    // Splice site intervals
    for (Marker splice :
        genome
            .getGenes()
            .createSpliceSites(
                spliceSiteSize, spliceRegionExonSize, spliceRegionIntronMin, spliceRegionIntronMax)) {
      regions.add(splice);
    }

    // Intergenic intervals
    for (Intergenic intergenicRegion : genome.getGenes().createIntergenic()) {
      regions.add(intergenicRegion);
    }

    return regions;
  }
예제 #5
0
  /**
   * Find closest gene to this marker
   *
   * <p>The input interval is extended on both sides (starting at 1000 bases and doubling each
   * round) and queried until at least one gene is found. The search stops once the extension
   * reaches the chromosome size.
   *
   * <p>In case more than one 'closest' gene is found (e.g. two or more genes at the same distance).
   * The following rules apply:
   *
   * <p>i) If many genes have the same 'closest distance', coding genes are preferred.
   *
   * <p>ii) If more than one coding gene has the same 'closest distance', the first one found in the
   * query results is returned.
   *
   * @param inputInterval Interval to search around
   * @return The closest gene, or null if the chromosome is not found or no gene is found
   */
  public Gene queryClosestGene(Marker inputInterval) {
    int initialExtension = 1000;

    String chrName = inputInterval.getChromosomeName();
    Chromosome chr = genome.getChromosome(chrName);
    if (chr == null) return null;

    // Extend interval to capture 'close' genes.
    // The extension is a 'long' so that neither doubling it nor adding it to the interval's end
    // can wrap around. A chromosome of size zero (or less) never enters the loop.
    long chrSize = chr.size();
    for (long extend = initialExtension; extend < chrSize; extend *= 2) {
      int start = (int) Math.max(inputInterval.getStart() - extend, 0L);
      int end = (int) Math.min(inputInterval.getEnd() + extend, (long) Integer.MAX_VALUE);
      Marker extended = new Marker(chr, start, end, 1, "");

      // Among all genes that intersect the extended interval, keep the closest one.
      // On equal distance a coding gene replaces a non-coding one (ties used to be dropped
      // before the coding preference was ever evaluated).
      Gene minDistGene = null;
      int minDist = Integer.MAX_VALUE;
      for (Marker m : query(extended)) {
        if (m instanceof Gene) {
          Gene gene = (Gene) m;
          int dist = m.distance(inputInterval);

          if (dist < minDist) {
            minDistGene = gene;
            minDist = dist;
          } else if ((dist == minDist)
              && (minDistGene != null)
              && !minDistGene.isProteinCoding()
              && gene.isProteinCoding()) {
            minDistGene = gene;
          }
        }
      }

      // Found something?
      if (minDistGene != null) return minDistGene;
    }

    // Nothing found
    return null;
  }
예제 #6
0
  /** Build the interval forest from the chromosomes (optional), the genes and all other markers */
  public void buildForest() {
    intervalForest = new IntervalForest();

    // Chromosomes are only added when 'useChromosomes' is set
    if (useChromosomes) {
      for (Chromosome chromosome : genome) {
        intervalForest.add(chromosome);
      }
    }

    // Genes
    for (Gene gene : genome.getGenes()) {
      intervalForest.add(gene);
    }

    // ---
    // Genomic regions (up-down stream, splice sites, intergenic, etc.) are created
    // and appended to 'markers'
    // ---
    Markers genomicRegions = createGenomicRegions();
    markers.add(genomicRegions);

    // All 'markers' go into the forest (includes custom intervals)
    intervalForest.add(markers);

    // Finally, build the trees
    intervalForest.build();
  }
  /**
   * Parse an annotation node and create one NextProt marker per valid position
   *
   * <p>The description is cut at its first semicolon (unless the semicolon is the first
   * character). The controlled vocabulary text is split at its first semicolon into two parts. For
   * each position node under the annotation, the amino acid range is read, checked against the
   * transcript, and a marker is added to 'markers' when the check succeeds and the range is not
   * empty.
   *
   * @param ann Annotation node to parse
   * @param geneId Gene ID (only used in debug output)
   * @param category Annotation category (part of the marker's ID)
   */
  void parseAnnotation(Node ann, String geneId, String category) {
    // Description
    Node descr = findOneNode(ann, NODE_NAME_DESCRIPTION, null, null, null);
    String description = getText(descr);
    if (description == null) {
      description = "";
    } else {
      int semicolon = description.indexOf(';');
      if (semicolon > 0) description = description.substring(0, semicolon); // Cut after semicolon
    }

    // Controlled vocabulary
    Node cv = findOneNode(ann, NODE_NAME_CVNAME, null, null, null);
    String contrVoc = getText(cv);
    if (contrVoc == null) contrVoc = "";

    // Split at the first semicolon (if any) into main and secondary terms
    String[] cvs = contrVoc.split(";", 2);
    String contrVoc2 = "";
    if (cvs.length > 1) {
      contrVoc = cvs[0];
      contrVoc2 = cvs[1];
    }

    // Search annotations
    List<Node> posNodes = findNodes(ann, NODE_NAME_POSITION, null, null, null);
    for (Node pos : posNodes) {

      // Get first & last position (converted from one-based to zero-based)
      String first = getAttribute(pos, ATTR_NAME_FIRST);
      String last = getAttribute(pos, ATTR_NAME_LAST);
      int aaStart = Gpr.parseIntSafe(first) - 1;
      int aaEnd = Gpr.parseIntSafe(last) - 1;
      int len = aaEnd - aaStart + 1;

      // Get ID: the isoform reference is an attribute of the position's grandparent node
      Node isoAnn = pos.getParentNode().getParentNode();
      String isoformRef = getAttribute(isoAnn, ATTR_NAME_ISOFORM_REF);

      // Find sequence
      String sequence = sequenceByUniqueName.get(isoformRef);
      String subSeq = "";
      if ((sequence != null) && (aaStart >= 0) && (aaEnd >= aaStart))
        subSeq = sequence.substring(aaStart, aaEnd + 1);

      // Check transcript
      TranscriptData trData = transcriptData(isoformRef, aaStart, aaEnd, sequence, subSeq);

      // Create nextProt markers
      if (trData.ok && (len > 0)) {
        if (debug)
          System.out.println(
              geneId //
                  + "\t"
                  + isoformRef //
                  + "\t"
                  + trData.tr.getId() //
                  + "\t"
                  + category //
                  + "\t"
                  + description //
                  + "\t"
                  + contrVoc //
                  + "\t"
                  + contrVoc2 //
                  + "\t"
                  + first //
                  + "\t"
                  + last //
                  + "\t"
                  + len //
                  + "\t"
                  + trData.chrName //
                  + "\t"
                  + trData.chrPosStart //
                  + "\t"
                  + trData.chrPosEnd //
                  + "\t"
                  + subSeq //
                  + "\t"
                  + trData.codon //
                  + "\t"
                  + trData.aa //
              );

        // Create marker
        String id = key(category, contrVoc, description);
        NextProt nextProt = new NextProt(trData.tr, trData.chrPosStart, trData.chrPosEnd, id);
        markers.add(nextProt);

        // Count the amino acid sequence for this category / vocabulary / description
        countAaSequence(category, contrVoc, description, subSeq);
      }
    }
  }
예제 #8
0
 /**
  * Add every marker of the given collection to 'markers', one at a time
  *
  * @param markersToAdd Markers to add
  */
 public void addAll(Markers markersToAdd) {
   for (Marker toAdd : markersToAdd) {
     markers.add(toAdd);
   }
 }
예제 #9
0
 /**
  * Add a single marker to 'markers'
  *
  * <p>Note: Markers have to be added BEFORE building the interval trees. Interval trees are built
  * the first time you call snpEffect(snp) method.
  *
  * @param marker Marker to add
  */
 public void add(Marker marker) {
   markers.add(marker);
 }