Esempio n. 1
0
  /**
   * Decide whether a single change is assumed to trigger nonsense-mediated decay (NMD).
   *
   * <p>A change is flagged when it touches the region 'before' the transcript's last valid NMD
   * position (as returned by {@code lastNmdPos(tr)}): its start is at or before that position on
   * the plus strand, or its end is at or after that position on the minus strand. Transcripts
   * with at most one child (a single exon, hence no exon-exon junction) are never flagged.
   *
   * <p>Side effects: when the change is flagged, its transcript and gene are added to {@code
   * transcriptsNmd} and {@code genesNmd}, and {@code nmdCount} is incremented.
   *
   * @param changeEffect effect to evaluate; it must have a transcript
   * @return {@code true} if the change is assumed to cause NMD
   * @throws RuntimeException if no transcript is available for the effect
   */
  protected boolean isNmd(ChangeEffect changeEffect) {
    Transcript transcript = changeEffect.getTranscript();
    if (transcript == null) {
      throw new RuntimeException("Transcript not found for change:\n\t" + changeEffect);
    }

    // A single exon means there is no exon-exon junction: nothing to do
    if (transcript.numChilds() <= 1) {
      return false;
    }

    // A negative position means there is no valid 'last NMD position', hence no NMD event
    int lastValidPos = lastNmdPos(transcript);
    if (lastValidPos < 0) {
      return false;
    }

    // 'Before' the last NMD position depends on the transcript's strand
    Variant change = changeEffect.getSeqChange();
    boolean affectsNmdRegion =
        transcript.isStrandPlus()
            ? change.getStart() <= lastValidPos
            : lastValidPos <= change.getEnd();

    if (!affectsNmdRegion) {
      return false;
    }

    // Update sets and counters
    transcriptsNmd.add(transcript); // Unique transcripts affected
    genesNmd.add(changeEffect.getGene()); // Unique genes affected (NOTE: gene is not null-checked)
    nmdCount++;
    return true;
  }
Esempio n. 2
0
  /**
   * Predict the effect of a seqChange.
   *
   * <p>Steps:
   *
   * <ol>
   *   <li>If configured to treat missing chromosomes as errors and the chromosome is missing, add
   *       {@code ERROR_CHROMOSOME_NOT_FOUND} and return.
   *   <li>If the change is a deletion larger than {@code HUGE_DELETION_SIZE_THRESHOLD} that covers
   *       more than {@code HUGE_DELETION_RATIO_THRESHOLD} of its chromosome, report {@code
   *       CHROMOSOME_LARGE_DELETION} and return (such deletions would crash the rest of the
   *       algorithm).
   *   <li>Query the interval tree and let every intersecting marker that is not a chromosome add
   *       its own effects.
   *   <li>If no chromosome was hit, optionally add {@code ERROR_OUT_OF_CHROMOSOME_RANGE}; if only
   *       the chromosome was hit, mark the change as {@code INTERGENIC} (or {@code NONE} when only
   *       regulation is being analyzed).
   * </ol>
   *
   * @param seqChange : Sequence change
   * @param seqChangeRef : Before analyzing results, we have to change markers using seqChangerRef
   *     to create a new reference 'on the fly'
   * @return effects, errors and warnings collected for {@code seqChange}
   */
  public ChangeEffects seqChangeEffect(Variant seqChange, Variant seqChangeRef) {
    ChangeEffects changeEffects = new ChangeEffects(seqChange, seqChangeRef);

    // ---
    // Chromosome missing?
    // ---
    if (Config.get().isErrorOnMissingChromo() && isChromosomeMissing(seqChange)) {
      changeEffects.addErrorWarning(ErrorWarningType.ERROR_CHROMOSOME_NOT_FOUND);
      return changeEffects;
    }

    // ---
    // Check that this is not a huge deletion.
    // Huge deletions would crash the rest of the algorithm, so we need to stop them here.
    // ---
    if (seqChange.isDel() && (seqChange.size() > HUGE_DELETION_SIZE_THRESHOLD)) {
      Chromosome chr = genome.getChromosome(seqChange.getChromosomeName());

      // The missing-chromosome check above only returns early when the configuration asks for
      // it, so the lookup may presumably yield null here: guard before dereferencing.
      if ((chr != null) && (chr.size() > 0)) {
        double ratio = seqChange.size() / ((double) chr.size());
        if (ratio > HUGE_DELETION_RATIO_THRESHOLD) {
          changeEffects.add(chr, EffectType.CHROMOSOME_LARGE_DELETION, "");
          return changeEffects;
        }
      }
    }

    // ---
    // Query interval tree: Which intervals does seqChange intersect?
    // ---
    Markers intersects = query(seqChange);

    // Analyze all results
    boolean hitChromo = false, hitSomething = false;
    for (Marker marker : intersects) {
      if (marker instanceof Chromosome) {
        hitChromo = true; // Do we hit any chromosome?
      } else {
        // Every other marker adds its own effects
        marker.seqChangeEffect(seqChange, changeEffects, seqChangeRef);
        hitSomething = true;
      }
    }

    // Any errors or intergenic (i.e. did not hit any gene)
    if (!hitChromo) {
      if (Config.get().isErrorChromoHit()) {
        changeEffects.addErrorWarning(ErrorWarningType.ERROR_OUT_OF_CHROMOSOME_RANGE);
      }
    } else if (!hitSomething) {
      if (Config.get().isOnlyRegulation()) changeEffects.setEffectType(EffectType.NONE);
      else changeEffects.setEffectType(EffectType.INTERGENIC);
    }

    return changeEffects;
  }
Esempio n. 3
0
  /**
   * Net AA change (only for InDels).
   *
   * <p>For an insertion the alternative AA sequence is the long one and the reference is the
   * short one; for a deletion it is the other way around. If the short sequence is a prefix or a
   * suffix of the long one, it is stripped and the remainder (the net inserted / deleted amino
   * acids) is returned. Otherwise the whole long sequence is returned. For variants that are
   * neither insertions nor deletions the result is an empty string.
   *
   * @return net amino acid change, in upper case
   */
  public String getAaNetChange() {
    String aaLong = "", aaShort = "";

    if (variant.isIns()) {
      aaShort = getAaRef().toUpperCase();
      aaLong = getAaAlt().toUpperCase();
    } else if (variant.isDel()) {
      aaLong = getAaRef().toUpperCase();
      aaShort = getAaAlt().toUpperCase();
    }

    // Short sequence is a prefix of the long one: the net change is the trailing part
    if (aaLong.startsWith(aaShort)) return aaLong.substring(aaShort.length());

    // Short sequence is a suffix of the long one: the net change is the leading part.
    // (The test must be against 'aaShort'; testing 'aaLong' against itself is always true and
    // truncates unrelated sequences or throws when 'aaShort' is longer than 'aaLong'.)
    if (aaLong.endsWith(aaShort)) return aaLong.substring(0, aaLong.length() - aaShort.length());

    return aaLong;
  }
Esempio n. 4
0
  /**
   * Order effects by, in turn: effect impact, effect type, canonical transcript first, transcript
   * position, transcript ID, marker position and finally the variant itself.
   *
   * <p>The transcript based criteria are only applied when both effects have a transcript, and
   * the marker criterion only when both effects have a marker.
   */
  @Override
  public int compareTo(VariantEffect varEffOther) {
    // Impact first
    int result = getEffectImpact().compareTo(varEffOther.getEffectImpact());
    if (result != 0) {
      return result;
    }

    // Then effect type
    result = getEffectType().compareTo(varEffOther.getEffectType());
    if (result != 0) {
      return result;
    }

    Transcript mine = getTranscript();
    Transcript theirs = varEffOther.getTranscript();
    if (mine != null && theirs != null) {
      // Canonical transcripts sort before non-canonical ones
      result = Boolean.compare(theirs.isCanonical(), mine.isCanonical());
      if (result != 0) {
        return result;
      }

      // Genomic coordinate of the affected transcript
      result = mine.compareToPos(theirs);
      if (result != 0) {
        return result;
      }

      // Transcript IDs
      result = mine.getId().compareTo(theirs.getId());
      if (result != 0) {
        return result;
      }
    }

    // Marker position
    if (getMarker() != null && varEffOther.getMarker() != null) {
      result = getMarker().compareToPos(varEffOther.getMarker());
      if (result != 0) {
        return result;
      }
    }

    // Last resort: the variant (most of the time this is equal)
    return variant.compareTo(varEffOther.getVariant());
  }
Esempio n. 5
0
  /**
   * Ad-hoc driver: reads {@code <HOME>/snpEff/ins.txt} and prints every sequence change followed
   * by its change type, one per line, to standard output.
   *
   * @param args command line arguments (not used)
   */
  public static void main(String[] args) {
    Timer.showStdErr("Start");

    // Open the input file
    String fileName = Gpr.HOME + "/snpEff/ins.txt";
    VariantFileIterator variants = new SeqChangeTxtFileIterator(fileName);

    // Show each entry together with its change type
    for (Variant entry : variants) {
      String line = entry + "\t" + entry.getChangeType();
      System.out.println(line);
    }
  }
Esempio n. 6
0
  /**
   * Return functional class of this effect (i.e. NONSENSE, MISSENSE, SILENT or NONE).
   *
   * <p>Only SNPs are classified: a changed amino acid is NONSENSE when the alternative codons are
   * a stop according to the marker's codon table and MISSENSE otherwise; an unchanged amino acid
   * with changed codons is SILENT. Everything else is NONE.
   */
  public FunctionalClass getFunctionalClass() {
    if (!variant.isSnp()) {
      return FunctionalClass.NONE;
    }

    // Same amino acid: at most a silent change
    if (aaAlt.equals(aaRef)) {
      return codonsAlt.equals(codonsRef) ? FunctionalClass.NONE : FunctionalClass.SILENT;
    }

    // Amino acid changed: either a new stop codon or a different amino acid
    CodonTable table = marker.codonTable();
    return table.isStop(codonsAlt) ? FunctionalClass.NONSENSE : FunctionalClass.MISSENSE;
  }
Esempio n. 7
0
  /**
   * Return impact of this effect.
   *
   * <p>The value is computed on first use and cached in {@code effectImpact}. When there is a
   * variant and it is not an actual change, the impact is MODIFIER. Otherwise it is the smallest
   * element (by {@code compareTo}) among MODIFIER and all entries of {@code effectImpacts}.
   */
  public EffectImpact getEffectImpact() {
    // Already computed
    if (effectImpact != null) {
      return effectImpact;
    }

    boolean notAChange = (variant != null) && !variant.isVariant();

    // MODIFIER is both the answer for a non-change and the starting point of the search
    effectImpact = EffectImpact.MODIFIER;
    if (!notAChange) {
      // Keep the effect type impact that sorts first (presumably the highest impact)
      for (EffectImpact candidate : effectImpacts) {
        if (candidate.compareTo(effectImpact) < 0) {
          effectImpact = candidate;
        }
      }
    }

    return effectImpact;
  }
Esempio n. 8
0
  /**
   * Is this deletion a LOF?
   *
   * <p>Criteria:
   *
   * <ol>
   *   <li>The effect is {@code EXON_DELETED} and the change includes the transcript's first coding
   *       exon.
   *   <li>The fraction of coding bases deleted is greater than the {@code
   *       deleteProteinCodingBases} threshold.
   * </ol>
   *
   * @param changeEffect effect to evaluate; it must have a transcript
   * @return {@code true} if either criterion is met
   * @throws RuntimeException if the effect has no transcript, or if an {@code EXON_DELETED} effect
   *     has no sequence change
   */
  protected boolean isLofDeletion(ChangeEffect changeEffect) {
    Transcript transcript = changeEffect.getTranscript();
    if (transcript == null) {
      throw new RuntimeException("Transcript not found for change:\n\t" + changeEffect);
    }

    Variant deletion = changeEffect.getSeqChange();

    // ---
    // Criterion 1: first (coding) exon deleted
    // ---
    if (changeEffect.getEffectType() == EffectType.EXON_DELETED) {
      if (deletion == null) {
        throw new RuntimeException("Cannot retrieve 'seqChange' from EXON_DELETED effect!");
      }
      if (deletion.includes(transcript.getFirstCodingExon())) {
        return true;
      }
    }

    // ---
    // Criterion 2: more than a threshold of the coding sequence deleted
    // ---

    // Coding part of the transcript (i.e. no UTRs), ordered according to the strand
    int codingFrom = transcript.isStrandPlus() ? transcript.getCdsStart() : transcript.getCdsEnd();
    int codingTo = transcript.isStrandPlus() ? transcript.getCdsEnd() : transcript.getCdsStart();
    Marker codingRegion = new Marker(deletion.getChromosome(), codingFrom, codingTo, 1, "");

    // Overlap between the coding region and the deletion
    int overlapFrom = Math.max(codingFrom, deletion.getStart());
    int overlapTo = Math.min(codingTo, deletion.getEnd());
    if (overlapFrom >= overlapTo) {
      return false; // No intersection with the coding part => not LOF
    }
    Marker deletedRegion = new Marker(deletion.getChromosome(), overlapFrom, overlapTo, 1, "");

    // Add up, exon by exon, the coding bases deleted and the total coding bases
    int deletedTotal = 0;
    int codingTotal = 0;
    for (Exon exon : transcript) {
      deletedTotal += deletedRegion.intersectSize(exon);
      codingTotal += codingRegion.intersectSize(exon);
    }

    // More than the threshold? => It is a LOF
    double fractionDeleted = deletedTotal / ((double) codingTotal);
    return fractionDeleted > deleteProteinCodingBases;
  }
Esempio n. 9
0
  /**
   * Annotate a variant using the entries found for it in the dbNSFP database.
   *
   * <p>For every key in {@code fieldsToAdd}, the value obtained from {@code getVcfInfo} is
   * appended (comma separated) to whatever {@code info} already holds for that key. When {@code
   * annotateEmpty} is set, empty values are written as {@code "."}; otherwise values considered
   * empty by {@code isDbNsfpValueEmpty} are skipped.
   *
   * @param variant variant to look up
   * @param info INFO key/value map that receives the annotations (modified in place)
   * @return {@code true} if at least one field was added
   */
  public boolean annotate(Variant variant, Map<String, String> info) {
    // Progress mark (the variant counter only advances in verbose mode)
    if (verbose) Gpr.showMark(++countVariants, SHOW_EVERY);

    // Look the variant up in the database
    Collection<DbNsfpEntry> dbEntries = dbNsfp.query(variant);
    if (dbEntries == null || dbEntries.isEmpty()) {
      return false;
    }

    boolean annotated = false;
    for (String fieldKey : fieldsToAdd.keySet()) {
      String value = getVcfInfo(dbEntries, fieldKey);

      // Missing or empty?
      if (annotateEmpty) {
        if (value.isEmpty()) value = ".";
      } else if (isDbNsfpValueEmpty(value)) {
        continue;
      }
      if (value == null) {
        continue;
      }

      // Append to any value already stored under this key
      String previous = info.get(fieldKey);
      String merged = (previous == null || previous.isEmpty()) ? value : previous + "," + value;
      info.put(fieldKey, merged);
      annotated = true;
    }

    if (!annotated) {
      return false;
    }

    // Show progress
    countAnnotated++;
    if (debug) Gpr.debug("Annotated: " + variant.toStr());
    return true;
  }
Esempio n. 10
0
  /**
   * Format this effect as a single tab-separated line.
   *
   * <p>Columns, in order: errors/warnings, gene ID, gene name, biotype (cell type for regulation
   * markers), transcript ID, exon ID, exon rank, effect, amino acid change, codon change, codon
   * number (shown as {@code codonNum + 1}), codon degeneracy, CDS size, codons around, amino acids
   * around and custom ID. Values that are not available are left empty.
   *
   * @param useSeqOntology passed on to {@code effect(...)} when rendering the effect column
   * @param useHgvs if {@code true} the amino acid change column is the value of {@code
   *     getHgvs()}, otherwise it is {@code aaRef/aaAlt}
   * @return the formatted line (without a trailing line separator)
   */
  public String toString(boolean useSeqOntology, boolean useHgvs) {
    // Data to show
    String geneId = "";
    String geneName = "";
    String bioType = "";
    String transcriptId = "";
    String exonId = "";
    int exonRank = -1;

    if (marker != null) {
      Gene gene = getGene();
      Transcript tr = getTranscript();

      // Gene ID, name and biotype
      if (gene != null) {
        geneId = gene.getId();
        geneName = gene.getGeneName();
        bioType = getBiotype();
      }

      // Transcript ID
      if (tr != null) {
        transcriptId = tr.getId();
      }

      // Exon ID and rank
      Exon exon = getExon();
      if (exon != null) {
        exonId = exon.getId();
        exonRank = exon.getRank();
      }

      // Regulation markers show their cell type in the biotype column
      if (isRegulation()) {
        bioType = ((Regulation) marker).getCellType();
      }
    }

    // Custom ID: the variant's own ID, followed by the custom marker's ID (if any)
    String customId = "";
    if (!variant.getId().isEmpty()) {
      customId += variant.getId();
    }
    if (marker instanceof Custom) {
      customId += (customId.isEmpty() ? "" : ";") + marker.getId();
    }

    // CDS length
    int cdsSize = getCdsLength();

    String errWarn = error + (error.isEmpty() ? "" : "|") + warning;

    String aaChange =
        useHgvs ? getHgvs() : ((aaRef.length() + aaAlt.length()) > 0 ? aaRef + "/" + aaAlt : "");

    // Assemble the line, one column at a time
    StringBuilder line = new StringBuilder();
    line.append(errWarn).append("\t");
    line.append(geneId).append("\t");
    line.append(geneName).append("\t");
    line.append(bioType).append("\t");
    line.append(transcriptId).append("\t");
    line.append(exonId).append("\t");
    if (exonRank >= 0) line.append(exonRank);
    line.append("\t");
    line.append(effect(false, false, false, useSeqOntology)).append("\t");
    line.append(aaChange).append("\t");
    if ((codonsRef.length() + codonsAlt.length()) > 0) {
      line.append(codonsRef).append("/").append(codonsAlt);
    }
    line.append("\t");
    if (codonNum >= 0) line.append(codonNum + 1);
    line.append("\t");
    if (codonDegeneracy >= 0) line.append(codonDegeneracy);
    line.append("\t");
    if (cdsSize >= 0) line.append(cdsSize);
    line.append("\t");
    if (codonsAroundOld.length() > 0) {
      line.append(codonsAroundOld).append(" / ").append(codonsAroundNew);
    }
    line.append("\t");
    if (aasAroundOld.length() > 0) {
      line.append(aasAroundOld).append(" / ").append(aasAroundNew);
    }
    line.append("\t");
    line.append(customId);

    return line.toString();
  }
Esempio n. 11
0
 /** Genotype string of the variant, or an empty string when there is no variant. */
 public String getGenotype() {
   return (variant != null) ? variant.getGenotype() : "";
 }
Esempio n. 12
0
 /**
  * Codon change string ({@code codonsRef/codonsAlt}), or an empty string when the variant is
  * larger than {@code MAX_CODON_SEQUENCE_LEN} or when there are no codons at all.
  */
 public String getCodonChangeMax() {
   // Cap length in order not to make VCF files grow too much
   boolean tooLong = variant.size() > MAX_CODON_SEQUENCE_LEN;
   if (tooLong || (codonsRef.isEmpty() && codonsAlt.isEmpty())) {
     return "";
   }
   return codonsRef + "/" + codonsAlt;
 }
Esempio n. 13
0
  /**
   * Check the predicted codon change of one MNP against the expected one.
   *
   * <p>A variant is built at {@code pos} on {@code chromosome}, its effects are predicted, and the
   * reference / alternative codons of the selected effect are compared (ignoring case) with the
   * {@code ref/alt} string returned by {@code codons()}. When several effects are predicted, the
   * last one that is not a splice site, intron or intergenic effect is selected. On a mismatch the
   * test data is serialized to a file under {@code /tmp} and an exception is thrown.
   *
   * @param i index used only in the name of the dump file
   * @param pos position of the change
   * @param ref reference sequence
   * @param mnp alternative (MNP) sequence
   * @throws RuntimeException if the predicted codons do not match the expected ones
   */
  void analyze(int i, int pos, String ref, String mnp) {
    String expectedCodons = codons();

    Variant seqChange = new Variant(chromosome, pos, String.valueOf(ref), String.valueOf(mnp), "");

    // ---
    // Calculate effects
    // ---
    VariantEffects effects = snpEffectPredictor.variantEffect(seqChange);

    // Select the effect to check (usually there is only one)
    VariantEffect effect = null;
    if (effects.size() <= 1) {
      effect = effects.get();
    } else {
      for (VariantEffect candidate : effects) {
        EffectType type = candidate.getEffectType();
        boolean ignored =
            (type == EffectType.SPLICE_SITE_ACCEPTOR)
                || (type == EffectType.SPLICE_SITE_DONOR)
                || (type == EffectType.INTRON)
                || (type == EffectType.INTERGENIC);
        if (!ignored) {
          effect = candidate;
        }
      }
    }
    if (effect == null) {
      return;
    }

    String effStr = effect.effect(true, true, true, false);

    // Nothing to compare against
    if (expectedCodons.length() <= 1) {
      return;
    }

    // Expected codons are formatted as 'ref/alt'
    String[] expected = expectedCodons.split("/");
    boolean error =
        !(expected[0].toUpperCase().equals(effect.getCodonsRef().toUpperCase())
            && expected[1].toUpperCase().equals(effect.getCodonsAlt().toUpperCase()));

    if (error || debug) {
      String strand = seqChange.isStrandPlus() ? "+" : "-";
      String predicted =
          effect.getCodonsRef().toUpperCase() + "/" + effect.getCodonsAlt().toUpperCase();
      Gpr.debug(
          "Fatal error:"
              + "\n\tPos           : "
              + pos
              + "\n\tSeqChange     : "
              + seqChange
              + strand
              + "\n\tCodon (exp)   : "
              + expectedCodons
              + "\n\tCodon (pred)  : "
              + predicted
              + "\n\tEffect (pred) : "
              + effStr
              + "\n\tEffect (pred) : "
              + effect
              + "\n\tGene          : "
              + gene
              + "\n\tChromo        : "
              + chromoSequence);
    }

    if (!error) {
      return;
    }

    // Mismatch: dump everything needed to reproduce it, then fail
    System.err.println("Error. Dumping data");
    Save dump = new Save();
    dump.snpEffectPredictor = snpEffectPredictor;
    dump.chromoSequence = chromoSequence;
    dump.chromoNewSequence = chromoNewSequence;
    dump.ref = ref;
    dump.pos = pos;
    dump.mnp = mnp;
    String outFile = "/tmp/sep_" + i + "_" + pos + ".bin";
    Gpr.toFileSerialize(outFile, dump);
    throw new RuntimeException("Codons do not match!\n\tData dumped: '" + outFile + "'");
  }