Exemplo n.º 1
0
  /**
   * Builds a textual description of this effect.
   *
   * <p>A non-empty codon effect takes precedence. Otherwise regulation, NextProt, motif and custom
   * markers are rendered as the effect type followed by a bracketed detail and returned as is
   * (these forms are never shortened). Every remaining case yields the effect type, optionally
   * followed by {@code ": "} and either the message or the marker ID.
   *
   * @param shortFormat when true, only the part before the first {@code ':'} is kept; this applies
   *     to the codon effect and to the non-bracketed forms only
   * @param showAaChange forwarded to {@code codonEffect}
   * @param showBioType forwarded to {@code codonEffect}
   * @param useSeqOntology forwarded to {@code codonEffect} and {@code getEffectTypeString}
   * @return the effect description
   */
  public String effect(
      boolean shortFormat, boolean showAaChange, boolean showBioType, boolean useSeqOntology) {
    String codon = codonEffect(showAaChange, showBioType, useSeqOntology);

    // A codon effect, when available, wins over everything else
    if (!codon.isEmpty()) {
      return shortFormat ? codon.split(":")[0] : codon;
    }

    if (isRegulation()) {
      String type = getEffectTypeString(useSeqOntology);
      Regulation regulation = (Regulation) marker;
      return type + "[" + regulation.getName() + "]";
    }

    if (isNextProt()) {
      // The ID is sanitized so that it is not dangerous inside a VCF 'EFF' field
      String type = getEffectTypeString(useSeqOntology);
      NextProt nextProt = (NextProt) marker;
      return type + "[" + VcfEffect.vcfEffSafe(nextProt.getId()) + "]";
    }

    if (isMotif()) {
      String type = getEffectTypeString(useSeqOntology);
      Motif motif = (Motif) marker;
      return type + "[" + motif.getPwmId() + ":" + motif.getPwmName() + "]";
    }

    if (isCustom()) {
      // Custom interval: label, optionally followed by its score
      Custom custom = (Custom) marker;
      String label = custom.getLabel();
      double score = custom.getScore();
      if (!Double.isNaN(score)) {
        label += ":" + score;
      }
      String suffix = label.isEmpty() ? "" : "[" + label + "]";
      return getEffectTypeString(useSeqOntology) + suffix;
    }

    String description;
    if (isIntergenic() || isIntron() || isSpliceSite()) {
      description = getEffectTypeString(useSeqOntology);
    } else if (!message.isEmpty()) {
      description = getEffectTypeString(useSeqOntology) + ": " + message;
    } else if (marker == null) {
      // Some effects have no associated marker (e.g. "Out of chromosome", "No such chromosome")
      description = getEffectTypeString(useSeqOntology);
    } else {
      description = getEffectTypeString(useSeqOntology) + ": " + marker.getId();
    }

    return shortFormat ? description.split(":")[0] : description;
  }
  /**
   * Analyzes amino acid sequence conservation for every key in {@code countAaSequenceByType}.
   *
   * <p>For each key, the counts of single amino acids and of longer sequences are tallied. A key
   * is treated as highly conserved when at least one sequence accounts for more than {@code
   * HIGHLY_CONSERVED_AA_PERCENT} of the key's total count and that total is at least {@code
   * HIGHLY_CONSERVED_AA_COUNT}. {@code NextProt} markers whose ID equals such a key are flagged via
   * {@code setHighlyConservedAaSequence(true)}.
   *
   * <p>When {@code verbose} is set, a tab-separated table is printed to standard output and
   * progress messages are emitted through {@code Timer.showStdErr}.
   */
  void analyzeSequenceConservation() {
    if (verbose)
      Timer.showStdErr(
          "Sequence conservation analysis." //
              + "\n\tAA sequence length  : "
              + 1 //
              + "\n\tMin AA count        : "
              + HIGHLY_CONSERVED_AA_COUNT //
              + "\n\tMin AA conservation : "
              + HIGHLY_CONSERVED_AA_PERCENT //
          );

    // Process keys in sorted order so that the output is stable
    ArrayList<String> keys = new ArrayList<String>(countAaSequenceByType.keySet());
    Collections.sort(keys);

    // Show title
    StringBuilder title = new StringBuilder();
    for (char aa : GprSeq.AMINO_ACIDS) title.append(aa).append('\t');
    // NOTE(review): this appends a copy of the amino acid header to itself, so the header lists
    // the amino acids twice. Kept as is to preserve the output format; confirm whether intended.
    title.append("\t" + title);
    if (verbose)
      System.out.println(
          "Amino acid regions:\n\tTotal\tMax count\tAvg len\tConservation\tCatergory\tControlled Vocabulary\t"
              + title
              + "\tOther AA sequences:");

    // Show AA counts for each 'key'
    for (String key : keys) {
      long seqLen = 0, totalSeqs = 0, maxCount = 0;
      CountByType cbt = countAaSequenceByType.get(key);
      long total = cbt.sum();

      // The minimum-count requirement only depends on the key's total, not on each sequence
      boolean enoughSamples = total >= HIGHLY_CONSERVED_AA_COUNT;
      boolean highlyConservedAaSequence = false;

      StringBuilder sb = new StringBuilder();

      // For each single amino acid "sequence": one column per amino acid (empty when count is zero)
      for (char aa : GprSeq.AMINO_ACIDS) {
        long count = cbt.get(String.valueOf(aa));
        if (count > 0) {
          seqLen += count; // Each sequence has length one
          totalSeqs += count;
          maxCount = Math.max(maxCount, count);

          sb.append(count);
          double perc = ((double) count) / total;

          // We estimate that if most AA are the same, then changing this AA can cause a high impact
          // in protein coding
          if (enoughSamples && (perc > HIGHLY_CONSERVED_AA_PERCENT))
            highlyConservedAaSequence = true;
        }
        sb.append('\t');
      }

      // Sequences of more than one AA
      for (String aas : cbt.keySet()) {
        if (aas.length() <= 1) continue;

        long count = cbt.get(aas);
        double perc = ((double) count) / total;

        seqLen += aas.length() * count;
        totalSeqs += count;
        maxCount = Math.max(maxCount, count);

        // Plain appends: the sequence must not be interpreted as a format string
        sb.append('\t').append(aas).append(':').append(count);
        if (enoughSamples && (perc > HIGHLY_CONSERVED_AA_PERCENT))
          highlyConservedAaSequence = true;
      }

      // Guard against keys without any counted sequence (would otherwise divide by zero)
      long avgLen = (totalSeqs > 0) ? (seqLen / totalSeqs) : 0;

      // Show line
      if (verbose)
        System.out.println( //
            "\t"
                + total //
                + "\t"
                + maxCount //
                + "\t"
                + avgLen //
                + "\t"
                + (highlyConservedAaSequence ? "High" : "") //
                + "\t"
                + key //
                + "\t"
                + sb //
            );

      // Mark highly conserved
      if (highlyConservedAaSequence) {
        int count = 0;
        for (Marker m : markers) {
          // Only cast markers that match: other marker types are simply skipped
          if ((m instanceof NextProt) && key.equals(m.getId())) {
            ((NextProt) m).setHighlyConservedAaSequence(true);
            count++;
          }
        }

        if (verbose)
          Timer.showStdErr(
              "NextProt "
                  + count
                  + " markers type '"
                  + key
                  + "' marked as highly conserved AA sequence");
      }
    }
  }
Exemplo n.º 3
0
  /**
   * Formats this effect as a single line of tab-separated columns.
   *
   * <p>Columns, in order: errors/warnings, gene ID, gene name, biotype (the cell type for
   * regulation markers), transcript ID, exon ID, exon rank, effect, amino acid change, codon
   * change, codon number plus one, codon degeneracy, CDS length, codons around, amino acids around
   * and custom ID. Numeric columns are left empty when their value is negative.
   *
   * @param useSeqOntology forwarded to {@code effect}
   * @param useHgvs when true, the amino acid change column holds the value of {@code getHgvs()}
   *     instead of {@code aaRef/aaAlt}
   * @return the tab-separated line
   */
  public String toString(boolean useSeqOntology, boolean useHgvs) {
    String geneId = "";
    String geneName = "";
    String bioType = "";
    String transcriptId = "";
    String exonId = "";
    int exonRank = -1;

    if (marker != null) {
      Gene gene = getGene();
      Transcript transcript = getTranscript();

      // Gene ID, name and biotype
      if (gene != null) {
        geneId = gene.getId();
        geneName = gene.getGeneName();
        bioType = getBiotype();
      }

      // Transcript ID
      if (transcript != null) {
        transcriptId = transcript.getId();
      }

      // Exon ID and rank
      Exon exon = getExon();
      if (exon != null) {
        exonId = exon.getId();
        exonRank = exon.getRank();
      }

      // Regulation markers report their cell type in the biotype column
      if (isRegulation()) {
        bioType = ((Regulation) marker).getCellType();
      }
    }

    // Custom ID: the variant's ID, then the custom marker's ID (separated by ';')
    String variantId = variant.getId();
    String customId = variantId.isEmpty() ? "" : variantId;
    if (marker instanceof Custom) {
      String separator = customId.isEmpty() ? "" : ";";
      customId = customId + separator + marker.getId();
    }

    // CDS length
    int cdsSize = getCdsLength();

    // Errors and warnings share one column
    String errWarnSeparator = error.isEmpty() ? "" : "|";
    String errWarn = error + errWarnSeparator + warning;

    // Amino acid change
    String aaChange;
    if (useHgvs) {
      aaChange = getHgvs();
    } else if ((aaRef.length() + aaAlt.length()) > 0) {
      aaChange = aaRef + "/" + aaAlt;
    } else {
      aaChange = "";
    }

    // Assemble the line, one column at a time
    StringBuilder line = new StringBuilder();
    line.append(errWarn).append("\t");
    line.append(geneId).append("\t");
    line.append(geneName).append("\t");
    line.append(bioType).append("\t");
    line.append(transcriptId).append("\t");
    line.append(exonId).append("\t");

    if (exonRank >= 0) {
      line.append(exonRank);
    }
    line.append("\t");

    line.append(effect(false, false, false, useSeqOntology)).append("\t");
    line.append(aaChange).append("\t");

    if ((codonsRef.length() + codonsAlt.length()) > 0) {
      line.append(codonsRef).append("/").append(codonsAlt);
    }
    line.append("\t");

    if (codonNum >= 0) {
      line.append(codonNum + 1);
    }
    line.append("\t");

    if (codonDegeneracy >= 0) {
      line.append(codonDegeneracy);
    }
    line.append("\t");

    if (cdsSize >= 0) {
      line.append(cdsSize);
    }
    line.append("\t");

    if (codonsAroundOld.length() > 0) {
      line.append(codonsAroundOld).append(" / ").append(codonsAroundNew);
    }
    line.append("\t");

    if (aasAroundOld.length() > 0) {
      line.append(aasAroundOld).append(" / ").append(aasAroundNew);
    }
    line.append("\t");

    line.append(customId);
    return line.toString();
  }