/** Show a string with overall effect */ public String effect( boolean shortFormat, boolean showAaChange, boolean showBioType, boolean useSeqOntology) { String e = ""; String codonEffect = codonEffect(showAaChange, showBioType, useSeqOntology); // Codon effect // Create effect string if (!codonEffect.isEmpty()) e = codonEffect; else if (isRegulation()) return getEffectTypeString(useSeqOntology) + "[" + ((Regulation) marker).getName() + "]"; else if (isNextProt()) return getEffectTypeString(useSeqOntology) + "[" + VcfEffect.vcfEffSafe(((NextProt) marker).getId()) + "]"; // Make sure this 'id' is not dangerous in a VCF 'EFF' field else if (isMotif()) return getEffectTypeString(useSeqOntology) + "[" + ((Motif) marker).getPwmId() + ":" + ((Motif) marker).getPwmName() + "]"; else if (isCustom()) { // Custom interval String label = ((Custom) marker).getLabel(); double score = ((Custom) marker).getScore(); if (!Double.isNaN(score)) label = label + ":" + score; if (!label.isEmpty()) label = "[" + label + "]"; return getEffectTypeString(useSeqOntology) + label; } else if (isIntergenic() || isIntron() || isSpliceSite()) e = getEffectTypeString(useSeqOntology); else if (!message.isEmpty()) e = getEffectTypeString(useSeqOntology) + ": " + message; else if (marker == null) e = getEffectTypeString( useSeqOntology); // There are cases when no marker is associated (e.g. "Out of // chromosome", "No such chromosome", etc.) else e = getEffectTypeString(useSeqOntology) + ": " + marker.getId(); if (shortFormat) e = e.split(":")[0]; return e; }
/** Show annotations counters in a table */ void analyzeSequenceConservation() { if (verbose) Timer.showStdErr( "Sequence conservation analysis." // + "\n\tAA sequence length : " + 1 // + "\n\tMin AA count : " + HIGHLY_CONSERVED_AA_COUNT // + "\n\tMin AA conservation : " + HIGHLY_CONSERVED_AA_PERCENT // ); ArrayList<String> keys = new ArrayList<String>(); keys.addAll(countAaSequenceByType.keySet()); Collections.sort(keys); // Show title StringBuilder title = new StringBuilder(); for (char aa : GprSeq.AMINO_ACIDS) title.append(aa + "\t"); title.append("\t" + title); if (verbose) System.out.println( "Amino acid regions:\n\tTotal\tMax count\tAvg len\tConservation\tCatergory\tControlled Vocabulary\t" + title + "\tOther AA sequences:"); // Show AA counts for each 'key' for (String key : keys) { long seqLen = 0, totalSeqs = 0, maxCount = 0; CountByType cbt = countAaSequenceByType.get(key); long total = cbt.sum(); boolean highlyConservedAaSequence = false; StringBuilder sb = new StringBuilder(); // For each single amino acid "sequence" for (char aa : GprSeq.AMINO_ACIDS) { long count = cbt.get("" + aa); if (count > 0) { seqLen += 1 * count; totalSeqs += count; maxCount = Math.max(maxCount, count); sb.append(count); double perc = ((double) count) / total; // We estimate that if most AA are the same, then changing this AA can cause a high impact // in protein coding if ((perc > HIGHLY_CONSERVED_AA_PERCENT) && (total >= HIGHLY_CONSERVED_AA_COUNT)) highlyConservedAaSequence = true; } sb.append("\t"); } // Sequences of more than one AA for (String aas : cbt.keySet()) { long count = cbt.get(aas); double perc = ((double) count) / total; if (aas.length() > 1) { seqLen += aas.length() * count; totalSeqs += count; maxCount = Math.max(maxCount, count); sb.append(String.format("\t" + aas + ":" + count)); if ((perc > HIGHLY_CONSERVED_AA_PERCENT) && (total >= HIGHLY_CONSERVED_AA_COUNT)) highlyConservedAaSequence = true; } } long avgLen = seqLen / totalSeqs; // Show line if (verbose) System.out.println( // "\t" + total // + "\t" + maxCount // + "\t" + avgLen // + "\t" + (highlyConservedAaSequence ? "High" : "") // + "\t" + key // + "\t" + sb // ); // Mark highly conserved if (highlyConservedAaSequence) { int count = 0; for (Marker m : markers) { NextProt nextProt = (NextProt) m; if (m.getId().equals(key)) { nextProt.setHighlyConservedAaSequence(true); count++; } } if (verbose) Timer.showStdErr( "NextProt " + count + " markers type '" + key + "' marked as highly conserved AA sequence"); } } }
public String toString(boolean useSeqOntology, boolean useHgvs) { // Get data to show String geneId = "", geneName = "", bioType = "", transcriptId = "", exonId = "", customId = ""; int exonRank = -1; if (marker != null) { // Gene Id, name and biotype Gene gene = getGene(); Transcript tr = getTranscript(); // CDS size info if (gene != null) { geneId = gene.getId(); geneName = gene.getGeneName(); bioType = getBiotype(); } // Update trId if (tr != null) transcriptId = tr.getId(); // Exon rank information Exon exon = getExon(); if (exon != null) { exonId = exon.getId(); exonRank = exon.getRank(); } // Regulation if (isRegulation()) bioType = ((Regulation) marker).getCellType(); } // Add seqChage's ID if (!variant.getId().isEmpty()) customId += variant.getId(); // Add custom markers if ((marker != null) && (marker instanceof Custom)) customId += (customId.isEmpty() ? "" : ";") + marker.getId(); // CDS length int cdsSize = getCdsLength(); String errWarn = error + (error.isEmpty() ? "" : "|") + warning; String aaChange = ""; if (useHgvs) aaChange = getHgvs(); else aaChange = ((aaRef.length() + aaAlt.length()) > 0 ? aaRef + "/" + aaAlt : ""); return errWarn // + "\t" + geneId // + "\t" + geneName // + "\t" + bioType // + "\t" + transcriptId // + "\t" + exonId // + "\t" + (exonRank >= 0 ? exonRank : "") // + "\t" + effect(false, false, false, useSeqOntology) // + "\t" + aaChange // + "\t" + ((codonsRef.length() + codonsAlt.length()) > 0 ? codonsRef + "/" + codonsAlt : "") // + "\t" + (codonNum >= 0 ? (codonNum + 1) : "") // + "\t" + (codonDegeneracy >= 0 ? codonDegeneracy + "" : "") // + "\t" + (cdsSize >= 0 ? cdsSize : "") // + "\t" + (codonsAroundOld.length() > 0 ? codonsAroundOld + " / " + codonsAroundNew : "") // + "\t" + (aasAroundOld.length() > 0 ? aasAroundOld + " / " + aasAroundNew : "") // + "\t" + customId // ; }