/** * @param read a read containing the variant * @return the number of hard clipped and low qual bases at the read start (where start is the * leftmost end w.r.t. the reference) */ public static int getNumClippedBasesAtStart(final SAMRecord read) { // check for hard clips (never consider these bases): final Cigar c = read.getCigar(); final CigarElement first = c.getCigarElement(0); int numStartClippedBases = 0; if (first.getOperator() == CigarOperator.H) { numStartClippedBases = first.getLength(); } final byte[] unclippedReadBases = read.getReadBases(); final byte[] unclippedReadQuals = read.getBaseQualities(); // Do a stricter base clipping than provided by CIGAR string, since this one may be too // conservative, // and may leave a string of Q2 bases still hanging off the reads. // TODO: this code may not even get used because HaplotypeCaller already hard clips low quality // tails for (int i = numStartClippedBases; i < unclippedReadBases.length; i++) { if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD) numStartClippedBases++; else break; } return numStartClippedBases; }
@Override public String process(File page, Map<String, String> query) { loadContigs(); if (query.get("contigName").matches("^[ACGT]+$")) { contigs.put("manual", query.get("contigName")); query.put("contigName", "manual"); } else if (query.get("contigName").matches("^Pf3D7.+$")) { String[] pieces = query.get("contigName").split("[:-]"); int start = Integer.valueOf(pieces[1].replaceAll(",", "")); int end = Integer.valueOf(pieces[2].replaceAll(",", "")); ReferenceSequence rseq = REF.getSubsequenceAt(pieces[0], start, end); contigs.put("manual", new String(rseq.getBases())); query.put("contigName", "manual"); } if (query.containsKey("contigName") && contigs.containsKey(query.get("contigName")) && graphs.containsKey(query.get("graphName"))) { boolean showLinks = query.get("showLinks").equals("links_on"); String contig = contigs.get(query.get("contigName")); String originalContig = contigs.get(query.get("contigName")); String refFormattedString = ""; String kmerOrigin = ""; if (metrics.containsKey(query.get("contigName"))) { String[] loc = metrics.get(query.get("contigName")).get("canonicalLocus").split("[:-]"); if (!loc[0].equals("*")) { boolean isRc = metrics.get(query.get("contigName")).get("isRcCanonical").equals("1"); if (isRc) { contig = SequenceUtils.reverseComplement(contig); originalContig = SequenceUtils.reverseComplement(originalContig); } int locStart = Integer.valueOf(loc[1]); int locEnd = Integer.valueOf(loc[2]); Cigar cigar = cigarStringToCigar(metrics.get(query.get("contigName")).get("cigarCanonical")); if (cigar.getCigarElement(0).getOperator().equals(CigarOperator.S)) { locStart -= cigar.getCigarElement(0).getLength(); } if (cigar .getCigarElement(cigar.getCigarElements().size() - 1) .getOperator() .equals(CigarOperator.S)) { locEnd += cigar.getCigarElement(cigar.getCigarElements().size() - 1).getLength(); } String ref = new String(REF.getSubsequenceAt(loc[0], locStart, locEnd).getBases()); StringBuilder refFormatted = new StringBuilder(); int pos = 0; for (CigarElement ce : cigar.getCigarElements()) { CigarOperator co = ce.getOperator(); switch (co) { case S: refFormatted.append(ref.substring(pos, pos + ce.getLength()).toLowerCase()); break; case M: refFormatted.append(ref.substring(pos, pos + ce.getLength())); break; case I: refFormatted.append(StringUtils.repeat("-", ce.getLength())); break; } if (ce.getOperator().consumesReferenceBases()) { pos += ce.getLength(); } } refFormattedString = refFormatted.toString(); kmerOrigin = metrics.get(query.get("contigName")).get("kmerOrigin"); } } CortexGraph cg = graphs.get(query.get("graphName")); String sampleName = cg.getColor(0).getSampleName(); Set<CortexLinksMap> links = new HashSet<CortexLinksMap>(); if (LINKS != null && !LINKS.isEmpty()) { for (CortexLinksMap link : LINKS) { if (sampleName.equals(link.getCortexLinks().getColor(0).getSampleName())) { links.add(link); } } } Set<String> contigKmers = new HashSet<String>(); for (int i = 0; i <= contig.length() - cg.getKmerSize(); i++) { String curKmer = contig.substring(i, i + cg.getKmerSize()); contigKmers.add(curKmer); } StringBuilder firstFlank = new StringBuilder(); String firstKmer = contig.substring(0, cg.getKmerSize()); Set<String> pks = CortexUtils.getPrevKmers(cg, firstKmer, 0); Set<String> usedPrevKmers = new HashSet<String>(); usedPrevKmers.add(firstKmer); while (pks.size() == 1 && usedPrevKmers.size() <= 100) { String kmer = pks.iterator().next(); firstFlank.insert(0, kmer.charAt(0)); if (usedPrevKmers.contains(kmer)) { break; } usedPrevKmers.add(kmer); pks = CortexUtils.getPrevKmers(cg, kmer, 0); } StringBuilder lastFlank = new StringBuilder(); String lastKmer = contig.substring(contig.length() - cg.getKmerSize(), contig.length()); Set<String> nks = CortexUtils.getNextKmers(cg, lastKmer, 0); Set<String> usedNextKmers = new HashSet<String>(); usedNextKmers.add(lastKmer); while (nks.size() == 1 && usedNextKmers.size() <= 100) { String kmer = nks.iterator().next(); lastFlank.append(kmer.charAt(kmer.length() - 1)); if (usedNextKmers.contains(kmer)) { break; } usedNextKmers.add(kmer); nks = CortexUtils.getNextKmers(cg, kmer, 0); } contig = firstFlank.toString() + contig + lastFlank.toString(); DirectedGraph<CtxVertex, MultiEdge> g = new DefaultDirectedGraph<CtxVertex, MultiEdge>(MultiEdge.class); for (int i = 0; i <= contig.length() - cg.getKmerSize(); i++) { String curKmer = contig.substring(i, i + cg.getKmerSize()); CortexKmer ck = new CortexKmer(curKmer); CtxVertex curVer = new CtxVertex( curKmer, i, contigKmers.contains(curKmer) ? VertexType.CONTIG : VertexType.CLIPPED, cg.findRecord(ck)); g.addVertex(curVer); String expectedPrevKmer = (i > 0) ? contig.substring(i - 1, i - 1 + cg.getKmerSize()) : ""; String expectedNextKmer = (i < contig.length() - cg.getKmerSize()) ? contig.substring(i + 1, i + 1 + cg.getKmerSize()) : ""; Set<String> prevKmers = CortexUtils.getPrevKmers(cg, curKmer, 0); for (String prevKmer : prevKmers) { if (!expectedPrevKmer.equals(prevKmer)) { CortexKmer pk = new CortexKmer(prevKmer); CtxVertex prevVer = new CtxVertex(prevKmer, i - 1, VertexType.IN, cg.findRecord(pk)); MultiEdge me = g.containsEdge(prevVer, curVer) ? g.getEdge(prevVer, curVer) : new MultiEdge(); me.addGraphName(cg.getCortexFile().getName()); g.addVertex(prevVer); g.addEdge(prevVer, curVer, me); } } Set<String> nextKmers = CortexUtils.getNextKmers(cg, curKmer, 0); for (String nextKmer : nextKmers) { if (!expectedNextKmer.equals(nextKmer)) { CortexKmer nk = new CortexKmer(nextKmer); CtxVertex nextVer = new CtxVertex(nextKmer, i + 1, VertexType.OUT, cg.findRecord(nk)); MultiEdge me = g.containsEdge(curVer, nextVer) ? g.getEdge(curVer, nextVer) : new MultiEdge(); me.addGraphName(cg.getCortexFile().getName()); g.addVertex(nextVer); g.addEdge(curVer, nextVer, me); } } } Set<Map<String, Object>> verticesWithLinks = new HashSet<Map<String, Object>>(); DataFrame<String, String, Integer> hv = new DataFrame<String, String, Integer>(0); for (int q = 0; q <= contig.length() - cg.getKmerSize(); q++) { // String sk = cv.getBinaryKmer(); String sk = contig.substring(q, q + cg.getKmerSize()); CortexKmer ck = new CortexKmer(sk); for (CortexLinksMap link : links) { if (link.containsKey(ck)) { CortexLinksRecord clr = link.get(ck); Map<String, Integer> lc = (!showLinks) ? new HashMap<String, Integer>() : CortexUtils.getKmersAndCoverageInLink(cg, sk, clr); Map<String, Object> entry = new HashMap<String, Object>(); entry.put("kmer", sk); entry.put("lc", lc); verticesWithLinks.add(entry); if (showLinks) { for (CortexJunctionsRecord cjr : clr.getJunctions()) { List<String> lk = CortexUtils.getKmersInLink(cg, sk, cjr); for (int i = 0; i < lk.size(); i++) { String kili = lk.get(i); for (int j = 0; j < lk.size(); j++) { String kilj = lk.get(j); if (i != j) { hv.set(kili, kilj, hv.get(kili, kilj) + cjr.getCoverage(0)); } } } } } } } } /* int hvMax = 0; Map<String, Integer> hvlin = new HashMap<String, Integer>(); if (showLinks) { for (String kili : hv.getRowNames()) { for (String kilj : hv.getColNames()) { int cov = hv.get(kili, kilj); String id = kili + "_" + kilj; hvlin.put(id, cov); if (cov > hvMax) { hvMax = cov; } } } } */ JSONObject jo = new JSONObject(); jo.put("contig", contig); jo.put("originalContig", originalContig); jo.put("ref", refFormattedString); jo.put("kmerOrigin", kmerOrigin); jo.put("kmerSize", cg.getKmerSize()); jo.put("clipStart", firstFlank.length()); jo.put("clipEnd", contig.length() - lastFlank.length()); List<Map<String, Object>> va = new ArrayList<Map<String, Object>>(); for (CtxVertex v : g.vertexSet()) { Map<String, Object> vm = new HashMap<String, Object>(); vm.put("base", v.getBase()); vm.put("kmer", v.getKmer()); vm.put("pos", v.getPos()); vm.put("type", v.getVertexType().name()); vm.put("missing", v.isMissingFromGraph()); vm.put("cov", v.getCoverage()); va.add(vm); } jo.put("vertices", va); jo.put("verticesWithLinks", verticesWithLinks); // jo.put("hvlin", hvlin); // jo.put("hvmax", hvMax); return jo.toString(); } return null; }
public void printAlignment(final byte[] ref, final byte[] read, final int width) { final StringBuilder bread = new StringBuilder(); final StringBuilder bref = new StringBuilder(); final StringBuilder match = new StringBuilder(); int i = 0; int j = 0; final int offset = getAlignmentStart2wrt1(); Cigar cigar = getCigar(); if (overhangStrategy != OverhangStrategy.SOFTCLIP) { // we need to go through all the hassle below only if we do not do softclipping; // otherwise offset is never negative if (offset < 0) { for (; j < (-offset); j++) { bread.append((char) read[j]); bref.append(' '); match.append(' '); } // at negative offsets, our cigar's first element carries overhanging bases // that we have just printed above. Tweak the first element to // exclude those bases. Here we create a new list of cigar elements, so the original // list/original cigar are unchanged (they are unmodifiable anyway!) final List<CigarElement> tweaked = new ArrayList<>(); tweaked.addAll(cigar.getCigarElements()); tweaked.set( 0, new CigarElement( cigar.getCigarElement(0).getLength() + offset, cigar.getCigarElement(0).getOperator())); cigar = new Cigar(tweaked); } } if (offset > 0) { // note: the way this implementation works, cigar will ever start from S *only* if // read starts before the ref, i.e. offset = 0 for (; i < getAlignmentStart2wrt1(); i++) { bref.append((char) ref[i]); bread.append(' '); match.append(' '); } } for (final CigarElement e : cigar.getCigarElements()) { switch (e.getOperator()) { case M: for (int z = 0; z < e.getLength(); z++, i++, j++) { bref.append((i < ref.length) ? (char) ref[i] : ' '); bread.append((j < read.length) ? (char) read[j] : ' '); match.append( (i < ref.length && j < read.length) ? (ref[i] == read[j] ? '.' : '*') : ' '); } break; case I: for (int z = 0; z < e.getLength(); z++, j++) { bref.append('-'); bread.append((char) read[j]); match.append('I'); } break; case S: for (int z = 0; z < e.getLength(); z++, j++) { bref.append(' '); bread.append((char) read[j]); match.append('S'); } break; case D: for (int z = 0; z < e.getLength(); z++, i++) { bref.append((char) ref[i]); bread.append('-'); match.append('D'); } break; default: throw new GATKException("Unexpected Cigar element:" + e.getOperator()); } } for (; i < ref.length; i++) bref.append((char) ref[i]); for (; j < read.length; j++) bread.append((char) read[j]); int pos = 0; final int maxlength = Math.max(match.length(), Math.max(bread.length(), bref.length())); while (pos < maxlength) { print_cautiously(match, pos, width); print_cautiously(bread, pos, width); print_cautiously(bref, pos, width); System.out.println(); pos += width; } }