/**
   * @param read a read containing the variant
   * @return the number of hard clipped and low qual bases at the read start (where start is the
   *     leftmost end w.r.t. the reference)
   */
  public static int getNumClippedBasesAtStart(final SAMRecord read) {
    // check for hard clips (never consider these bases):
    final Cigar c = read.getCigar();
    final CigarElement first = c.getCigarElement(0);

    int numStartClippedBases = 0;
    if (first.getOperator() == CigarOperator.H) {
      numStartClippedBases = first.getLength();
    }
    final byte[] unclippedReadBases = read.getReadBases();
    final byte[] unclippedReadQuals = read.getBaseQualities();

    // Do a stricter base clipping than provided by CIGAR string, since this one may be too
    // conservative,
    // and may leave a string of Q2 bases still hanging off the reads.
    // TODO: this code may not even get used because HaplotypeCaller already hard clips low quality
    // tails
    for (int i = numStartClippedBases; i < unclippedReadBases.length; i++) {
      if (unclippedReadQuals[i] < PairHMMIndelErrorModel.BASE_QUAL_THRESHOLD)
        numStartClippedBases++;
      else break;
    }

    return numStartClippedBases;
  }
Exemple #2
0
    @Override
    public String process(File page, Map<String, String> query) {
      loadContigs();

      if (query.get("contigName").matches("^[ACGT]+$")) {
        contigs.put("manual", query.get("contigName"));
        query.put("contigName", "manual");
      } else if (query.get("contigName").matches("^Pf3D7.+$")) {
        String[] pieces = query.get("contigName").split("[:-]");

        int start = Integer.valueOf(pieces[1].replaceAll(",", ""));
        int end = Integer.valueOf(pieces[2].replaceAll(",", ""));

        ReferenceSequence rseq = REF.getSubsequenceAt(pieces[0], start, end);
        contigs.put("manual", new String(rseq.getBases()));
        query.put("contigName", "manual");
      }

      if (query.containsKey("contigName")
          && contigs.containsKey(query.get("contigName"))
          && graphs.containsKey(query.get("graphName"))) {
        boolean showLinks = query.get("showLinks").equals("links_on");

        String contig = contigs.get(query.get("contigName"));
        String originalContig = contigs.get(query.get("contigName"));
        String refFormattedString = "";
        String kmerOrigin = "";

        if (metrics.containsKey(query.get("contigName"))) {
          String[] loc = metrics.get(query.get("contigName")).get("canonicalLocus").split("[:-]");
          if (!loc[0].equals("*")) {
            boolean isRc = metrics.get(query.get("contigName")).get("isRcCanonical").equals("1");

            if (isRc) {
              contig = SequenceUtils.reverseComplement(contig);
              originalContig = SequenceUtils.reverseComplement(originalContig);
            }

            int locStart = Integer.valueOf(loc[1]);
            int locEnd = Integer.valueOf(loc[2]);

            Cigar cigar =
                cigarStringToCigar(metrics.get(query.get("contigName")).get("cigarCanonical"));
            if (cigar.getCigarElement(0).getOperator().equals(CigarOperator.S)) {
              locStart -= cigar.getCigarElement(0).getLength();
            }

            if (cigar
                .getCigarElement(cigar.getCigarElements().size() - 1)
                .getOperator()
                .equals(CigarOperator.S)) {
              locEnd += cigar.getCigarElement(cigar.getCigarElements().size() - 1).getLength();
            }

            String ref = new String(REF.getSubsequenceAt(loc[0], locStart, locEnd).getBases());

            StringBuilder refFormatted = new StringBuilder();
            int pos = 0;
            for (CigarElement ce : cigar.getCigarElements()) {
              CigarOperator co = ce.getOperator();
              switch (co) {
                case S:
                  refFormatted.append(ref.substring(pos, pos + ce.getLength()).toLowerCase());
                  break;
                case M:
                  refFormatted.append(ref.substring(pos, pos + ce.getLength()));
                  break;
                case I:
                  refFormatted.append(StringUtils.repeat("-", ce.getLength()));
                  break;
              }

              if (ce.getOperator().consumesReferenceBases()) {
                pos += ce.getLength();
              }
            }

            refFormattedString = refFormatted.toString();

            kmerOrigin = metrics.get(query.get("contigName")).get("kmerOrigin");
          }
        }

        CortexGraph cg = graphs.get(query.get("graphName"));

        String sampleName = cg.getColor(0).getSampleName();
        Set<CortexLinksMap> links = new HashSet<CortexLinksMap>();
        if (LINKS != null && !LINKS.isEmpty()) {
          for (CortexLinksMap link : LINKS) {
            if (sampleName.equals(link.getCortexLinks().getColor(0).getSampleName())) {
              links.add(link);
            }
          }
        }

        Set<String> contigKmers = new HashSet<String>();
        for (int i = 0; i <= contig.length() - cg.getKmerSize(); i++) {
          String curKmer = contig.substring(i, i + cg.getKmerSize());

          contigKmers.add(curKmer);
        }

        StringBuilder firstFlank = new StringBuilder();
        String firstKmer = contig.substring(0, cg.getKmerSize());
        Set<String> pks = CortexUtils.getPrevKmers(cg, firstKmer, 0);
        Set<String> usedPrevKmers = new HashSet<String>();
        usedPrevKmers.add(firstKmer);
        while (pks.size() == 1 && usedPrevKmers.size() <= 100) {
          String kmer = pks.iterator().next();
          firstFlank.insert(0, kmer.charAt(0));

          if (usedPrevKmers.contains(kmer)) {
            break;
          }
          usedPrevKmers.add(kmer);

          pks = CortexUtils.getPrevKmers(cg, kmer, 0);
        }

        StringBuilder lastFlank = new StringBuilder();
        String lastKmer = contig.substring(contig.length() - cg.getKmerSize(), contig.length());
        Set<String> nks = CortexUtils.getNextKmers(cg, lastKmer, 0);
        Set<String> usedNextKmers = new HashSet<String>();
        usedNextKmers.add(lastKmer);
        while (nks.size() == 1 && usedNextKmers.size() <= 100) {
          String kmer = nks.iterator().next();
          lastFlank.append(kmer.charAt(kmer.length() - 1));

          if (usedNextKmers.contains(kmer)) {
            break;
          }
          usedNextKmers.add(kmer);

          nks = CortexUtils.getNextKmers(cg, kmer, 0);
        }

        contig = firstFlank.toString() + contig + lastFlank.toString();

        DirectedGraph<CtxVertex, MultiEdge> g =
            new DefaultDirectedGraph<CtxVertex, MultiEdge>(MultiEdge.class);
        for (int i = 0; i <= contig.length() - cg.getKmerSize(); i++) {
          String curKmer = contig.substring(i, i + cg.getKmerSize());
          CortexKmer ck = new CortexKmer(curKmer);
          CtxVertex curVer =
              new CtxVertex(
                  curKmer,
                  i,
                  contigKmers.contains(curKmer) ? VertexType.CONTIG : VertexType.CLIPPED,
                  cg.findRecord(ck));

          g.addVertex(curVer);

          String expectedPrevKmer =
              (i > 0) ? contig.substring(i - 1, i - 1 + cg.getKmerSize()) : "";
          String expectedNextKmer =
              (i < contig.length() - cg.getKmerSize())
                  ? contig.substring(i + 1, i + 1 + cg.getKmerSize())
                  : "";

          Set<String> prevKmers = CortexUtils.getPrevKmers(cg, curKmer, 0);
          for (String prevKmer : prevKmers) {
            if (!expectedPrevKmer.equals(prevKmer)) {
              CortexKmer pk = new CortexKmer(prevKmer);
              CtxVertex prevVer = new CtxVertex(prevKmer, i - 1, VertexType.IN, cg.findRecord(pk));

              MultiEdge me =
                  g.containsEdge(prevVer, curVer) ? g.getEdge(prevVer, curVer) : new MultiEdge();
              me.addGraphName(cg.getCortexFile().getName());

              g.addVertex(prevVer);
              g.addEdge(prevVer, curVer, me);
            }
          }

          Set<String> nextKmers = CortexUtils.getNextKmers(cg, curKmer, 0);
          for (String nextKmer : nextKmers) {
            if (!expectedNextKmer.equals(nextKmer)) {
              CortexKmer nk = new CortexKmer(nextKmer);
              CtxVertex nextVer = new CtxVertex(nextKmer, i + 1, VertexType.OUT, cg.findRecord(nk));

              MultiEdge me =
                  g.containsEdge(curVer, nextVer) ? g.getEdge(curVer, nextVer) : new MultiEdge();
              me.addGraphName(cg.getCortexFile().getName());

              g.addVertex(nextVer);
              g.addEdge(curVer, nextVer, me);
            }
          }
        }

        Set<Map<String, Object>> verticesWithLinks = new HashSet<Map<String, Object>>();
        DataFrame<String, String, Integer> hv = new DataFrame<String, String, Integer>(0);

        for (int q = 0; q <= contig.length() - cg.getKmerSize(); q++) {
          // String sk = cv.getBinaryKmer();
          String sk = contig.substring(q, q + cg.getKmerSize());
          CortexKmer ck = new CortexKmer(sk);

          for (CortexLinksMap link : links) {
            if (link.containsKey(ck)) {
              CortexLinksRecord clr = link.get(ck);
              Map<String, Integer> lc =
                  (!showLinks)
                      ? new HashMap<String, Integer>()
                      : CortexUtils.getKmersAndCoverageInLink(cg, sk, clr);

              Map<String, Object> entry = new HashMap<String, Object>();
              entry.put("kmer", sk);
              entry.put("lc", lc);

              verticesWithLinks.add(entry);

              if (showLinks) {
                for (CortexJunctionsRecord cjr : clr.getJunctions()) {
                  List<String> lk = CortexUtils.getKmersInLink(cg, sk, cjr);

                  for (int i = 0; i < lk.size(); i++) {
                    String kili = lk.get(i);

                    for (int j = 0; j < lk.size(); j++) {
                      String kilj = lk.get(j);

                      if (i != j) {
                        hv.set(kili, kilj, hv.get(kili, kilj) + cjr.getCoverage(0));
                      }
                    }
                  }
                }
              }
            }
          }
        }

        /*
        int hvMax = 0;
        Map<String, Integer> hvlin = new HashMap<String, Integer>();
        if (showLinks) {
            for (String kili : hv.getRowNames()) {
                for (String kilj : hv.getColNames()) {
                    int cov = hv.get(kili, kilj);

                    String id = kili + "_" + kilj;
                    hvlin.put(id, cov);

                    if (cov > hvMax) {
                        hvMax = cov;
                    }
                }
            }
        }
        */

        JSONObject jo = new JSONObject();
        jo.put("contig", contig);
        jo.put("originalContig", originalContig);
        jo.put("ref", refFormattedString);
        jo.put("kmerOrigin", kmerOrigin);
        jo.put("kmerSize", cg.getKmerSize());
        jo.put("clipStart", firstFlank.length());
        jo.put("clipEnd", contig.length() - lastFlank.length());

        List<Map<String, Object>> va = new ArrayList<Map<String, Object>>();
        for (CtxVertex v : g.vertexSet()) {
          Map<String, Object> vm = new HashMap<String, Object>();
          vm.put("base", v.getBase());
          vm.put("kmer", v.getKmer());
          vm.put("pos", v.getPos());
          vm.put("type", v.getVertexType().name());
          vm.put("missing", v.isMissingFromGraph());
          vm.put("cov", v.getCoverage());

          va.add(vm);
        }

        jo.put("vertices", va);
        jo.put("verticesWithLinks", verticesWithLinks);
        // jo.put("hvlin", hvlin);
        // jo.put("hvmax", hvMax);

        return jo.toString();
      }

      return null;
    }
  public void printAlignment(final byte[] ref, final byte[] read, final int width) {
    final StringBuilder bread = new StringBuilder();
    final StringBuilder bref = new StringBuilder();
    final StringBuilder match = new StringBuilder();

    int i = 0;
    int j = 0;

    final int offset = getAlignmentStart2wrt1();

    Cigar cigar = getCigar();

    if (overhangStrategy != OverhangStrategy.SOFTCLIP) {

      // we need to go through all the hassle below only if we do not do softclipping;
      // otherwise offset is never negative
      if (offset < 0) {
        for (; j < (-offset); j++) {
          bread.append((char) read[j]);
          bref.append(' ');
          match.append(' ');
        }
        // at negative offsets, our cigar's first element carries overhanging bases
        // that we have just printed above. Tweak the first element to
        // exclude those bases. Here we create a new list of cigar elements, so the original
        // list/original cigar are unchanged (they are unmodifiable anyway!)

        final List<CigarElement> tweaked = new ArrayList<>();
        tweaked.addAll(cigar.getCigarElements());
        tweaked.set(
            0,
            new CigarElement(
                cigar.getCigarElement(0).getLength() + offset,
                cigar.getCigarElement(0).getOperator()));
        cigar = new Cigar(tweaked);
      }
    }

    if (offset
        > 0) { // note: the way this implementation works, cigar will ever start from S *only* if
      // read starts before the ref, i.e. offset = 0
      for (; i < getAlignmentStart2wrt1(); i++) {
        bref.append((char) ref[i]);
        bread.append(' ');
        match.append(' ');
      }
    }

    for (final CigarElement e : cigar.getCigarElements()) {
      switch (e.getOperator()) {
        case M:
          for (int z = 0; z < e.getLength(); z++, i++, j++) {
            bref.append((i < ref.length) ? (char) ref[i] : ' ');
            bread.append((j < read.length) ? (char) read[j] : ' ');
            match.append(
                (i < ref.length && j < read.length) ? (ref[i] == read[j] ? '.' : '*') : ' ');
          }
          break;
        case I:
          for (int z = 0; z < e.getLength(); z++, j++) {
            bref.append('-');
            bread.append((char) read[j]);
            match.append('I');
          }
          break;
        case S:
          for (int z = 0; z < e.getLength(); z++, j++) {
            bref.append(' ');
            bread.append((char) read[j]);
            match.append('S');
          }
          break;
        case D:
          for (int z = 0; z < e.getLength(); z++, i++) {
            bref.append((char) ref[i]);
            bread.append('-');
            match.append('D');
          }
          break;
        default:
          throw new GATKException("Unexpected Cigar element:" + e.getOperator());
      }
    }
    for (; i < ref.length; i++) bref.append((char) ref[i]);
    for (; j < read.length; j++) bread.append((char) read[j]);

    int pos = 0;
    final int maxlength = Math.max(match.length(), Math.max(bread.length(), bref.length()));
    while (pos < maxlength) {
      print_cautiously(match, pos, width);
      print_cautiously(bread, pos, width);
      print_cautiously(bref, pos, width);
      System.out.println();
      pos += width;
    }
  }