Beispiel #1
0
  /**
   * Finds the adaptor boundary around the read and returns the first base inside the adaptor that
   * is closest to the read boundary. If the read is in the positive strand, this is the first base
   * after the end of the fragment (Picard calls it 'insert'), if the read is in the negative
   * strand, this is the first base before the beginning of the fragment.
   *
   * <p>There are two cases we need to treat here:
   *
   * <p>1) Our read is in the reverse strand :
   *
   * <p><----------------------| * |--------------------->
   *
   * <p>in these cases, the adaptor boundary is at the mate start (minus one)
   *
   * <p>2) Our read is in the forward strand :
   *
   * <p>|----------------------> * <----------------------|
   *
   * <p>in these cases the adaptor boundary is at the start of the read plus the inferred insert
   * size (plus one)
   *
   * @param read the read being tested for the adaptor boundary
   * @return the reference coordinate for the adaptor boundary (effectively the first base IN the
   *     adaptor, closest to the read. NULL if the read is unmapped or the mate is mapped to another
   *     contig.
   */
  public static Integer getAdaptorBoundary(final SAMRecord read) {
    final int MAXIMUM_ADAPTOR_LENGTH = 8;
    final int insertSize =
        Math.abs(
            read
                .getInferredInsertSize()); // the inferred insert size can be negative if the mate
                                           // is mapped before the read (so we take the absolute
                                           // value)

    if (insertSize == 0
        || read
            .getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or
                                    // unmapped pairs
    return null;

    Integer
        adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first
                         // base IN the adaptor, closest to the read)
    if (read.getReadNegativeStrandFlag())
      adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header)
    else adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header)

    if ((adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH)
        || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH))
      adaptorBoundary =
          null; // we are being conservative by not allowing the adaptor boundary to go beyond what
                // we belive is the maximum size of an adaptor

    return adaptorBoundary;
  }
Beispiel #2
0
 private int compareCoordinates(final SAMRecord record1, final SAMRecord record2) {
   final int seqIndex1 = record1.getReferenceIndex();
   final int seqIndex2 = record2.getReferenceIndex();
   if (seqIndex1 == -1) {
     return ((seqIndex2 == -1) ? 0 : -1);
   } else if (seqIndex2 == -1) {
     return 1;
   }
   int result = seqIndex1 - seqIndex2;
   if (result != 0) {
     return result;
   }
   result = record1.getAlignmentStart() - record2.getAlignmentStart();
   return result;
 }
  /**
   * HACK TO CREATE GATKSAMRECORD BASED ONLY A SAMRECORD FOR TESTING PURPOSES ONLY
   *
   * @param read
   */
  public GATKSAMRecord(final SAMRecord read) {
    super(read.getHeader());
    super.setReferenceIndex(read.getReferenceIndex());
    super.setAlignmentStart(read.getAlignmentStart());
    super.setReadName(read.getReadName());
    super.setMappingQuality(read.getMappingQuality());
    // indexing bin done below
    super.setCigar(read.getCigar());
    super.setFlags(read.getFlags());
    super.setMateReferenceIndex(read.getMateReferenceIndex());
    super.setMateAlignmentStart(read.getMateAlignmentStart());
    super.setInferredInsertSize(read.getInferredInsertSize());
    SAMReadGroupRecord samRG = read.getReadGroup();
    SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read);
    if (samAttr == null) {
      clearAttributes();
    } else {
      setAttributes(samAttr);
    }
    if (samRG != null) {
      GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
      setReadGroup(rg);
    }

    super.setFileSource(read.getFileSource());
    super.setReadName(read.getReadName());
    super.setCigarString(read.getCigarString());
    super.setReadBases(read.getReadBases());
    super.setBaseQualities(read.getBaseQualities());
    // From SAMRecord constructor: Do this after the above because setCigarString will clear it.
    GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read));
  }
 /** Return the sort key used for the given sort order. Useful in error messages. */
 public String getSortKey(final SAMRecord rec) {
   switch (sortOrder) {
     case coordinate:
       return rec.getReferenceName() + ":" + rec.getAlignmentStart();
     case queryname:
       return rec.getReadName();
     case unsorted:
     default:
       return null;
   }
 }
Beispiel #5
0
    /**
     * Record any index information for a given BAM record
     *
     * @param rec The BAM record. Requires rec.getFileSource() is non-null.
     */
    public void processAlignment(final SAMRecord rec) {

      // metadata
      indexStats.recordMetaData(rec);

      if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
        return; // do nothing for records without coordinates, but count them
      }

      // various checks
      final int reference = rec.getReferenceIndex();
      if (reference != currentReference) {
        throw new SAMException(
            "Unexpected reference "
                + reference
                + " when constructing index for "
                + currentReference
                + " for record "
                + rec);
      }

      binningIndexBuilder.processFeature(
          new BinningIndexBuilder.FeatureToBeIndexed() {
            @Override
            public int getStart() {
              return rec.getAlignmentStart();
            }

            @Override
            public int getEnd() {
              return rec.getAlignmentEnd();
            }

            @Override
            public Integer getIndexingBin() {
              final Integer binNumber = rec.getIndexingBin();
              return (binNumber == null ? rec.computeIndexingBin() : binNumber);
            }

            @Override
            public Chunk getChunk() {
              final SAMFileSource source = rec.getFileSource();
              if (source == null) {
                throw new SAMException(
                    "No source (virtual file offsets); needed for indexing on BAM Record " + rec);
              }
              return ((BAMFileSpan) source.getFilePointer()).getSingleChunk();
            }
          });
    }
Beispiel #6
0
 private boolean passesFilter(
     final SAMRecord record,
     final String sequence,
     final int startPos,
     final int endPos,
     final boolean contained) {
   if (record == null) {
     return false;
   }
   if (!safeEquals(record.getReferenceName(), sequence)) {
     return false;
   }
   final int alignmentStart = record.getAlignmentStart();
   int alignmentEnd = record.getAlignmentEnd();
   if (alignmentStart <= 0) {
     assertTrue(record.getReadUnmappedFlag());
     return false;
   }
   if (alignmentEnd <= 0) {
     // For indexing-only records, treat as single base alignment.
     assertTrue(record.getReadUnmappedFlag());
     alignmentEnd = alignmentStart;
   }
   if (contained) {
     if (startPos != 0 && alignmentStart < startPos) {
       return false;
     }
     if (endPos != 0 && alignmentEnd > endPos) {
       return false;
     }
   } else {
     if (startPos != 0 && alignmentEnd < startPos) {
       return false;
     }
     if (endPos != 0 && alignmentStart > endPos) {
       return false;
     }
   }
   return true;
 }
Beispiel #7
0
  public void find_coverage(SAMResource sres) {
    int start_base = sres.region.range.start;
    int end_base = sres.region.range.end;

    int coverage_len = (end_base - start_base) + 1;
    int i, end, ref_i, read_i, len;

    int[] coverage = new int[coverage_len];
    Arrays.fill(coverage, 0);

    WorkingFile wf = null;
    if (outfile != null) {
      try {
        wf = new WorkingFile(outfile);
        ps = wf.getPrintStream();
      } catch (Exception e) {
        System.err.println("I/O error: " + e); // debug
        e.printStackTrace();
        System.exit(1);
      }
    }

    try {
      //
      //  gather coverage info:
      //
      CloseableIterator<SAMRecord> iterator = sres.get_iterator();
      int read_count = 0;
      int ref_min = -1;
      int ref_max = -1;

      while (iterator.hasNext()) {
        SAMRecord sr = iterator.next();
        read_count++;

        //	System.err.println(sr.getReadName() + ": " + sr.getAlignmentStart() + "-" +
        // sr.getAlignmentEnd());  // debug

        if (sr.getReadUnmappedFlag()) continue;
        if (sr.getDuplicateReadFlag()) {
          if (verbose_mode)
            System.err.println(
                sr.getReadName()
                    + "."
                    + (sr.getReadNegativeStrandFlag() ? "R" : "F")
                    + " ignoring, duplicate");
          continue;
        }

        byte[] read = sr.getReadBases();
        byte[] quals = sr.getBaseQualities();

        for (AlignmentBlock ab : sr.getAlignmentBlocks()) {
          len = ab.getLength();
          read_i = ab.getReadStart() - 1;
          ref_i = ab.getReferenceStart() - start_base;

          if (ref_min == -1 || ref_i < ref_min) ref_min = ref_i;

          for (i = read_i, end = read_i + len; i < end; i++, ref_i++) {
            if (ref_i >= 0 && ref_i < coverage_len) {
              if (quals[i] >= MIN_QUALITY) {
                if (verbose_mode)
                  System.err.println(
                      sr.getReadName()
                          + "."
                          + (sr.getReadNegativeStrandFlag() ? "R" : "F")
                          + " hit at "
                          + (ref_i + start_base)
                          + " as="
                          + sr.getAlignmentStart()
                          + " ae="
                          + sr.getAlignmentEnd());
                coverage[ref_i]++;
              } else if (verbose_mode) {
                System.err.println(
                    sr.getReadName()
                        + "."
                        + (sr.getReadNegativeStrandFlag() ? "R" : "F")
                        + " qual_reject at "
                        + (ref_i + start_base)
                        + " as="
                        + sr.getAlignmentStart()
                        + " ae="
                        + sr.getAlignmentEnd());
              }
            }
          }
          if (ref_max == -1 || ref_i > ref_max) ref_max = ref_i;
        }
      }
      sres.close();
      System.err.println(
          "records:"
              + read_count
              + " ref_min:"
              + (ref_min + start_base)
              + " ref_max:"
              + (ref_max + start_base)); // debug

      //
      //  report coverage info:
      //
      for (i = 0; i < coverage.length; i++) {
        if (name != null) ps.print(name + ",");
        ps.println((i + start_base) + "," + coverage[i]); // debug
      }
      if (wf != null) wf.finish();

    } catch (Exception e) {
      System.err.println("ERROR: " + e); // debug
      e.printStackTrace();
    }
  }
Beispiel #8
0
  public static void align(
      Graph graph,
      SAMRecord rec,
      Node recNode,
      ReferenceSequence sequence,
      SAMProgramRecord programRecord,
      int offset,
      AlleleCoverageCutoffs alleleCoverageCutoffs,
      boolean correctBases,
      boolean useSequenceQualities,
      int MAXIMUM_TOTAL_COVERAGE,
      int MAX_HEAP_SIZE)
      throws Exception {

    int i;
    AlignHeapNode curAlignHeapNode = null;
    AlignHeapNode nextAlignHeapNode = null;
    AlignHeapNode bestAlignHeapNode = null;
    AlignHeap heap = null;
    String read = null; // could be cs
    String readBases = null; // always nt
    String qualities = null; // could be cq
    SRMAUtil.Space space = SRMAUtil.Space.NTSPACE;
    ListIterator<NodeRecord> iter = null;
    AlignHeapNodeComparator comp = null;
    int alignmentStart = -1;
    int numStartNodesAdded = 0;
    boolean strand = rec.getReadNegativeStrandFlag(); // false -> forward, true -> reverse
    String softClipStartBases = null;
    String softClipStartQualities = null;
    String softClipEndBases = null;
    String softClipEndQualities = null;

    // Debugging stuff
    String readName = rec.getReadName();

    assert SRMAUtil.Space.COLORSPACE != space;

    // Get space
    read = (String) rec.getAttribute("CS");
    if (null == read) {
      // Use base space
      space = SRMAUtil.Space.NTSPACE;
    } else {
      // assumes CS and CQ are always in sequencing order
      space = SRMAUtil.Space.COLORSPACE;
    }

    // Get read and qualities
    if (space == SRMAUtil.Space.NTSPACE) {
      byte tmpRead[] = rec.getReadString().getBytes();
      byte tmpQualities[] = rec.getBaseQualityString().getBytes();
      // Reverse once
      if (strand) { // reverse
        SAMRecordUtil.reverseArray(tmpRead);
        SAMRecordUtil.reverseArray(tmpQualities);
      }
      read = new String(tmpRead);
      readBases = new String(tmpRead);
      qualities = new String(tmpQualities);
      // Reverse again
      if (strand) { // reverse
        SAMRecordUtil.reverseArray(tmpRead);
        SAMRecordUtil.reverseArray(tmpQualities);
      }
    } else {
      byte tmpRead[] = rec.getReadString().getBytes();
      // Reverse once
      if (strand) { // reverse
        SAMRecordUtil.reverseArray(tmpRead);
      }
      readBases = new String(tmpRead);
      // Reverse again
      if (strand) { // reverse
        SAMRecordUtil.reverseArray(tmpRead);
      }
      read = SRMAUtil.normalizeColorSpaceRead(read);
      qualities = (String) rec.getAttribute("CQ");
      // Some aligners include a quality value for the adapter.  A quality value
      // IMHO should not be given for an unobserved (assumed) peice of data.  Trim
      // the first quality in this case
      if (qualities.length() == 1 + read.length()) { // trim the first quality
        qualities = qualities.substring(1);
      }
    }
    // Reverse back
    if (readBases.length() <= 0) {
      throw new Exception("Error.  The current alignment has no bases.");
    }
    if (read.length() <= 0) {
      throw new Exception("Error.  The current alignment has no bases.");
    }
    if (qualities.length() <= 0) {
      throw new Exception("Error.  The current alignment has no qualities.");
    }
    if (readBases.length() != read.length()) {
      if (space == SRMAUtil.Space.COLORSPACE) {
        throw new Exception(
            "Error.  The current alignment's read bases length does not match the length of the colors in the CS tag ["
                + rec.getReadName()
                + "].");
      } else {
        throw new Exception("Error.  Internal error: readBases.length() != read.length()");
      }
    }

    // Deal with soft-clipping
    // - save the soft clipped sequence for latter
    {
      List<CigarElement> cigarElements = null;

      cigarElements = rec.getCigar().getCigarElements();
      CigarElement e1 = cigarElements.get(0); // first
      CigarElement e2 = cigarElements.get(cigarElements.size() - 1); // last

      // Soft-clipped
      if (CigarOperator.S == e1.getOperator()) {
        if (space == SRMAUtil.Space.COLORSPACE) {
          throw new Exception(
              "Error.  Soft clipping with color-space data not currently supported.");
        }
        int l = e1.getLength();
        if (strand) { // reverse
          softClipStartBases = readBases.substring(readBases.length() - l);
          softClipStartQualities = qualities.substring(qualities.length() - l);
          readBases = readBases.substring(0, readBases.length() - l);
          read = read.substring(0, read.length() - l);
          qualities = qualities.substring(0, qualities.length() - l);
        } else {
          softClipStartBases = readBases.substring(0, l - 1);
          softClipStartQualities = qualities.substring(0, l - 1);
          readBases = readBases.substring(l);
          read = read.substring(l);
          qualities = qualities.substring(l);
        }
      }
      if (CigarOperator.S == e2.getOperator()) {
        if (space == SRMAUtil.Space.COLORSPACE) {
          throw new Exception(
              "Error.  Soft clipping with color-space data not currently supported.");
        }
        int l = e2.getLength();
        if (strand) { // reverse
          softClipEndBases = readBases.substring(0, l - 1);
          softClipEndQualities = qualities.substring(0, l - 1);
          readBases = readBases.substring(l);
          read = read.substring(l);
          qualities = qualities.substring(l);
        } else {
          softClipEndBases = readBases.substring(readBases.length() - l);
          softClipEndQualities = qualities.substring(qualities.length() - l);
          readBases = readBases.substring(0, readBases.length() - l);
          read = read.substring(0, read.length() - l);
          qualities = qualities.substring(0, qualities.length() - l);
        }
      }
    }

    // Remove mate pair information
    Align.removeMateInfo(rec);

    comp =
        new AlignHeapNodeComparator(
            (strand) ? AlignHeap.HeapType.MAXHEAP : AlignHeap.HeapType.MINHEAP);

    // Bound by original alignment if possible
    bestAlignHeapNode =
        Align.boundWithOriginalAlignment(
            rec,
            graph,
            recNode,
            comp,
            strand,
            read,
            qualities,
            readBases,
            space,
            sequence,
            alleleCoverageCutoffs,
            useSequenceQualities,
            MAXIMUM_TOTAL_COVERAGE,
            MAX_HEAP_SIZE);

    /*
    System.err.println("readName="+rec.getReadName());
    if(null != bestAlignHeapNode) {
    System.err.println("\nFOUND BEST:" + rec.toString());
    }
    else {
    System.err.println("\nNOT FOUND (BEST): " + rec.toString());
    }
    Align.updateSAM(rec, programRecord, bestAlignHeapNode, space, read, qualities, softClipStartBases, softClipStartQualities, softClipEndBases, softClipEndQualities, strand, correctBases);
    return;
    */

    heap = new AlignHeap((strand) ? AlignHeap.HeapType.MAXHEAP : AlignHeap.HeapType.MINHEAP);

    // Add start nodes
    if (strand) { // reverse
      alignmentStart = rec.getAlignmentEnd();
      for (i = alignmentStart + offset; alignmentStart - offset <= i; i--) {
        int position = graph.getPriorityQueueIndexAtPositionOrBefore(i);
        PriorityQueue<Node> startNodeQueue = graph.getPriorityQueue(position);
        if (0 != position && null != startNodeQueue) {
          Iterator<Node> startNodeQueueIter = startNodeQueue.iterator();
          while (startNodeQueueIter.hasNext()) {
            Node startNode = startNodeQueueIter.next();
            int f = passFilters(graph, startNode, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE);
            if (0 == f) {
              heap.add(
                  new AlignHeapNode(
                      null,
                      startNode,
                      startNode.coverage,
                      read.charAt(0),
                      qualities.charAt(0),
                      useSequenceQualities,
                      space));
            } else if (f < 0) {
              return;
            }
            if (startNode.position < i) {
              i = startNode.position;
            }
            numStartNodesAdded++;
          }
        }
      }
    } else {
      alignmentStart = rec.getAlignmentStart();
      for (i = alignmentStart - offset; i <= alignmentStart + offset; i++) {
        int position = graph.getPriorityQueueIndexAtPositionOrGreater(i);
        PriorityQueue<Node> startNodeQueue = graph.getPriorityQueue(position);
        if (0 != position && null != startNodeQueue) {
          Iterator<Node> startNodeQueueIter = startNodeQueue.iterator();
          while (startNodeQueueIter.hasNext()) {
            Node startNode = startNodeQueueIter.next();
            int f = passFilters(graph, startNode, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE);
            if (0 == f) {
              heap.add(
                  new AlignHeapNode(
                      null,
                      startNode,
                      startNode.coverage,
                      read.charAt(0),
                      qualities.charAt(0),
                      useSequenceQualities,
                      space));
            } else if (f < 0) {
              return;
            }
            if (i < startNode.position) {
              i = startNode.position;
            }
            numStartNodesAdded++;
          }
        }
      }
    }
    if (numStartNodesAdded == 0) {
      throw new Exception("Did not add any start nodes!");
    }

    // Get first node off the heap
    curAlignHeapNode = heap.poll();

    while (null != curAlignHeapNode) {

      if (MAX_HEAP_SIZE <= heap.size()) {
        // too many to consider
        return;
      }

      // System.err.println("strand:" + strand + "\tsize:" + heap.size() + "\talignmentStart:" +
      // alignmentStart + "\toffset:" + offset + "\treadOffset:" + curAlignHeapNode.readOffset);
      // System.err.print("size:" + heap.size() + ":" + curAlignHeapNode.readOffset + ":" +
      // curAlignHeapNode.score + ":" + curAlignHeapNode.alleleCoverageSum + ":" +
      // curAlignHeapNode.startPosition + "\t");
      // curAlignHeapNode.node.print(System.err);
      // System.err.print("\rposition:" + curAlignHeapNode.node.position + "\treadOffset:" +
      // curAlignHeapNode.readOffset);

      // Remove all non-insertions with the same contig/pos/read-offset/type/base and lower score
      nextAlignHeapNode = heap.peek();
      while (Node.INSERTION != curAlignHeapNode.node.type
          && null != nextAlignHeapNode
          && 0 == comp.compare(curAlignHeapNode, nextAlignHeapNode)) {
        if (curAlignHeapNode.score < nextAlignHeapNode.score
            || (curAlignHeapNode.score == nextAlignHeapNode.score
                && curAlignHeapNode.alleleCoverageSum < nextAlignHeapNode.alleleCoverageSum)) {
          // Update current node
          curAlignHeapNode = heap.poll();
        } else {
          // Ignore next node
          heap.poll();
        }
        nextAlignHeapNode = heap.peek();
      }
      nextAlignHeapNode = null;

      // Check if the alignment is complete
      if (curAlignHeapNode.readOffset == read.length() - 1) {
        // All read bases examined, store if has the best alignment.

        // System.err.print(curAlignHeapNode.alleleCoverageSum + ":" + curAlignHeapNode.score +
        // ":");
        // System.err.print(curAlignHeapNode.startPosition + ":");
        // curAlignHeapNode.node.print(System.err);

        if (null == bestAlignHeapNode
            || bestAlignHeapNode.score < curAlignHeapNode.score
            || (bestAlignHeapNode.score == curAlignHeapNode.score
                && bestAlignHeapNode.alleleCoverageSum < curAlignHeapNode.alleleCoverageSum)) {
          bestAlignHeapNode = curAlignHeapNode;
        }
      } else if (null != bestAlignHeapNode && curAlignHeapNode.score < bestAlignHeapNode.score) {
        // ignore, under the assumption that scores can only become more negative.
      } else {
        if (strand) { // reverse
          // Go to all the "prev" nodes
          iter = curAlignHeapNode.node.prev.listIterator();
        } else { // forward
          // Go to all "next" nodes
          iter = curAlignHeapNode.node.next.listIterator();
        }
        while (iter.hasNext()) {
          NodeRecord next = iter.next();
          int f =
              passFilters(
                  graph, next.node, next.coverage, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE);
          if (0 == f) {
            heap.add(
                new AlignHeapNode(
                    curAlignHeapNode,
                    next.node,
                    next.coverage,
                    read.charAt(curAlignHeapNode.readOffset + 1),
                    qualities.charAt(curAlignHeapNode.readOffset + 1),
                    useSequenceQualities,
                    space));
          } else if (f < 0) {
            return;
          }
        }
        iter = null;
      }
      // Get next node
      curAlignHeapNode = heap.poll();
    }

    // Recover alignment
    Align.updateSAM(
        rec,
        sequence,
        programRecord,
        bestAlignHeapNode,
        space,
        read,
        qualities,
        softClipStartBases,
        softClipStartQualities,
        softClipEndBases,
        softClipEndQualities,
        strand,
        correctBases);
  }
  @Override
  public void execute() {
    log.info("Initializing kmer code map...");
    Map<Character, Integer> kmerCodeIndices = new HashMap<Character, Integer>();
    kmerCodeIndices.put('0', 1);
    kmerCodeIndices.put('A', 3);
    kmerCodeIndices.put('B', 4);
    kmerCodeIndices.put('C', 5);
    kmerCodeIndices.put('_', 6);
    kmerCodeIndices.put('.', 7);
    kmerCodeIndices.put('1', 9);

    Map<Character, String> kmerCodeNames = new LinkedHashMap<Character, String>();
    kmerCodeNames.put('0', "ref0");
    kmerCodeNames.put('A', "repetitive");
    kmerCodeNames.put('B', "both");
    kmerCodeNames.put('C', "lowcoverage");
    kmerCodeNames.put('_', "lowconfidence");
    kmerCodeNames.put('.', "novel");
    kmerCodeNames.put('1', "ref1");

    if (KMER_CODE_NAMES != null) {
      for (Character c : kmerCodeNames.keySet()) {
        String cStr = String.valueOf(c);
        if (KMER_CODE_NAMES.containsKey(cStr)) {
          kmerCodeNames.put(c, KMER_CODE_NAMES.get(cStr));
        }
      }
    }

    for (Character c : kmerCodeNames.keySet()) {
      log.info("  {} {}: {}", c, kmerCodeIndices.get(c), kmerCodeNames.get(c));
    }

    log.info("Loading annotated contigs...");
    Map<String, Map<String, String>> annotatedContigs = new HashMap<String, Map<String, String>>();
    int kmerSize = 0;

    if (ANN.length() > 0) {
      TableReader tr = new TableReader(ANN);
      for (Map<String, String> te : tr) {
        String contigName = te.get("contigName");

        if (kmerSize == 0) {
          kmerSize = te.get("seq").length() - te.get("kmerOrigin").length() + 1;
        }

        annotatedContigs.put(contigName, te);

        String[] ref0ToCanonicalExact =
            (te.get("ref0ToCanonicalExact").equals("NA")
                        || te.get("ref0ToCanonicalExact").equals("*:0-0")
                    ? "NA:0-0"
                    : te.get("ref0ToCanonicalExact"))
                .split("[:-]");
        String[] ref1ToCanonicalExact =
            (te.get("ref1ToCanonicalExact").equals("NA")
                        || te.get("ref1ToCanonicalExact").equals("*:0-0")
                    ? "NA:0-0"
                    : te.get("ref1ToCanonicalExact"))
                .split("[:-]");

        cout.println(
            te.get("sampleName")
                + "_"
                + te.get("accession")
                + "_"
                + contigName
                + " "
                + ref0ToCanonicalExact[0]
                + " "
                + ref0ToCanonicalExact[1]
                + " "
                + ref0ToCanonicalExact[2]
                + " radius1=0.8r");
        cout.println(
            te.get("sampleName")
                + "_"
                + te.get("accession")
                + "_"
                + contigName
                + " "
                + ref1ToCanonicalExact[0]
                + " "
                + ref1ToCanonicalExact[1]
                + " "
                + ref1ToCanonicalExact[2]
                + " radius2=0.6r");
      }
    }

    log.info("    contigs: {}", annotatedContigs.size());
    log.info("  kmer size: {}", kmerSize);

    log.info("Computing kmer inheritance information...");

    SAMFileHeader sfh = CONTIGS.getFileHeader();
    for (Character c : kmerCodeNames.keySet()) {
      SAMReadGroupRecord rgr = new SAMReadGroupRecord(kmerCodeNames.get(c));
      rgr.setSample(kmerCodeNames.get(c));
      sfh.addReadGroup(rgr);
    }

    SAMFileWriterFactory sfwf = new SAMFileWriterFactory();
    sfwf.setCreateIndex(true);
    SAMFileWriter sfw = sfwf.makeBAMWriter(sfh, false, bout);

    TableWriter tw = new TableWriter(sout);

    Set<IGVEntry> igvEntries = new TreeSet<IGVEntry>();
    int numContigs = 0;
    for (SAMRecord contig : CONTIGS) {
      if (CONTIG_NAMES == null
          || CONTIG_NAMES.isEmpty()
          || CONTIG_NAMES.contains(contig.getReadName())) {
        Map<String, String> te = annotatedContigs.get(contig.getReadName());

        if (annotatedContigs.containsKey(contig.getReadName())) {
          String seq = contig.getReadString();

          // log.debug("  te: {}", te);

          String annSeq = te.get("seq");
          String kmerOrigin = te.get("kmerOrigin");

          Map<CortexKmer, Character> kmerCodes = new HashMap<CortexKmer, Character>();
          for (int i = 0; i < kmerOrigin.length(); i++) {
            CortexKmer kmer = new CortexKmer(annSeq.substring(i, i + kmerSize));
            Character code = kmerOrigin.charAt(i);

            kmerCodes.put(kmer, code);
          }

          Map<Character, Integer> kmerStats = new HashMap<Character, Integer>();
          for (Character c : kmerCodeNames.keySet()) {
            kmerStats.put(c, 0);
          }

          boolean changed = false;

          // We want to be able to examine soft-clipped regions as well.
          List<CigarElement> ces = new ArrayList<CigarElement>();
          for (CigarElement ce : contig.getCigar().getCigarElements()) {
            if (ce.getOperator().equals(CigarOperator.S)) {
              ces.add(new CigarElement(ce.getLength(), CigarOperator.M));
              changed = true;
            } else {
              ces.add(ce);
            }
          }

          if (changed) {
            CigarElement firstCe = contig.getCigar().getCigarElements().get(0);

            if (firstCe.getOperator().equals(CigarOperator.S)) {
              contig.setAlignmentStart(contig.getAlignmentStart() - firstCe.getLength());
            }

            contig.setCigar(new Cigar(ces));
          }

          for (AlignmentBlock ab : contig.getAlignmentBlocks()) {
            for (int i = ab.getReadStart() - 1; i < ab.getReadStart() + ab.getLength(); i++) {
              if (i + kmerSize < seq.length()) {
                CortexKmer kmer = new CortexKmer(seq.substring(i, i + kmerSize));

                SAMRecord skmer = new SAMRecord(CONTIGS.getFileHeader());
                skmer.setReadBases(seq.substring(i, i + kmerSize).getBytes());

                List<CigarElement> cigarElements = new ArrayList<CigarElement>();
                cigarElements.add(new CigarElement(kmerSize, CigarOperator.M));
                Cigar cigar = new Cigar(cigarElements);

                skmer.setReadName(contig.getReadName() + "." + kmer.getKmerAsString());
                skmer.setReferenceName(contig.getReferenceName());
                skmer.setCigar(cigar);
                skmer.setReadPairedFlag(false);
                skmer.setDuplicateReadFlag(false);
                skmer.setMateNegativeStrandFlag(false);
                skmer.setAlignmentStart(ab.getReferenceStart() - ab.getReadStart() + 1 + i);
                skmer.setAttribute("RG", "none");
                skmer.setMappingQuality(0);

                Character c = kmerCodes.get(kmer);
                String codeName = kmerCodeNames.get(c);

                String parentReadGroupId = null;
                String sampleReadGroupId = null;
                for (SAMReadGroupRecord rgr : sfh.getReadGroups()) {
                  if (rgr.getSample().equals(codeName)) {
                    parentReadGroupId = rgr.getReadGroupId();
                  }

                  if (rgr.getSample().equals(contig.getReadGroup().getSample())) {
                    sampleReadGroupId = rgr.getReadGroupId();
                  }
                }

                skmer.setAttribute(
                    "RG", parentReadGroupId != null ? parentReadGroupId : sampleReadGroupId);
                skmer.setMappingQuality(99);

                sfw.addAlignment(skmer);

                kmerStats.put(c, kmerStats.get(c) + 1);

                IGVEntry igvEntry = new IGVEntry();
                igvEntry.chromosome = contig.getReferenceName();
                igvEntry.start = ab.getReferenceStart() - ab.getReadStart() + i;
                igvEntry.parentageName = kmerCodeNames.get(c);
                igvEntry.parentage = kmerCodeIndices.get(c);
                igvEntries.add(igvEntry);
              }
            }
          }

          if (!contig.isSecondaryOrSupplementary()) {
            beout.println(
                contig.getReferenceName()
                    + "\t"
                    + contig.getAlignmentStart()
                    + "\t"
                    + contig.getAlignmentEnd()
                    + "\t"
                    + contig.getReadName()
                    + "."
                    + contig.getReadGroup().getSample());

            if (annotatedContigs.size() > 10 && numContigs % (annotatedContigs.size() / 10) == 0) {
              log.info("  processed {}/{} contigs", numContigs, annotatedContigs.size());
            }
            numContigs++;
          }

          Map<String, String> stats = new LinkedHashMap<String, String>();
          stats.put("contigName", contig.getReadName());
          stats.put("sampleName", contig.getReadGroup().getSample());
          for (Character c : kmerCodeNames.keySet()) {
            stats.put(kmerCodeNames.get(c), String.valueOf(kmerStats.get(c)));
          }
          tw.addEntry(stats);
        }
      }
    }

    log.info("Writing kmer inheritance information...");
    out.printf("%s\t%s\t%s\t%s\t%s\n", "Chromosome", "Start", "End", "Feature", "Parentage");
    for (IGVEntry igvEntry : igvEntries) {
      out.printf(
          "%s\t%d\t%d\t%s\t%d\n",
          igvEntry.chromosome,
          igvEntry.start,
          igvEntry.start + 1,
          igvEntry.parentageName,
          igvEntry.parentage);
    }

    sfw.close();
  }