/**
 * Writes the given reads to a freshly created temporary BAM file (with index creation enabled)
 * and stores that file in {@code testBAM}.
 *
 * <p>The BAM file and both index file names that may be produced next to it
 * ({@code <name>.bai} and {@code <name>.bam.bai}) are registered for deletion on JVM exit.
 *
 * @param reads the reads to write; the header of the first read is used as the file header, so
 *     the list must not be empty
 * @throws IOException if the temporary file cannot be created
 */
private void createBAM(final List<GATKSAMRecord> reads) throws IOException {
  final String bamSuffix = ".bam";
  testBAM = File.createTempFile("TraverseActiveRegionsUnitTest", bamSuffix);
  testBAM.deleteOnExit();

  // Register the index files for cleanup up front so they are removed even if writing fails.
  // Only the trailing ".bam" is swapped for ".bai": a plain String.replace would also rewrite
  // any ".bam" that happens to appear in a parent directory name.
  final String bamPath = testBAM.getAbsolutePath();
  final String pathWithoutSuffix = bamPath.substring(0, bamPath.length() - bamSuffix.length());
  new File(pathWithoutSuffix + ".bai").deleteOnExit();
  new File(bamPath + ".bai").deleteOnExit();

  final SAMFileWriter out =
      new SAMFileWriterFactory()
          .setCreateIndex(true)
          .makeBAMWriter(reads.get(0).getHeader(), true, testBAM);
  try {
    for (final GATKSAMRecord read : reads) {
      out.addAlignment(read);
    }
  } finally {
    // Always release the writer, even when adding an alignment throws.
    out.close();
  }
}
public static void align( Graph graph, SAMRecord rec, Node recNode, ReferenceSequence sequence, SAMProgramRecord programRecord, int offset, AlleleCoverageCutoffs alleleCoverageCutoffs, boolean correctBases, boolean useSequenceQualities, int MAXIMUM_TOTAL_COVERAGE, int MAX_HEAP_SIZE) throws Exception { int i; AlignHeapNode curAlignHeapNode = null; AlignHeapNode nextAlignHeapNode = null; AlignHeapNode bestAlignHeapNode = null; AlignHeap heap = null; String read = null; // could be cs String readBases = null; // always nt String qualities = null; // could be cq SRMAUtil.Space space = SRMAUtil.Space.NTSPACE; ListIterator<NodeRecord> iter = null; AlignHeapNodeComparator comp = null; int alignmentStart = -1; int numStartNodesAdded = 0; boolean strand = rec.getReadNegativeStrandFlag(); // false -> forward, true -> reverse String softClipStartBases = null; String softClipStartQualities = null; String softClipEndBases = null; String softClipEndQualities = null; // Debugging stuff String readName = rec.getReadName(); assert SRMAUtil.Space.COLORSPACE != space; // Get space read = (String) rec.getAttribute("CS"); if (null == read) { // Use base space space = SRMAUtil.Space.NTSPACE; } else { // assumes CS and CQ are always in sequencing order space = SRMAUtil.Space.COLORSPACE; } // Get read and qualities if (space == SRMAUtil.Space.NTSPACE) { byte tmpRead[] = rec.getReadString().getBytes(); byte tmpQualities[] = rec.getBaseQualityString().getBytes(); // Reverse once if (strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); SAMRecordUtil.reverseArray(tmpQualities); } read = new String(tmpRead); readBases = new String(tmpRead); qualities = new String(tmpQualities); // Reverse again if (strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); SAMRecordUtil.reverseArray(tmpQualities); } } else { byte tmpRead[] = rec.getReadString().getBytes(); // Reverse once if (strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); } readBases = new String(tmpRead); // Reverse again if 
(strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); } read = SRMAUtil.normalizeColorSpaceRead(read); qualities = (String) rec.getAttribute("CQ"); // Some aligners include a quality value for the adapter. A quality value // IMHO should not be given for an unobserved (assumed) peice of data. Trim // the first quality in this case if (qualities.length() == 1 + read.length()) { // trim the first quality qualities = qualities.substring(1); } } // Reverse back if (readBases.length() <= 0) { throw new Exception("Error. The current alignment has no bases."); } if (read.length() <= 0) { throw new Exception("Error. The current alignment has no bases."); } if (qualities.length() <= 0) { throw new Exception("Error. The current alignment has no qualities."); } if (readBases.length() != read.length()) { if (space == SRMAUtil.Space.COLORSPACE) { throw new Exception( "Error. The current alignment's read bases length does not match the length of the colors in the CS tag [" + rec.getReadName() + "]."); } else { throw new Exception("Error. Internal error: readBases.length() != read.length()"); } } // Deal with soft-clipping // - save the soft clipped sequence for latter { List<CigarElement> cigarElements = null; cigarElements = rec.getCigar().getCigarElements(); CigarElement e1 = cigarElements.get(0); // first CigarElement e2 = cigarElements.get(cigarElements.size() - 1); // last // Soft-clipped if (CigarOperator.S == e1.getOperator()) { if (space == SRMAUtil.Space.COLORSPACE) { throw new Exception( "Error. 
Soft clipping with color-space data not currently supported."); } int l = e1.getLength(); if (strand) { // reverse softClipStartBases = readBases.substring(readBases.length() - l); softClipStartQualities = qualities.substring(qualities.length() - l); readBases = readBases.substring(0, readBases.length() - l); read = read.substring(0, read.length() - l); qualities = qualities.substring(0, qualities.length() - l); } else { softClipStartBases = readBases.substring(0, l - 1); softClipStartQualities = qualities.substring(0, l - 1); readBases = readBases.substring(l); read = read.substring(l); qualities = qualities.substring(l); } } if (CigarOperator.S == e2.getOperator()) { if (space == SRMAUtil.Space.COLORSPACE) { throw new Exception( "Error. Soft clipping with color-space data not currently supported."); } int l = e2.getLength(); if (strand) { // reverse softClipEndBases = readBases.substring(0, l - 1); softClipEndQualities = qualities.substring(0, l - 1); readBases = readBases.substring(l); read = read.substring(l); qualities = qualities.substring(l); } else { softClipEndBases = readBases.substring(readBases.length() - l); softClipEndQualities = qualities.substring(qualities.length() - l); readBases = readBases.substring(0, readBases.length() - l); read = read.substring(0, read.length() - l); qualities = qualities.substring(0, qualities.length() - l); } } } // Remove mate pair information Align.removeMateInfo(rec); comp = new AlignHeapNodeComparator( (strand) ? 
AlignHeap.HeapType.MAXHEAP : AlignHeap.HeapType.MINHEAP); // Bound by original alignment if possible bestAlignHeapNode = Align.boundWithOriginalAlignment( rec, graph, recNode, comp, strand, read, qualities, readBases, space, sequence, alleleCoverageCutoffs, useSequenceQualities, MAXIMUM_TOTAL_COVERAGE, MAX_HEAP_SIZE); /* System.err.println("readName="+rec.getReadName()); if(null != bestAlignHeapNode) { System.err.println("\nFOUND BEST:" + rec.toString()); } else { System.err.println("\nNOT FOUND (BEST): " + rec.toString()); } Align.updateSAM(rec, programRecord, bestAlignHeapNode, space, read, qualities, softClipStartBases, softClipStartQualities, softClipEndBases, softClipEndQualities, strand, correctBases); return; */ heap = new AlignHeap((strand) ? AlignHeap.HeapType.MAXHEAP : AlignHeap.HeapType.MINHEAP); // Add start nodes if (strand) { // reverse alignmentStart = rec.getAlignmentEnd(); for (i = alignmentStart + offset; alignmentStart - offset <= i; i--) { int position = graph.getPriorityQueueIndexAtPositionOrBefore(i); PriorityQueue<Node> startNodeQueue = graph.getPriorityQueue(position); if (0 != position && null != startNodeQueue) { Iterator<Node> startNodeQueueIter = startNodeQueue.iterator(); while (startNodeQueueIter.hasNext()) { Node startNode = startNodeQueueIter.next(); int f = passFilters(graph, startNode, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE); if (0 == f) { heap.add( new AlignHeapNode( null, startNode, startNode.coverage, read.charAt(0), qualities.charAt(0), useSequenceQualities, space)); } else if (f < 0) { return; } if (startNode.position < i) { i = startNode.position; } numStartNodesAdded++; } } } } else { alignmentStart = rec.getAlignmentStart(); for (i = alignmentStart - offset; i <= alignmentStart + offset; i++) { int position = graph.getPriorityQueueIndexAtPositionOrGreater(i); PriorityQueue<Node> startNodeQueue = graph.getPriorityQueue(position); if (0 != position && null != startNodeQueue) { Iterator<Node> startNodeQueueIter = 
startNodeQueue.iterator(); while (startNodeQueueIter.hasNext()) { Node startNode = startNodeQueueIter.next(); int f = passFilters(graph, startNode, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE); if (0 == f) { heap.add( new AlignHeapNode( null, startNode, startNode.coverage, read.charAt(0), qualities.charAt(0), useSequenceQualities, space)); } else if (f < 0) { return; } if (i < startNode.position) { i = startNode.position; } numStartNodesAdded++; } } } } if (numStartNodesAdded == 0) { throw new Exception("Did not add any start nodes!"); } // Get first node off the heap curAlignHeapNode = heap.poll(); while (null != curAlignHeapNode) { if (MAX_HEAP_SIZE <= heap.size()) { // too many to consider return; } // System.err.println("strand:" + strand + "\tsize:" + heap.size() + "\talignmentStart:" + // alignmentStart + "\toffset:" + offset + "\treadOffset:" + curAlignHeapNode.readOffset); // System.err.print("size:" + heap.size() + ":" + curAlignHeapNode.readOffset + ":" + // curAlignHeapNode.score + ":" + curAlignHeapNode.alleleCoverageSum + ":" + // curAlignHeapNode.startPosition + "\t"); // curAlignHeapNode.node.print(System.err); // System.err.print("\rposition:" + curAlignHeapNode.node.position + "\treadOffset:" + // curAlignHeapNode.readOffset); // Remove all non-insertions with the same contig/pos/read-offset/type/base and lower score nextAlignHeapNode = heap.peek(); while (Node.INSERTION != curAlignHeapNode.node.type && null != nextAlignHeapNode && 0 == comp.compare(curAlignHeapNode, nextAlignHeapNode)) { if (curAlignHeapNode.score < nextAlignHeapNode.score || (curAlignHeapNode.score == nextAlignHeapNode.score && curAlignHeapNode.alleleCoverageSum < nextAlignHeapNode.alleleCoverageSum)) { // Update current node curAlignHeapNode = heap.poll(); } else { // Ignore next node heap.poll(); } nextAlignHeapNode = heap.peek(); } nextAlignHeapNode = null; // Check if the alignment is complete if (curAlignHeapNode.readOffset == read.length() - 1) { // All read bases 
examined, store if has the best alignment. // System.err.print(curAlignHeapNode.alleleCoverageSum + ":" + curAlignHeapNode.score + // ":"); // System.err.print(curAlignHeapNode.startPosition + ":"); // curAlignHeapNode.node.print(System.err); if (null == bestAlignHeapNode || bestAlignHeapNode.score < curAlignHeapNode.score || (bestAlignHeapNode.score == curAlignHeapNode.score && bestAlignHeapNode.alleleCoverageSum < curAlignHeapNode.alleleCoverageSum)) { bestAlignHeapNode = curAlignHeapNode; } } else if (null != bestAlignHeapNode && curAlignHeapNode.score < bestAlignHeapNode.score) { // ignore, under the assumption that scores can only become more negative. } else { if (strand) { // reverse // Go to all the "prev" nodes iter = curAlignHeapNode.node.prev.listIterator(); } else { // forward // Go to all "next" nodes iter = curAlignHeapNode.node.next.listIterator(); } while (iter.hasNext()) { NodeRecord next = iter.next(); int f = passFilters( graph, next.node, next.coverage, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE); if (0 == f) { heap.add( new AlignHeapNode( curAlignHeapNode, next.node, next.coverage, read.charAt(curAlignHeapNode.readOffset + 1), qualities.charAt(curAlignHeapNode.readOffset + 1), useSequenceQualities, space)); } else if (f < 0) { return; } } iter = null; } // Get next node curAlignHeapNode = heap.poll(); } // Recover alignment Align.updateSAM( rec, sequence, programRecord, bestAlignHeapNode, space, read, qualities, softClipStartBases, softClipStartQualities, softClipEndBases, softClipEndQualities, strand, correctBases); }