public void set(MouseEvent e, SAMRecord sr) { if (sr == null) return; StringBuffer text = new StringBuffer(); text.append("<html>"); if (sr != null) { text.append( MessageManager.getString("shortreadtrack.name") + " " + sr.getReadName() + "<br/>"); text.append( MessageManager.getString("shortreadtrack.len") + " " + sr.getReadLength() + "<br/>"); text.append( MessageManager.getString("shortreadtrack.cigar") + " " + sr.getCigarString() + "<br/>"); text.append( MessageManager.getString("shortreadtrack.sequence") + " " + rerun(sr.getReadString()) + "<br/>"); text.append( MessageManager.getString("shortreadtrack.paired") + " " + sr.getReadPairedFlag() + "<br/>"); if (sr.getReadPairedFlag()) { if (!sr.getMateUnmappedFlag()) text.append( MessageManager.getString("shortreadtrack.mate") + " " + sr.getMateReferenceName() + ":" + sr.getMateAlignmentStart() + "<br/>"); else text.append(MessageManager.getString("shortreadtrack.mate_missing") + "<br/>"); text.append( MessageManager.getString("shortreadtrack.second") + " " + sr.getFirstOfPairFlag()); } // text.append("<br/>"); } text.append("</html>"); if (!text.toString().equals(floater.getText())) { floater.setText(text.toString()); this.pack(); } setLocation(e.getXOnScreen() + 5, e.getYOnScreen() + 5); if (!isVisible()) { setVisible(true); } }
public static void align( Graph graph, SAMRecord rec, Node recNode, ReferenceSequence sequence, SAMProgramRecord programRecord, int offset, AlleleCoverageCutoffs alleleCoverageCutoffs, boolean correctBases, boolean useSequenceQualities, int MAXIMUM_TOTAL_COVERAGE, int MAX_HEAP_SIZE) throws Exception { int i; AlignHeapNode curAlignHeapNode = null; AlignHeapNode nextAlignHeapNode = null; AlignHeapNode bestAlignHeapNode = null; AlignHeap heap = null; String read = null; // could be cs String readBases = null; // always nt String qualities = null; // could be cq SRMAUtil.Space space = SRMAUtil.Space.NTSPACE; ListIterator<NodeRecord> iter = null; AlignHeapNodeComparator comp = null; int alignmentStart = -1; int numStartNodesAdded = 0; boolean strand = rec.getReadNegativeStrandFlag(); // false -> forward, true -> reverse String softClipStartBases = null; String softClipStartQualities = null; String softClipEndBases = null; String softClipEndQualities = null; // Debugging stuff String readName = rec.getReadName(); assert SRMAUtil.Space.COLORSPACE != space; // Get space read = (String) rec.getAttribute("CS"); if (null == read) { // Use base space space = SRMAUtil.Space.NTSPACE; } else { // assumes CS and CQ are always in sequencing order space = SRMAUtil.Space.COLORSPACE; } // Get read and qualities if (space == SRMAUtil.Space.NTSPACE) { byte tmpRead[] = rec.getReadString().getBytes(); byte tmpQualities[] = rec.getBaseQualityString().getBytes(); // Reverse once if (strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); SAMRecordUtil.reverseArray(tmpQualities); } read = new String(tmpRead); readBases = new String(tmpRead); qualities = new String(tmpQualities); // Reverse again if (strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); SAMRecordUtil.reverseArray(tmpQualities); } } else { byte tmpRead[] = rec.getReadString().getBytes(); // Reverse once if (strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); } readBases = new String(tmpRead); // Reverse again if 
(strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); } read = SRMAUtil.normalizeColorSpaceRead(read); qualities = (String) rec.getAttribute("CQ"); // Some aligners include a quality value for the adapter. A quality value // IMHO should not be given for an unobserved (assumed) peice of data. Trim // the first quality in this case if (qualities.length() == 1 + read.length()) { // trim the first quality qualities = qualities.substring(1); } } // Reverse back if (readBases.length() <= 0) { throw new Exception("Error. The current alignment has no bases."); } if (read.length() <= 0) { throw new Exception("Error. The current alignment has no bases."); } if (qualities.length() <= 0) { throw new Exception("Error. The current alignment has no qualities."); } if (readBases.length() != read.length()) { if (space == SRMAUtil.Space.COLORSPACE) { throw new Exception( "Error. The current alignment's read bases length does not match the length of the colors in the CS tag [" + rec.getReadName() + "]."); } else { throw new Exception("Error. Internal error: readBases.length() != read.length()"); } } // Deal with soft-clipping // - save the soft clipped sequence for latter { List<CigarElement> cigarElements = null; cigarElements = rec.getCigar().getCigarElements(); CigarElement e1 = cigarElements.get(0); // first CigarElement e2 = cigarElements.get(cigarElements.size() - 1); // last // Soft-clipped if (CigarOperator.S == e1.getOperator()) { if (space == SRMAUtil.Space.COLORSPACE) { throw new Exception( "Error. 
Soft clipping with color-space data not currently supported."); } int l = e1.getLength(); if (strand) { // reverse softClipStartBases = readBases.substring(readBases.length() - l); softClipStartQualities = qualities.substring(qualities.length() - l); readBases = readBases.substring(0, readBases.length() - l); read = read.substring(0, read.length() - l); qualities = qualities.substring(0, qualities.length() - l); } else { softClipStartBases = readBases.substring(0, l - 1); softClipStartQualities = qualities.substring(0, l - 1); readBases = readBases.substring(l); read = read.substring(l); qualities = qualities.substring(l); } } if (CigarOperator.S == e2.getOperator()) { if (space == SRMAUtil.Space.COLORSPACE) { throw new Exception( "Error. Soft clipping with color-space data not currently supported."); } int l = e2.getLength(); if (strand) { // reverse softClipEndBases = readBases.substring(0, l - 1); softClipEndQualities = qualities.substring(0, l - 1); readBases = readBases.substring(l); read = read.substring(l); qualities = qualities.substring(l); } else { softClipEndBases = readBases.substring(readBases.length() - l); softClipEndQualities = qualities.substring(qualities.length() - l); readBases = readBases.substring(0, readBases.length() - l); read = read.substring(0, read.length() - l); qualities = qualities.substring(0, qualities.length() - l); } } } // Remove mate pair information Align.removeMateInfo(rec); comp = new AlignHeapNodeComparator( (strand) ? 
AlignHeap.HeapType.MAXHEAP : AlignHeap.HeapType.MINHEAP); // Bound by original alignment if possible bestAlignHeapNode = Align.boundWithOriginalAlignment( rec, graph, recNode, comp, strand, read, qualities, readBases, space, sequence, alleleCoverageCutoffs, useSequenceQualities, MAXIMUM_TOTAL_COVERAGE, MAX_HEAP_SIZE); /* System.err.println("readName="+rec.getReadName()); if(null != bestAlignHeapNode) { System.err.println("\nFOUND BEST:" + rec.toString()); } else { System.err.println("\nNOT FOUND (BEST): " + rec.toString()); } Align.updateSAM(rec, programRecord, bestAlignHeapNode, space, read, qualities, softClipStartBases, softClipStartQualities, softClipEndBases, softClipEndQualities, strand, correctBases); return; */ heap = new AlignHeap((strand) ? AlignHeap.HeapType.MAXHEAP : AlignHeap.HeapType.MINHEAP); // Add start nodes if (strand) { // reverse alignmentStart = rec.getAlignmentEnd(); for (i = alignmentStart + offset; alignmentStart - offset <= i; i--) { int position = graph.getPriorityQueueIndexAtPositionOrBefore(i); PriorityQueue<Node> startNodeQueue = graph.getPriorityQueue(position); if (0 != position && null != startNodeQueue) { Iterator<Node> startNodeQueueIter = startNodeQueue.iterator(); while (startNodeQueueIter.hasNext()) { Node startNode = startNodeQueueIter.next(); int f = passFilters(graph, startNode, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE); if (0 == f) { heap.add( new AlignHeapNode( null, startNode, startNode.coverage, read.charAt(0), qualities.charAt(0), useSequenceQualities, space)); } else if (f < 0) { return; } if (startNode.position < i) { i = startNode.position; } numStartNodesAdded++; } } } } else { alignmentStart = rec.getAlignmentStart(); for (i = alignmentStart - offset; i <= alignmentStart + offset; i++) { int position = graph.getPriorityQueueIndexAtPositionOrGreater(i); PriorityQueue<Node> startNodeQueue = graph.getPriorityQueue(position); if (0 != position && null != startNodeQueue) { Iterator<Node> startNodeQueueIter = 
startNodeQueue.iterator(); while (startNodeQueueIter.hasNext()) { Node startNode = startNodeQueueIter.next(); int f = passFilters(graph, startNode, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE); if (0 == f) { heap.add( new AlignHeapNode( null, startNode, startNode.coverage, read.charAt(0), qualities.charAt(0), useSequenceQualities, space)); } else if (f < 0) { return; } if (i < startNode.position) { i = startNode.position; } numStartNodesAdded++; } } } } if (numStartNodesAdded == 0) { throw new Exception("Did not add any start nodes!"); } // Get first node off the heap curAlignHeapNode = heap.poll(); while (null != curAlignHeapNode) { if (MAX_HEAP_SIZE <= heap.size()) { // too many to consider return; } // System.err.println("strand:" + strand + "\tsize:" + heap.size() + "\talignmentStart:" + // alignmentStart + "\toffset:" + offset + "\treadOffset:" + curAlignHeapNode.readOffset); // System.err.print("size:" + heap.size() + ":" + curAlignHeapNode.readOffset + ":" + // curAlignHeapNode.score + ":" + curAlignHeapNode.alleleCoverageSum + ":" + // curAlignHeapNode.startPosition + "\t"); // curAlignHeapNode.node.print(System.err); // System.err.print("\rposition:" + curAlignHeapNode.node.position + "\treadOffset:" + // curAlignHeapNode.readOffset); // Remove all non-insertions with the same contig/pos/read-offset/type/base and lower score nextAlignHeapNode = heap.peek(); while (Node.INSERTION != curAlignHeapNode.node.type && null != nextAlignHeapNode && 0 == comp.compare(curAlignHeapNode, nextAlignHeapNode)) { if (curAlignHeapNode.score < nextAlignHeapNode.score || (curAlignHeapNode.score == nextAlignHeapNode.score && curAlignHeapNode.alleleCoverageSum < nextAlignHeapNode.alleleCoverageSum)) { // Update current node curAlignHeapNode = heap.poll(); } else { // Ignore next node heap.poll(); } nextAlignHeapNode = heap.peek(); } nextAlignHeapNode = null; // Check if the alignment is complete if (curAlignHeapNode.readOffset == read.length() - 1) { // All read bases 
examined, store if has the best alignment. // System.err.print(curAlignHeapNode.alleleCoverageSum + ":" + curAlignHeapNode.score + // ":"); // System.err.print(curAlignHeapNode.startPosition + ":"); // curAlignHeapNode.node.print(System.err); if (null == bestAlignHeapNode || bestAlignHeapNode.score < curAlignHeapNode.score || (bestAlignHeapNode.score == curAlignHeapNode.score && bestAlignHeapNode.alleleCoverageSum < curAlignHeapNode.alleleCoverageSum)) { bestAlignHeapNode = curAlignHeapNode; } } else if (null != bestAlignHeapNode && curAlignHeapNode.score < bestAlignHeapNode.score) { // ignore, under the assumption that scores can only become more negative. } else { if (strand) { // reverse // Go to all the "prev" nodes iter = curAlignHeapNode.node.prev.listIterator(); } else { // forward // Go to all "next" nodes iter = curAlignHeapNode.node.next.listIterator(); } while (iter.hasNext()) { NodeRecord next = iter.next(); int f = passFilters( graph, next.node, next.coverage, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE); if (0 == f) { heap.add( new AlignHeapNode( curAlignHeapNode, next.node, next.coverage, read.charAt(curAlignHeapNode.readOffset + 1), qualities.charAt(curAlignHeapNode.readOffset + 1), useSequenceQualities, space)); } else if (f < 0) { return; } } iter = null; } // Get next node curAlignHeapNode = heap.poll(); } // Recover alignment Align.updateSAM( rec, sequence, programRecord, bestAlignHeapNode, space, read, qualities, softClipStartBases, softClipStartQualities, softClipEndBases, softClipEndQualities, strand, correctBases); }
/**
 * Converts a SAM/BAM input into FASTQ output.
 *
 * <p>Options handled here: {@code -F file} (forward/first-of-pair output), {@code -R file}
 * (reverse/second-of-pair output), {@code -N n} (records kept in RAM by the sorting collection,
 * never fewer than 100) and {@code -r} (emit a one-base {@code N}/{@code #} placeholder when one
 * read of a pair is missing instead of failing). Any other option is delegated to
 * {@code handleOtherOptions}.
 *
 * <p>Input is read from stdin when no positional argument is given, or from the single file
 * named on the command line. Secondary and supplementary records are skipped. Mapped
 * negative-strand reads are reverse-complemented (and their qualities reversed) before being
 * stored. Records whose sequence and quality lengths differ are reported and dropped. All kept
 * reads go through a sorting collection and are then written out; paired input without
 * {@code -R} is written interlaced to the forward destination.
 *
 * <p>Every reader, iterator and writer opened here is released in the {@code finally} block, so
 * nothing stays open when processing fails part-way.
 *
 * @param args command-line arguments
 * @return {@code 0} on success, {@code -1} on a usage or processing error
 */
@Override
public int doWork(String[] args) {
    boolean repair_missing_read = false;
    SortingCollectionFactory<MappedFastq> sortingFactory =
        new SortingCollectionFactory<MappedFastq>();
    File forwardFile = null;
    File reverseFile = null;
    com.github.lindenb.jvarkit.util.cli.GetOpt opt =
        new com.github.lindenb.jvarkit.util.cli.GetOpt();
    int c;

    sortingFactory.setComponentType(MappedFastq.class);
    sortingFactory.setCodec(new MappedFastqCodec());
    sortingFactory.setComparator(new MappedFastqComparator());

    // NOTE(review): 't' is handled below but "t:" is not part of the option string added here;
    // it is only reachable if super.getGetOptDefault() declares it -- confirm.
    while ((c = opt.getopt(args, super.getGetOptDefault() + "F:R:N:r")) != -1) {
        switch (c) {
            case 'F':
                forwardFile = new File(opt.getOptArg());
                break;
            case 'R':
                reverseFile = new File(opt.getOptArg());
                break;
            case 't':
                addTmpDirectory(new File(opt.getOptArg()));
                break;
            case 'N':
                sortingFactory.setMaxRecordsInRAM(
                    Math.max(Integer.parseInt(opt.getOptArg()), 100));
                break;
            case 'r':
                repair_missing_read = true;
                break;
            case ':':
                System.err.println("Missing argument for option -" + opt.getOptOpt());
                return -1;
            default:
                {
                    switch (handleOtherOptions(c, opt, args)) {
                        case EXIT_FAILURE:
                            return -1;
                        case EXIT_SUCCESS:
                            return 0;
                        default:
                            break;
                    }
                }
        }
    }

    // Everything closeable is declared here so the finally block can release it on any exit.
    SAMFileReader sfr = null;
    SAMRecordIterator iter = null;
    CloseableIterator<MappedFastq> r = null;
    FastqWriter fqw1 = null;
    FastqWriter fqw2 = null;
    SortingCollection<MappedFastq> fastqCollection = null;
    try {
        sortingFactory.setTmpDirs(this.getTmpDirectories());
        fastqCollection = sortingFactory.make();
        fastqCollection.setDestructiveIteration(true);
        boolean found_single = false;
        boolean found_paired = false;
        long non_primary_alignmaned_flag = 0L;

        if (opt.getOptInd() == args.length) {
            info("Reading from stdin");
            sfr = new SAMFileReader(System.in);
        } else if (opt.getOptInd() + 1 == args.length) {
            String filename = args[opt.getOptInd()];
            sfr = new SAMFileReader(new File(filename));
        } else {
            error(getMessageBundle("illegal.number.of.arguments"));
            return -1;
        }
        sfr.setValidationStringency(ValidationStringency.LENIENT);
        iter = sfr.iterator();
        SAMSequenceDictionaryProgress progress =
            new SAMSequenceDictionaryProgress(sfr.getFileHeader().getSequenceDictionary());

        while (iter.hasNext()) {
            SAMRecord rec = iter.next();
            progress.watch(rec);

            if (rec.isSecondaryOrSupplementary()) {
                // Warn once, on the first skipped record only.
                if (non_primary_alignmaned_flag == 0) {
                    warning(
                        "SKIPPING NON-PRIMARY "
                            + (non_primary_alignmaned_flag + 1)
                            + " ALIGNMENTS");
                }
                non_primary_alignmaned_flag++;
                continue;
            }

            MappedFastq m = new MappedFastq();
            m.name = rec.getReadName();
            if (m.name == null) m.name = "";
            m.hash = m.name.hashCode();
            m.seq = rec.getReadString();
            if (m.seq.equals(SAMRecord.NULL_SEQUENCE_STRING)) m.seq = "";
            m.qual = rec.getBaseQualityString();
            if (m.qual.equals(SAMRecord.NULL_QUALS_STRING)) m.qual = "";

            // Mapped reverse-strand reads are flipped back before being written out.
            if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
                m.seq = AcidNucleics.reverseComplement(m.seq);
                m.qual = new StringBuilder(m.qual).reverse().toString();
            }
            if (m.seq.length() != m.qual.length()) {
                error("length(seq)!=length(qual) in " + m.name);
                continue;
            }
            // An empty record is replaced by a single-base placeholder.
            if (m.seq.isEmpty() && m.qual.isEmpty()) {
                m.seq = "N";
                m.qual = "#";
            }

            if (rec.getReadPairedFlag()) {
                found_paired = true;
                if (found_single) {
                    // The reader is closed by the finally block.
                    throw new PicardException("input is a mix of paired/singled reads");
                }
                m.side = (byte) (rec.getSecondOfPairFlag() ? 2 : 1);
            } else {
                found_single = true;
                if (found_paired) {
                    // The reader is closed by the finally block.
                    throw new PicardException("input is a mix of paired/singled reads");
                }
                m.side = (byte) 0;
            }
            fastqCollection.add(m);
        }
        iter.close();
        iter = null;
        CloserUtil.close(sfr);
        sfr = null;
        progress.finish();

        fastqCollection.doneAdding();
        info("Done reading.");

        if (found_paired) {
            if (forwardFile != null) {
                info("Writing to " + forwardFile);
                fqw1 = new BasicFastqWriter(forwardFile);
            } else {
                info("Writing to stdout");
                fqw1 = new BasicFastqWriter(new PrintStream(System.out));
            }
            if (reverseFile != null) {
                info("Writing to " + reverseFile);
                fqw2 = new BasicFastqWriter(reverseFile);
            } else {
                info("Writing to interlaced stdout");
                fqw2 = fqw1;
            }

            // Consecutive records sharing a name are collected in 'row' and flushed as one pair.
            List<MappedFastq> row = new ArrayList<MappedFastq>();
            r = fastqCollection.iterator();
            for (; ; ) {
                MappedFastq curr = null;
                if (r.hasNext()) curr = r.next();
                if (curr == null || (!row.isEmpty() && !row.get(0).name.equals(curr.name))) {
                    if (!row.isEmpty()) {
                        if (row.size() > 2) {
                            warning("WTF :" + row);
                        }
                        boolean found_F = false;
                        boolean found_R = false;
                        for (MappedFastq m : row) {
                            switch ((int) m.side) {
                                case 1:
                                    if (found_F)
                                        throw new PicardException(
                                            "two forward reads found for " + row.get(0).name);
                                    found_F = true;
                                    echo(fqw1, m);
                                    break;
                                case 2:
                                    if (found_R)
                                        throw new PicardException(
                                            "two reverse reads found for " + row.get(0).name);
                                    found_R = true;
                                    echo(fqw2, m);
                                    break;
                                default:
                                    throw new IllegalStateException("uh???");
                            }
                        }
                        if (!found_F) {
                            if (repair_missing_read) {
                                warning("forward not found for " + row.get(0));
                                MappedFastq pad = new MappedFastq();
                                pad.side = (byte) 1;
                                pad.name = row.get(0).name;
                                pad.seq = "N";
                                pad.qual = "#";
                                echo(fqw1, pad);
                            } else {
                                throw new PicardException("forward not found for " + row);
                            }
                        }
                        if (!found_R) {
                            if (repair_missing_read) {
                                warning("reverse not found for " + row.get(0));
                                MappedFastq pad = new MappedFastq();
                                pad.side = (byte) 2;
                                pad.name = row.get(0).name;
                                pad.seq = "N";
                                pad.qual = "#";
                                echo(fqw2, pad);
                            } else {
                                throw new PicardException("reverse not found for " + row);
                            }
                        }
                    }
                    if (curr == null) break;
                    row.clear();
                }
                row.add(curr);
            }
            r.close();
            r = null;
            fqw1.close();
            // In interlaced mode both variables point at the same writer: close it only once.
            if (fqw2 != fqw1) {
                fqw2.close();
            }
            fqw1 = null;
            fqw2 = null;
        } else if (found_single) {
            if (forwardFile != null) {
                info("Writing to " + forwardFile);
                fqw1 = new BasicFastqWriter(forwardFile);
            } else {
                info("Writing to stdout");
                fqw1 = new BasicFastqWriter(new PrintStream(System.out));
            }
            r = fastqCollection.iterator();
            while (r.hasNext()) {
                echo(fqw1, r.next());
            }
            r.close();
            r = null;
            fqw1.close();
            fqw1 = null;
        }
        return 0;
    } catch (Exception err) {
        error(err);
        return -1;
    } finally {
        // Anything still non-null was left open by an early return or an exception.
        try {
            if (r != null) CloserUtil.close(r);
            if (fqw1 != null) CloserUtil.close(fqw1);
            if (fqw2 != null && fqw2 != fqw1) CloserUtil.close(fqw2);
            if (iter != null) CloserUtil.close(iter);
            if (sfr != null) CloserUtil.close(sfr);
        } finally {
            if (fastqCollection != null) fastqCollection.cleanup();
        }
    }
}