/** * Finds the adaptor boundary around the read and returns the first base inside the adaptor that * is closest to the read boundary. If the read is in the positive strand, this is the first base * after the end of the fragment (Picard calls it 'insert'), if the read is in the negative * strand, this is the first base before the beginning of the fragment. * * <p>There are two cases we need to treat here: * * <p>1) Our read is in the reverse strand : * * <p><----------------------| * |---------------------> * * <p>in these cases, the adaptor boundary is at the mate start (minus one) * * <p>2) Our read is in the forward strand : * * <p>|----------------------> * <----------------------| * * <p>in these cases the adaptor boundary is at the start of the read plus the inferred insert * size (plus one) * * @param read the read being tested for the adaptor boundary * @return the reference coordinate for the adaptor boundary (effectively the first base IN the * adaptor, closest to the read. NULL if the read is unmapped or the mate is mapped to another * contig. 
*/ public static Integer getAdaptorBoundary(final SAMRecord read) { final int MAXIMUM_ADAPTOR_LENGTH = 8; final int insertSize = Math.abs( read .getInferredInsertSize()); // the inferred insert size can be negative if the mate // is mapped before the read (so we take the absolute // value) if (insertSize == 0 || read .getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or // unmapped pairs return null; Integer adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first // base IN the adaptor, closest to the read) if (read.getReadNegativeStrandFlag()) adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header) else adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header) if ((adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH)) adaptorBoundary = null; // we are being conservative by not allowing the adaptor boundary to go beyond what // we belive is the maximum size of an adaptor return adaptorBoundary; }
/**
 * Returns the next record that may be released from the queue.
 *
 * <p>While {@code drain} is set, the result of {@code doPoll()} is returned directly. Otherwise
 * the head of the queue is released only if {@code maxAlignmentStart} minus its alignment start
 * is at least {@code minAlignmentDelay}.
 *
 * @return the released record, or {@code null} when the queue is empty or its head must still
 *     be held back
 */
public SAMRecord poll() {
  if (!drain) {
    final SAMRecord candidate = delegateQueue.peek();
    if (candidate == null) {
      return null;
    }
    final boolean tooRecent =
        maxAlignmentStart - candidate.getAlignmentStart() < minAlignmentDelay;
    if (tooRecent) {
      return null;
    }
  }
  return doPoll();
}
public MyIterator(SAMRecord record) { this.record = record; if (record.getReadUnmappedFlag()) throw new IllegalArgumentException("unmapped read"); this.readPos0 = 0; this.genomePos1 = record.getAlignmentStart(); this._next_genomePos1 = this.genomePos1; this._next_readPos0 = this.readPos0; if (this.genomePos1 < 1) throw new IllegalArgumentException("record"); this.cigar = record.getCigar(); }
/** * The sorting by alignment start is not stable. Therefore we use an integere value here, * which is incremented for each record added/ * * @param o1 * @param o2 * @return */ @Override public int compare(SAMRecord o1, SAMRecord o2) { int result = o1.getAlignmentStart() - o2.getAlignmentStart(); if (result != 0) return result; int c1 = (Integer) o1.getAttribute("X~"); int c2 = (Integer) o2.getAttribute("X~"); return c1 - c2; // result = o1.getReadLength() - o2.getReadLength(); // if (result != 0) // return result; // // result = o1.getReadString().compareTo(o2.getReadString()); // if (result != 0) // return result; // // result = o1.getReadName().compareTo(o2.getReadName()); // return result; }
/**
 * Empties the queue through {@code doPoll()}, printing one line per record on standard output:
 * the alignment start, a tab, then the read name.
 */
public void dump() {
  for (;;) {
    if (delegateQueue.isEmpty()) {
      return;
    }
    final SAMRecord next = doPoll();
    final String line = next.getAlignmentStart() + "\t" + next.getReadName();
    System.out.println(line);
  }
}
/**
 * Queues a record. The record is first tagged with the current value of {@code counter} (stored
 * in its {@code "X~"} attribute, after which the counter is incremented), then handed to the
 * delegate queue; finally {@code maxAlignmentStart} is raised if this record starts further
 * along than any seen before.
 *
 * @param record the record to enqueue; its {@code "X~"} attribute is overwritten
 */
public void add(SAMRecord record) {
  record.setAttribute("X~", counter++);
  delegateQueue.add(record);

  final int start = record.getAlignmentStart();
  if (start > maxAlignmentStart) {
    maxAlignmentStart = start;
  }
}
@Override public int doWork(String[] args) { File refFile = null; com.github.lindenb.jvarkit.util.cli.GetOpt getopt = new com.github.lindenb.jvarkit.util.cli.GetOpt(); int c; while ((c = getopt.getopt(args, "hvL:r:")) != -1) { switch (c) { case 'h': printUsage(); return 0; case 'v': System.out.println(getVersion()); return 0; case 'L': getLogger().setLevel(java.util.logging.Level.parse(getopt.getOptArg())); break; case 'r': refFile = new File(getopt.getOptArg()); break; case ':': System.err.println("Missing argument for option -" + getopt.getOptOpt()); return -1; default: System.err.println("Unknown option -" + getopt.getOptOpt()); return -1; } } if (refFile == null) { error("Undefined REF file"); return -1; } File bamFile = null; if (getopt.getOptInd() + 1 != args.length) { info("reading from stdin."); } else { bamFile = new File(args[getopt.getOptInd()]); } IndexedFastaSequenceFile indexedFastaSequenceFile = null; SAMFileReader samFileReader = null; try { GenomicSequence genomicSequence = null; indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile); SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT); samFileReader = null; if (bamFile == null) { samFileReader = new SAMFileReader(System.in); } else { samFileReader = new SAMFileReader(bamFile); } XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance(); XMLStreamWriter w = xmlfactory.createXMLStreamWriter(System.out, "UTF-8"); w.writeStartDocument("UTF-8", "1.0"); w.writeStartElement("sam"); w.writeComment(getProgramCommandLine()); w.writeAttribute("ref", (bamFile == null ? 
"stdin" : bamFile.getPath())); w.writeAttribute("bam", args[1]); SAMRecordIterator iter = samFileReader.iterator(); while (iter.hasNext()) { SAMRecord rec = iter.next(); final byte readbases[] = rec.getReadBases(); w.writeStartElement("read"); w.writeStartElement("name"); w.writeCharacters(rec.getReadName()); w.writeEndElement(); w.writeStartElement("sequence"); w.writeCharacters(new String(readbases)); w.writeEndElement(); w.writeStartElement("flags"); w.writeAttribute("paired", String.valueOf(rec.getReadPairedFlag())); w.writeAttribute( "failsVendorQual", String.valueOf(rec.getReadFailsVendorQualityCheckFlag())); w.writeAttribute("mapped", String.valueOf(!rec.getReadUnmappedFlag())); w.writeAttribute("strand", (rec.getReadNegativeStrandFlag() ? "-" : "+")); if (rec.getReadPairedFlag()) { w.writeAttribute("mate-mapped", String.valueOf(!rec.getMateUnmappedFlag())); w.writeAttribute("mate-strand", (rec.getMateNegativeStrandFlag() ? "-" : "+")); w.writeAttribute("proper-pair", String.valueOf(rec.getProperPairFlag())); } w.writeCharacters(String.valueOf(rec.getFlags())); w.writeEndElement(); if (!rec.getReadUnmappedFlag()) { w.writeStartElement("qual"); w.writeCharacters(String.valueOf(rec.getMappingQuality())); w.writeEndElement(); w.writeStartElement("chrom"); w.writeAttribute("index", String.valueOf(rec.getReferenceIndex())); w.writeCharacters(rec.getReferenceName()); w.writeEndElement(); w.writeStartElement("pos"); w.writeCharacters(String.valueOf(rec.getAlignmentStart())); w.writeEndElement(); w.writeStartElement("cigar"); w.writeCharacters(rec.getCigarString()); w.writeEndElement(); } if (!rec.getMateUnmappedFlag()) { w.writeStartElement("mate-chrom"); w.writeAttribute("index", String.valueOf(rec.getMateReferenceIndex())); w.writeCharacters(rec.getMateReferenceName()); w.writeEndElement(); w.writeStartElement("mate-pos"); w.writeCharacters(String.valueOf(rec.getMateAlignmentStart())); w.writeEndElement(); } if (!rec.getReadUnmappedFlag()) { if (genomicSequence 
== null || genomicSequence.getChrom().equals(rec.getReferenceName())) { genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName()); } w.writeStartElement("align"); int readIndex = 0; int refIndex = rec.getAlignmentStart(); for (final CigarElement e : rec.getCigar().getCigarElements()) { switch (e.getOperator()) { case H: break; // ignore hard clips case P: break; // ignore pads case I: // cont. case S: { final int length = e.getLength(); for (int i = 0; i < length; ++i) { w.writeEmptyElement(e.getOperator().name()); w.writeAttribute("read-index", String.valueOf(readIndex + 1)); if (readIndex >= 0 && readIndex < readbases.length) { w.writeAttribute("read-base", String.valueOf((char) (readbases[readIndex]))); } readIndex++; } break; } case N: // cont. -- reference skip case D: { final int length = e.getLength(); for (int i = 0; i < length; ++i) { w.writeEmptyElement(e.getOperator().name()); w.writeAttribute("ref-index", String.valueOf(refIndex)); if (refIndex >= 1 && refIndex <= genomicSequence.length()) { w.writeAttribute( "ref-base", String.valueOf(genomicSequence.charAt(refIndex - 1))); } refIndex++; } break; } case M: case EQ: case X: { final int length = e.getLength(); for (int i = 0; i < length; ++i) { w.writeEmptyElement(e.getOperator().name()); char baseRead = '\0'; if (readIndex >= 0 && readIndex < readbases.length) { baseRead = (char) (rec.getReadBases()[readIndex]); w.writeAttribute("read-index", String.valueOf(readIndex + 1)); w.writeAttribute("read-base", String.valueOf(baseRead)); } w.writeAttribute("ref-index", String.valueOf(refIndex)); if (refIndex >= 1 && refIndex <= genomicSequence.length()) { char baseRef = genomicSequence.charAt(refIndex - 1); w.writeAttribute("ref-base", String.valueOf(baseRef)); if (Character.toUpperCase(baseRef) != Character.toUpperCase(baseRead)) { w.writeAttribute("mismatch", "true"); } } refIndex++; readIndex++; } break; } default: throw new IllegalStateException( "Case statement didn't deal 
with cigar op: " + e.getOperator()); } } } w.writeEndElement(); w.writeEndElement(); iter.close(); w.writeEndElement(); } w.writeEndElement(); w.writeEndDocument(); w.flush(); w.close(); } catch (Exception err) { error(err); return -1; } finally { CloserUtil.close(samFileReader); CloserUtil.close(indexedFastaSequenceFile); } return 0; }
public static void main(String[] args) { args = new String[] { "/commun/data/users/cfaucheron/aln_20120329/S0529/data_S0529/S0529_sort.nodup.bam" }; ReferenceSequenceFile rsf = ReferenceSequenceFileFactory.getReferenceSequenceFile( new File("/commun/data/pubdb/ucsc/hg19/chromosomes/hg19.fa")); int count = 0; for (String filename : args) { File file = new File(filename); SAMFileReader samIn = new SAMFileReader(file); SAMRecordIterator r = samIn.iterator(); while (r.hasNext()) { SAMRecord rec = r.next(); if (rec.getReadUnmappedFlag()) continue; if (++count > 10000) break; if (rec.getAlignmentStart() > rec.getAlignmentEnd()) throw new IllegalStateException(); byte bases[] = rsf.getSubsequenceAt( rec.getReferenceName(), rec.getAlignmentStart(), Math.max( rec.getAlignmentEnd(), rec.getAlignmentStart() + rec.getCigar().getPaddedReferenceLength())) .getBases(); Iterator<CigarAlignment> i = CigarAlignment.iterator(rec); /*System.err.println(rec.getCigarString()); System.err.println(bases.length); System.err.println("start:"+rec.getAlignmentStart());*/ StringBuilder s1 = new StringBuilder(); StringBuilder s2 = new StringBuilder(); while (i.hasNext()) { CigarAlignment caln = i.next(); /* System.err.println(rec.getCigarString()); System.err.println("bases.length:"+bases.length); System.err.println("refpos:"+caln.getReferencePosition1()); System.err.println("readpos:"+rec.getAlignmentStart()); */ if (caln.getReferencePosition1() - rec.getAlignmentStart() >= bases.length) { System.out.println("SHORT!"); System.out.println("op:" + caln.getCigarOperator()); System.out.println("read start:" + rec.getAlignmentStart()); System.out.println("clan.pos1:" + caln.getReferencePosition1()); System.out.println("read end:" + rec.getAlignmentEnd()); System.out.println("bases.length:" + bases.length); System.out.println( "getPaddedReferenceLength:" + rec.getCigar().getPaddedReferenceLength()); System.out.println("getReferenceLength:" + rec.getCigar().getReferenceLength()); 
System.out.println("getReadLength:" + rec.getCigar().getReadLength()); System.out.println( "cigar.read.length:" + Cigar.getReadLength(rec.getCigar().getCigarElements())); count = 2000; break; } if (caln.isInsertRef()) { s2.append("-"); s1.append(caln.getReadBase()); } else if (caln.isDeletionRef()) { s2.append((char) bases[caln.getReferencePosition1() - rec.getAlignmentStart()]); s1.append("-"); } else { s2.append((char) bases[caln.getReferencePosition1() - rec.getAlignmentStart()]); s1.append(caln.getReadBase()); } // System.out.println(s1); // System.out.println(s2); // System.out.println(); } System.out.println( rec.getCigarString() + " " + rec.getReferenceName() + ":" + rec.getAlignmentStart()); System.out.println("ref :" + new String(bases)); System.out.println("read:" + new String(rec.getReadBases())); System.out.println(); System.out.println(s1); System.out.println(s2); System.out.println(); } samIn.close(); } }
public void find_coverage(SAMResource sres) { int start_base = sres.region.range.start; int end_base = sres.region.range.end; int coverage_len = (end_base - start_base) + 1; int i, end, ref_i, read_i, len; int[] coverage = new int[coverage_len]; Arrays.fill(coverage, 0); WorkingFile wf = null; if (outfile != null) { try { wf = new WorkingFile(outfile); ps = wf.getPrintStream(); } catch (Exception e) { System.err.println("I/O error: " + e); // debug e.printStackTrace(); System.exit(1); } } try { // // gather coverage info: // CloseableIterator<SAMRecord> iterator = sres.get_iterator(); int read_count = 0; int ref_min = -1; int ref_max = -1; while (iterator.hasNext()) { SAMRecord sr = iterator.next(); read_count++; // System.err.println(sr.getReadName() + ": " + sr.getAlignmentStart() + "-" + // sr.getAlignmentEnd()); // debug if (sr.getReadUnmappedFlag()) continue; if (sr.getDuplicateReadFlag()) { if (verbose_mode) System.err.println( sr.getReadName() + "." + (sr.getReadNegativeStrandFlag() ? "R" : "F") + " ignoring, duplicate"); continue; } byte[] read = sr.getReadBases(); byte[] quals = sr.getBaseQualities(); for (AlignmentBlock ab : sr.getAlignmentBlocks()) { len = ab.getLength(); read_i = ab.getReadStart() - 1; ref_i = ab.getReferenceStart() - start_base; if (ref_min == -1 || ref_i < ref_min) ref_min = ref_i; for (i = read_i, end = read_i + len; i < end; i++, ref_i++) { if (ref_i >= 0 && ref_i < coverage_len) { if (quals[i] >= MIN_QUALITY) { if (verbose_mode) System.err.println( sr.getReadName() + "." + (sr.getReadNegativeStrandFlag() ? "R" : "F") + " hit at " + (ref_i + start_base) + " as=" + sr.getAlignmentStart() + " ae=" + sr.getAlignmentEnd()); coverage[ref_i]++; } else if (verbose_mode) { System.err.println( sr.getReadName() + "." + (sr.getReadNegativeStrandFlag() ? 
"R" : "F") + " qual_reject at " + (ref_i + start_base) + " as=" + sr.getAlignmentStart() + " ae=" + sr.getAlignmentEnd()); } } } if (ref_max == -1 || ref_i > ref_max) ref_max = ref_i; } } sres.close(); System.err.println( "records:" + read_count + " ref_min:" + (ref_min + start_base) + " ref_max:" + (ref_max + start_base)); // debug // // report coverage info: // for (i = 0; i < coverage.length; i++) { if (name != null) ps.print(name + ","); ps.println((i + start_base) + "," + coverage[i]); // debug } if (wf != null) wf.finish(); } catch (Exception e) { System.err.println("ERROR: " + e); // debug e.printStackTrace(); } }
public void run(URL url) throws SocketException, IOException { FTPSeekableStream fss = new FTPSeekableStream(url); File index = fss.getIndexFile(); SAMFileReader reader = new SAMFileReader(fss, index, false); reader.getFileHeader(); reader.setValidationStringency(ValidationStringency.SILENT); logger.info("attributes"); for (Map.Entry<String, String> entry : reader.getFileHeader().getAttributes()) { logger.info(String.format("%s : %s", entry.getKey(), entry.getValue())); } logger.info("sequences"); for (SAMSequenceRecord ssr : reader.getFileHeader().getSequenceDictionary().getSequences()) { logger.info(String.format("%s : %s ", ssr.getSequenceName(), ssr.getSequenceLength())); } logger.info("sequences"); for (SAMSequenceRecord ssr : reader.getFileHeader().getSequenceDictionary().getSequences()) { // if (! ssr.getSequenceName().equals("NT_166325")) { // continue; // } int length = ssr.getSequenceLength(); int min = 1000; int max = 100000; if (min >= length) { min = 0; } // logger.warn((max >= length)); if (max >= length) { max = length; } if (min >= max) { min = 0; } logger.info( String.format("Sequence: %s (%s) %s-%s", ssr.getSequenceName(), length, min, max)); SAMRecordIterator i = reader.query(ssr.getSequenceName(), min, max, false); while (i.hasNext()) { SAMRecord record = i.next(); logger.info( String.format( "Read: %s (%s (%s-%s) %s) / %s", record.getReadName(), min, record.getAlignmentStart(), record.getAlignmentEnd(), max, record.getFlags())); assertTrue(record.getAlignmentStart() >= min); } i.close(); logger.info("_________________________________________________"); } logger.info("Done"); }
public static void align( Graph graph, SAMRecord rec, Node recNode, ReferenceSequence sequence, SAMProgramRecord programRecord, int offset, AlleleCoverageCutoffs alleleCoverageCutoffs, boolean correctBases, boolean useSequenceQualities, int MAXIMUM_TOTAL_COVERAGE, int MAX_HEAP_SIZE) throws Exception { int i; AlignHeapNode curAlignHeapNode = null; AlignHeapNode nextAlignHeapNode = null; AlignHeapNode bestAlignHeapNode = null; AlignHeap heap = null; String read = null; // could be cs String readBases = null; // always nt String qualities = null; // could be cq SRMAUtil.Space space = SRMAUtil.Space.NTSPACE; ListIterator<NodeRecord> iter = null; AlignHeapNodeComparator comp = null; int alignmentStart = -1; int numStartNodesAdded = 0; boolean strand = rec.getReadNegativeStrandFlag(); // false -> forward, true -> reverse String softClipStartBases = null; String softClipStartQualities = null; String softClipEndBases = null; String softClipEndQualities = null; // Debugging stuff String readName = rec.getReadName(); assert SRMAUtil.Space.COLORSPACE != space; // Get space read = (String) rec.getAttribute("CS"); if (null == read) { // Use base space space = SRMAUtil.Space.NTSPACE; } else { // assumes CS and CQ are always in sequencing order space = SRMAUtil.Space.COLORSPACE; } // Get read and qualities if (space == SRMAUtil.Space.NTSPACE) { byte tmpRead[] = rec.getReadString().getBytes(); byte tmpQualities[] = rec.getBaseQualityString().getBytes(); // Reverse once if (strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); SAMRecordUtil.reverseArray(tmpQualities); } read = new String(tmpRead); readBases = new String(tmpRead); qualities = new String(tmpQualities); // Reverse again if (strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); SAMRecordUtil.reverseArray(tmpQualities); } } else { byte tmpRead[] = rec.getReadString().getBytes(); // Reverse once if (strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); } readBases = new String(tmpRead); // Reverse again if 
(strand) { // reverse SAMRecordUtil.reverseArray(tmpRead); } read = SRMAUtil.normalizeColorSpaceRead(read); qualities = (String) rec.getAttribute("CQ"); // Some aligners include a quality value for the adapter. A quality value // IMHO should not be given for an unobserved (assumed) peice of data. Trim // the first quality in this case if (qualities.length() == 1 + read.length()) { // trim the first quality qualities = qualities.substring(1); } } // Reverse back if (readBases.length() <= 0) { throw new Exception("Error. The current alignment has no bases."); } if (read.length() <= 0) { throw new Exception("Error. The current alignment has no bases."); } if (qualities.length() <= 0) { throw new Exception("Error. The current alignment has no qualities."); } if (readBases.length() != read.length()) { if (space == SRMAUtil.Space.COLORSPACE) { throw new Exception( "Error. The current alignment's read bases length does not match the length of the colors in the CS tag [" + rec.getReadName() + "]."); } else { throw new Exception("Error. Internal error: readBases.length() != read.length()"); } } // Deal with soft-clipping // - save the soft clipped sequence for latter { List<CigarElement> cigarElements = null; cigarElements = rec.getCigar().getCigarElements(); CigarElement e1 = cigarElements.get(0); // first CigarElement e2 = cigarElements.get(cigarElements.size() - 1); // last // Soft-clipped if (CigarOperator.S == e1.getOperator()) { if (space == SRMAUtil.Space.COLORSPACE) { throw new Exception( "Error. 
Soft clipping with color-space data not currently supported."); } int l = e1.getLength(); if (strand) { // reverse softClipStartBases = readBases.substring(readBases.length() - l); softClipStartQualities = qualities.substring(qualities.length() - l); readBases = readBases.substring(0, readBases.length() - l); read = read.substring(0, read.length() - l); qualities = qualities.substring(0, qualities.length() - l); } else { softClipStartBases = readBases.substring(0, l - 1); softClipStartQualities = qualities.substring(0, l - 1); readBases = readBases.substring(l); read = read.substring(l); qualities = qualities.substring(l); } } if (CigarOperator.S == e2.getOperator()) { if (space == SRMAUtil.Space.COLORSPACE) { throw new Exception( "Error. Soft clipping with color-space data not currently supported."); } int l = e2.getLength(); if (strand) { // reverse softClipEndBases = readBases.substring(0, l - 1); softClipEndQualities = qualities.substring(0, l - 1); readBases = readBases.substring(l); read = read.substring(l); qualities = qualities.substring(l); } else { softClipEndBases = readBases.substring(readBases.length() - l); softClipEndQualities = qualities.substring(qualities.length() - l); readBases = readBases.substring(0, readBases.length() - l); read = read.substring(0, read.length() - l); qualities = qualities.substring(0, qualities.length() - l); } } } // Remove mate pair information Align.removeMateInfo(rec); comp = new AlignHeapNodeComparator( (strand) ? 
AlignHeap.HeapType.MAXHEAP : AlignHeap.HeapType.MINHEAP); // Bound by original alignment if possible bestAlignHeapNode = Align.boundWithOriginalAlignment( rec, graph, recNode, comp, strand, read, qualities, readBases, space, sequence, alleleCoverageCutoffs, useSequenceQualities, MAXIMUM_TOTAL_COVERAGE, MAX_HEAP_SIZE); /* System.err.println("readName="+rec.getReadName()); if(null != bestAlignHeapNode) { System.err.println("\nFOUND BEST:" + rec.toString()); } else { System.err.println("\nNOT FOUND (BEST): " + rec.toString()); } Align.updateSAM(rec, programRecord, bestAlignHeapNode, space, read, qualities, softClipStartBases, softClipStartQualities, softClipEndBases, softClipEndQualities, strand, correctBases); return; */ heap = new AlignHeap((strand) ? AlignHeap.HeapType.MAXHEAP : AlignHeap.HeapType.MINHEAP); // Add start nodes if (strand) { // reverse alignmentStart = rec.getAlignmentEnd(); for (i = alignmentStart + offset; alignmentStart - offset <= i; i--) { int position = graph.getPriorityQueueIndexAtPositionOrBefore(i); PriorityQueue<Node> startNodeQueue = graph.getPriorityQueue(position); if (0 != position && null != startNodeQueue) { Iterator<Node> startNodeQueueIter = startNodeQueue.iterator(); while (startNodeQueueIter.hasNext()) { Node startNode = startNodeQueueIter.next(); int f = passFilters(graph, startNode, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE); if (0 == f) { heap.add( new AlignHeapNode( null, startNode, startNode.coverage, read.charAt(0), qualities.charAt(0), useSequenceQualities, space)); } else if (f < 0) { return; } if (startNode.position < i) { i = startNode.position; } numStartNodesAdded++; } } } } else { alignmentStart = rec.getAlignmentStart(); for (i = alignmentStart - offset; i <= alignmentStart + offset; i++) { int position = graph.getPriorityQueueIndexAtPositionOrGreater(i); PriorityQueue<Node> startNodeQueue = graph.getPriorityQueue(position); if (0 != position && null != startNodeQueue) { Iterator<Node> startNodeQueueIter = 
startNodeQueue.iterator(); while (startNodeQueueIter.hasNext()) { Node startNode = startNodeQueueIter.next(); int f = passFilters(graph, startNode, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE); if (0 == f) { heap.add( new AlignHeapNode( null, startNode, startNode.coverage, read.charAt(0), qualities.charAt(0), useSequenceQualities, space)); } else if (f < 0) { return; } if (i < startNode.position) { i = startNode.position; } numStartNodesAdded++; } } } } if (numStartNodesAdded == 0) { throw new Exception("Did not add any start nodes!"); } // Get first node off the heap curAlignHeapNode = heap.poll(); while (null != curAlignHeapNode) { if (MAX_HEAP_SIZE <= heap.size()) { // too many to consider return; } // System.err.println("strand:" + strand + "\tsize:" + heap.size() + "\talignmentStart:" + // alignmentStart + "\toffset:" + offset + "\treadOffset:" + curAlignHeapNode.readOffset); // System.err.print("size:" + heap.size() + ":" + curAlignHeapNode.readOffset + ":" + // curAlignHeapNode.score + ":" + curAlignHeapNode.alleleCoverageSum + ":" + // curAlignHeapNode.startPosition + "\t"); // curAlignHeapNode.node.print(System.err); // System.err.print("\rposition:" + curAlignHeapNode.node.position + "\treadOffset:" + // curAlignHeapNode.readOffset); // Remove all non-insertions with the same contig/pos/read-offset/type/base and lower score nextAlignHeapNode = heap.peek(); while (Node.INSERTION != curAlignHeapNode.node.type && null != nextAlignHeapNode && 0 == comp.compare(curAlignHeapNode, nextAlignHeapNode)) { if (curAlignHeapNode.score < nextAlignHeapNode.score || (curAlignHeapNode.score == nextAlignHeapNode.score && curAlignHeapNode.alleleCoverageSum < nextAlignHeapNode.alleleCoverageSum)) { // Update current node curAlignHeapNode = heap.poll(); } else { // Ignore next node heap.poll(); } nextAlignHeapNode = heap.peek(); } nextAlignHeapNode = null; // Check if the alignment is complete if (curAlignHeapNode.readOffset == read.length() - 1) { // All read bases 
examined, store if has the best alignment. // System.err.print(curAlignHeapNode.alleleCoverageSum + ":" + curAlignHeapNode.score + // ":"); // System.err.print(curAlignHeapNode.startPosition + ":"); // curAlignHeapNode.node.print(System.err); if (null == bestAlignHeapNode || bestAlignHeapNode.score < curAlignHeapNode.score || (bestAlignHeapNode.score == curAlignHeapNode.score && bestAlignHeapNode.alleleCoverageSum < curAlignHeapNode.alleleCoverageSum)) { bestAlignHeapNode = curAlignHeapNode; } } else if (null != bestAlignHeapNode && curAlignHeapNode.score < bestAlignHeapNode.score) { // ignore, under the assumption that scores can only become more negative. } else { if (strand) { // reverse // Go to all the "prev" nodes iter = curAlignHeapNode.node.prev.listIterator(); } else { // forward // Go to all "next" nodes iter = curAlignHeapNode.node.next.listIterator(); } while (iter.hasNext()) { NodeRecord next = iter.next(); int f = passFilters( graph, next.node, next.coverage, alleleCoverageCutoffs, MAXIMUM_TOTAL_COVERAGE); if (0 == f) { heap.add( new AlignHeapNode( curAlignHeapNode, next.node, next.coverage, read.charAt(curAlignHeapNode.readOffset + 1), qualities.charAt(curAlignHeapNode.readOffset + 1), useSequenceQualities, space)); } else if (f < 0) { return; } } iter = null; } // Get next node curAlignHeapNode = heap.poll(); } // Recover alignment Align.updateSAM( rec, sequence, programRecord, bestAlignHeapNode, space, read, qualities, softClipStartBases, softClipStartQualities, softClipEndBases, softClipEndQualities, strand, correctBases); }