public String getPairOrientation() { // LOG.info("getPairOrientation"); String pairOrientation = ""; if (EntryFlagHelper.isPaired(entry) && !EntryFlagHelper.isMateUnmapped(entry) && entry.getTargetIndex() == entry.getPairAlignmentLink().getTargetIndex()) { char s1 = EntryFlagHelper.isReadReverseStrand(entry) ? 'R' : 'F'; char s2 = EntryFlagHelper.isMateReverseStrand(entry) ? 'R' : 'F'; char o1 = ' '; char o2 = ' '; char[] tmp = new char[4]; if (EntryFlagHelper.isFirstInPair(entry)) { o1 = '1'; o2 = '2'; } else if (EntryFlagHelper.isSecondInPair(entry)) { o1 = '2'; o2 = '1'; } if (getInferredInsertSize() > 0) { tmp[0] = s1; tmp[1] = o1; tmp[2] = s2; tmp[3] = o2; } else { tmp[2] = s1; tmp[3] = o1; tmp[0] = s2; tmp[1] = o2; } pairOrientation = new String(tmp); } return pairOrientation; }
/**
 * Returns the mapping quality stored in the entry.
 *
 * @return the entry's mapping quality, or 255 when the entry does not carry one
 */
public int getMappingQuality() {
  return entry.hasMappingQuality() ? entry.getMappingQuality() : 255;
}
/** * Return the next alignment. * * @return the next alignment within the window. */ public Alignment next() { if (!hasNext()) { throw new NoSuchElementException(); } Alignments.AlignmentEntry entry; entry = nextEntry; nextEntry = null; // LOG.debug(String.format("next targetIndex: %d position: %d", targetIndex, // entry.getPosition())); currentPosition = entry.getPosition(); return new GobyAlignment(this, entry); }
/**
 * Splits blocks whose boundaries contain a read deletion.
 *
 * <p>For every sequence variation that is a read deletion, each non soft-clipped block whose
 * [start, end] range contains the variation position is replaced by a left and a right block. The
 * right block starts after the left bases plus the length of the variation's {@code from} string.
 *
 * <p>The candidate blocks are iterated through a snapshot taken per variation, so that removing a
 * split block from {@code blocks} never disturbs the iteration (removing from the list while
 * iterating over it would skip the element that follows the removed one).
 *
 * @param blocks the blocks to inspect; blocks that get split are removed from this list
 * @param alignmentEntry the entry whose sequence variations are scanned
 * @return a new list holding the split halves followed by the blocks that were left untouched
 */
private ObjectArrayList<AlignmentBlock> introduceDeletions(
    ObjectArrayList<AlignmentBlock> blocks, Alignments.AlignmentEntry alignmentEntry) {
  ObjectArrayList<AlignmentBlock> newBlocks = new ObjectArrayList<AlignmentBlock>();

  for (Alignments.SequenceVariation variation : alignmentEntry.getSequenceVariationsList()) {
    // Snapshot: 'blocks' is modified below, so never iterate over the live list.
    final AlignmentBlock[] candidates = blocks.toArray(new AlignmentBlock[blocks.size()]);
    for (AlignmentBlock block : candidates) {
      if (block.isSoftClipped()) {
        continue;
      }
      // NOTE(review): the position is taken from the 'entry' field, not from the alignmentEntry
      // parameter; the visible caller passes 'entry', so both are the same object there.
      final int vrPos = variation.getPosition() + entry.getPosition();
      final boolean deletionInsideBlock =
          hasReadDeletion(variation) && vrPos >= block.getStart() && vrPos <= block.getEnd();
      if (!deletionInsideBlock) {
        continue;
      }

      final ByteList blockBases = new ByteArrayList(block.getBases());
      final ByteList blockScores = new ByteArrayList(block.getQualities());

      // Index at which the block is cut: the variation position minus one.
      // NOTE(review): this presumably assumes the block starts at the first aligned base of the
      // read - confirm for reads that yield more than one non soft-clipped block.
      final int deletionPosition = variation.getPosition() - 1;
      final ByteList leftBases = blockBases.subList(0, deletionPosition);
      final ByteList rightBases = blockBases.subList(deletionPosition, blockBases.size());
      final ByteList leftScores = blockScores.subList(0, deletionPosition);
      final ByteList rightScores = blockScores.subList(deletionPosition, blockScores.size());

      final AlignmentBlock left =
          AlignmentBlock.getInstance(
              block.getStart(),
              leftBases.toByteArray(new byte[leftBases.size()]),
              leftScores.toByteArray(new byte[leftScores.size()]),
              this);
      // The right half resumes after the bases spanned by the variation's 'from' string.
      final AlignmentBlock right =
          AlignmentBlock.getInstance(
              block.getStart() + leftBases.size() + variation.getFrom().length(),
              rightBases.toByteArray(new byte[rightBases.size()]),
              rightScores.toByteArray(new byte[rightScores.size()]),
              this);

      blocks.remove(block);
      newBlocks.add(left);
      newBlocks.add(right);
    }
  }

  // Whatever was not split is carried over unchanged.
  newBlocks.addAll(blocks);
  return newBlocks;
}
/**
 * Returns the mate for a paired-end read.
 *
 * <p>Entries without a pair alignment link (single end reads, or paired reads whose mate was not
 * found) yield the shared unmapped-mate constant, so check that the read is paired before relying
 * on the result.
 *
 * @return a mate built from the pair alignment link, or the constant unmapped mate when the entry
 *     has no such link
 */
public ReadMate getMate() {
  if (!entry.hasPairAlignmentLink()) {
    return unmappedMate;
  }
  final Alignments.RelatedAlignmentEntry mateLink = entry.getPairAlignmentLink();
  // NOTE(review): the last argument is this read's own unmapped flag, not the mate's
  // (EntryFlagHelper.isMateUnmapped) - confirm against the ReadMate constructor contract.
  return new ReadMate(
      getChromosome(mateLink.getTargetIndex()),
      mateLink.getPosition(),
      EntryFlagHelper.isMateReverseStrand(entry),
      EntryFlagHelper.isReadUnmapped(entry));
}
/**
 * Tells whether this read is part of a pair.
 *
 * @return true only when the entry has pair flags and those flags mark the read as paired
 */
public boolean isPaired() {
  return entry.hasPairFlags() && EntryFlagHelper.isPaired(entry);
}
public String getValueString(double position, WindowFunction windowFunction) { // //LOG.info("getValueString"); MutableString buffer = new MutableString(); buffer.append(entry.toString()); buffer.replace("\n", "<br>"); if (this.isPaired()) { buffer.append("----------------------" + "<br>"); buffer.append("Pair start = " + getMate().positionString() + "<br>"); buffer.append("Pair is mapped = " + (getMate().isMapped() ? "yes" : "no") + "<br>"); // buf.append("Pair is proper = " + (getProperPairFlag() ? "yes" : "no") + "<br>"); if (getChr().equals(getMate().getChr())) { buffer.append("Insert size = " + getInferredInsertSize() + "<br>"); } if (getPairOrientation().length() > 0) { buffer.append("Pair orientation = " + getPairOrientation() + "<br>"); } if (isFirstOfPair()) { buffer.append("First of pair <br>"); } if (isSecondOfPair()) { buffer.append("Second of pair <br>"); } } return buffer.toString(); }
/** * Determine if this iterator has more alignment entries in the given window. * * @return True if next() will return an alignment, False otherwise. */ public boolean hasNext() { /* LOG.debug(String.format("previousPosition: %d endReferencePosition %d previousReferenceIndex %d targetIndex %d", previousPosition, endReferencePosition, previousReferenceIndex, targetIndex) ); */ // Fetch the next entry with skipTo if (nextEntry != null) return true; try { if (!useWindow) { // all results are returned if (!reader.hasNext()) return false; nextEntry = reader.next(); } else { // we return only within a window nextEntry = reader.skipTo(targetIndex, startReferencePosition); if (nextEntry == null || (nextEntry.getTargetIndex() != targetIndex || nextEntry.getPosition() < startReferencePosition || nextEntry.getPosition() > endReferencePosition)) { // No next entry, on a different target sequence, or before the position of interest: nextEntry = null; } } } catch (IOException e) { nextEntry = null; LOG.error(e); // throw new RuntimeException("IO error reading next Goby alignment entry", e); return false; } catch (GobyRuntimeException e) { nextEntry = null; LOG.error(e); // throw new RuntimeException("IO error reading next Goby alignment entry", e); return false; } final boolean result = nextEntry != null; // LOG.debug("hasNext returning :" + result); return result; }
/**
 * Verifies that the list has an appropriate unbroken chain of back links.
 *
 * <p>Every element after the first must carry a spliced backward link, share the query index of
 * its predecessor, and have a backward link whose fragment index, position and target index match
 * the predecessor. A null list and lists with fewer than two elements are considered valid.
 *
 * @param list the list of splices to validate
 * @return true if the list has an unbroken chain of back links
 */
boolean spliceListIsValid(final ObjectArrayList<GobyAlignment> list) {
  if (list == null || list.size() <= 1) {
    return true;
  }

  Alignments.AlignmentEntry previous = list.get(0).entry;
  for (int index = 1; index < list.size(); index++) {
    final Alignments.AlignmentEntry current = list.get(index).entry;
    if (!current.hasSplicedBackwardAlignmentLink()) {
      return false;
    }

    final Alignments.RelatedAlignmentEntry backLink = current.getSplicedBackwardAlignmentLink();
    final boolean pointsAtPrevious =
        previous.getQueryIndex() == current.getQueryIndex()
            && previous.getFragmentIndex() == backLink.getFragmentIndex()
            && previous.getPosition() == backLink.getPosition()
            && previous.getTargetIndex() == backLink.getTargetIndex();
    if (!pointsAtPrevious) {
      return false;
    }
    previous = current;
  }
  return true;
}
public boolean isNegativeStrand() { // //LOG.info("isNegativeStrand"); return entry.getMatchingReverseStrand(); }
/** * Construct alignment blocks from the Goby alignment entry. This method uses the convention that * '=' denotes a match to the reference. * * <p>Conventions for storing sequence variations in Goby alignments are described <a * href="http://tinyurl.com/goby-sequence-variations">here</a> * * @param alignmentEntry The Goby alignment entry to use */ public void buildBlocks(Alignments.AlignmentEntry alignmentEntry) { ObjectArrayList<AlignmentBlock> blocks = new ObjectArrayList<AlignmentBlock>(); ObjectArrayList<AlignmentBlock> insertionBlocks = new ObjectArrayList<AlignmentBlock>(); int start = alignmentEntry.getPosition(); ByteArrayList bases = new ByteArrayList(); ByteArrayList scores = new ByteArrayList(); int readLength = alignmentEntry.getQueryLength(); byte[] readBases = new byte[readLength]; byte[] readQual = new byte[readLength]; Arrays.fill(readBases, (byte) '='); if (alignmentEntry.hasReadQualityScores()) { readQual = alignmentEntry.getReadQualityScores().toByteArray(); } else { Arrays.fill(readQual, (byte) 40); } int j = 0; int insertedBases = 0; int deletedBases = 0; final int leftPadding = alignmentEntry.getQueryPosition(); boolean showSoftClipped = PreferenceManager.getInstance().getAsBoolean(PreferenceManager.SAM_SHOW_SOFT_CLIPPED); if (showSoftClipped && entry.hasSoftClippedBasesLeft()) { int clipLength = entry.getSoftClippedBasesLeft().length(); addSoftClipBlock( blocks, Math.max(0, entry.getPosition() - clipLength), entry.getSoftClippedBasesLeft(), readQual, entry.hasSoftClippedQualityLeft(), entry.getSoftClippedQualityLeft().toByteArray(), 0); } for (Alignments.SequenceVariation var : alignmentEntry.getSequenceVariationsList()) { final String from = var.getFrom(); final int fromLength = from.length(); final String to = var.getTo(); final int toLength = from.length(); final int sequenceVariationLength = Math.max(fromLength, toLength); final ByteString toQuality = var.getToQuality(); if (hasReadInsertion(from)) { bases.clear(); scores.clear(); for 
(int i = 0; i < sequenceVariationLength; i++) { final char toChar = i >= toLength ? '-' : to.charAt(i); int size = toQuality.size(); final byte qual = size > 0 && i < size ? toQuality.byteAt(i) : 40; bases.add((byte) toChar); scores.add(qual); deletedBases++; } addBlock(insertionBlocks, alignmentEntry.getPosition() + var.getPosition(), bases, scores); bases.clear(); scores.clear(); } else if (!to.contains("-")) { for (int i = 0; i < toLength; i++) { final int offset = j + var.getPosition() + i - 1 + leftPadding - insertedBases; if (offset > 0 && offset < readBases.length) { readBases[offset] = (byte) to.charAt(i); if (i < toQuality.size()) { readQual[offset] = toQuality.byteAt(i); } } } } else { // has read deletion: insertedBases++; } } int pos = start; int matchLength = alignmentEntry.getQueryAlignedLength() - deletedBases; int endAlignmentRefPosition = matchLength + start; bases.clear(); scores.clear(); int maxIndex = Math.min(readBases.length, readQual.length); while (pos < endAlignmentRefPosition) { final int index = pos - start + leftPadding; if (index < maxIndex) { bases.add(readBases[index]); scores.add(readQual[index]); } else { break; } ++pos; } addBlock(blocks, start, bases, scores); blocks = introduceDeletions(blocks, entry); if (showSoftClipped && entry.hasSoftClippedBasesRight()) { int targetAlignedLength = entry.getTargetAlignedLength(); addSoftClipBlock( blocks, entry.getPosition() + targetAlignedLength, entry.getSoftClippedBasesRight(), readQual, entry.hasSoftClippedQualityRight(), entry.getSoftClippedQualityRight().toByteArray(), entry.getQueryAlignedLength() + entry.getSoftClippedBasesLeft().length()); } block = blocks.toArray(new AlignmentBlock[blocks.size()]); Arrays.sort(block, blockComparator); insertionBlock = insertionBlocks.toArray(new AlignmentBlock[insertionBlocks.size()]); Arrays.sort(insertionBlock, blockComparator); ObjectArrayList<GobyAlignment> list = null; if (alignmentEntry.hasSplicedForwardAlignmentLink() || 
alignmentEntry.hasSplicedBackwardAlignmentLink()) { // if has a forward link, store a reference to this alignment in the reader (which represents // the window scope) list = iterator.cacheSpliceComponent(this); if (list.size() > 1 && spliceListIsValid(list)) { final GobyAlignment spliceHeadAlignment = list.get(0); ObjectArrayList<AlignmentBlock> splicedBlocks = new ObjectArrayList<AlignmentBlock>(); splicedBlocks.addAll(ObjectArrayList.wrap(spliceHeadAlignment.block)); splicedBlocks.addAll(blocks); spliceHeadAlignment.block = splicedBlocks.toArray(new AlignmentBlock[splicedBlocks.size()]); ObjectArrayList<AlignmentBlock> splicedInsertionBlocks = new ObjectArrayList<AlignmentBlock>(); splicedInsertionBlocks.addAll(ObjectArrayList.wrap(spliceHeadAlignment.insertionBlock)); splicedInsertionBlocks.addAll(insertionBlocks); spliceHeadAlignment.insertionBlock = splicedInsertionBlocks.toArray(new AlignmentBlock[splicedInsertionBlocks.size()]); if (spliceHeadAlignment.gapTypes == null) { spliceHeadAlignment.gapTypes = new CharArrayList(10); } spliceHeadAlignment.gapTypes.add(SamAlignment.SKIPPED_REGION); // Since the previous alignment carries this information, we clear up block and // insertionBlock // in this alignment, but keep any softClips: this.block = keepSoftClips(block); this.insertionBlock = new AlignmentBlock[0]; } } }
/**
 * Transforms the read index into a read name.
 *
 * @return the decimal string form of the entry's query index
 */
public String getReadName() {
  final int queryIndex = entry.getQueryIndex();
  return String.valueOf(queryIndex);
}
/**
 * Gets the reference id of this entry's target from the iterator and prepends "chr".
 *
 * @return "chr" followed by the reference id registered for the entry's target index
 */
public String getChromosome() {
  final int referenceIndex = entry.getTargetIndex();
  return "chr" + iterator.indexToReferenceId.getId(referenceIndex).toString();
}
public float getScore() { // LOG.info("getScore"); return entry.getScore(); }
public int getStart() { // //LOG.info("getStart"); return entry.getPosition(); }
/**
 * Returns the insert size stored in the entry.
 *
 * @return the entry's insert size, or 0 when the entry does not carry one
 */
public int getInferredInsertSize() {
  return entry.hasInsertSize() ? entry.getInsertSize() : 0;
}
@Test public void testLastToCompact1() throws IOException { // test LastToCompact convert+filtering // read fake data and convert+filter final LastToCompactMode processor = new LastToCompactMode(); final int LAST_TO_COMPACT_M_PARAM = 2; processor.setAmbiguityThreshold(LAST_TO_COMPACT_M_PARAM); processor.setInputFile("test-results/alignments/last-to-compact/last-101.maf"); processor.setOutputFile("test-results/alignments/last-to-compact/last-101.compact"); processor.setTargetReferenceIdsFilename( "test-results/alignments/last-to-compact/last-reference.compact-reads"); processor.setOnlyMafFile(true); processor.setNumberOfReads(2857819); processor.setLargestQueryIndex(2857819); processor.setSmallestQueryIndex(0); processor.setPropagateQueryIds(false); processor.setPropagateTargetIds(true); processor.execute(); // read compact alignment results final AlignmentReaderImpl reader = new AlignmentReaderImpl(processor.getOutputFile()); reader.readHeader(); assertEquals(2857819, reader.getNumberOfQueries()); assertEquals(1, reader.getNumberOfTargets()); assertTrue(reader.hasQueryIndexOccurrences()); // lookup tables final Int2IntOpenHashMap queryIndex2NumberOfHits = new Int2IntOpenHashMap(); final Int2FloatOpenHashMap queryIndex2Score = new Int2FloatOpenHashMap(); final Int2IntOpenHashMap queryIndex2Multiplicity = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2NumberOfIndels = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2NumberOfMismatches = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2Position = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2QueryAlignedLength = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2QueryPosition = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2TargetIndex = new Int2IntOpenHashMap(); final Int2BooleanOpenHashMap queryIndex2MatchingReverseStrand = new Int2BooleanOpenHashMap(); final Int2IntOpenHashMap queryIndex2TargetAlignedLength = new 
Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2QueryIndexOcc = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2Ambiguity = new Int2IntOpenHashMap(); // enter alignment data int qii; while (reader.hasNext()) { final Alignments.AlignmentEntry aln = reader.next(); qii = aln.getQueryIndex(); final int numHits = queryIndex2NumberOfHits.get(qii); queryIndex2NumberOfHits.put(qii, numHits + 1); queryIndex2Score.put(qii, aln.getScore()); queryIndex2Multiplicity.put(qii, aln.getMultiplicity()); queryIndex2NumberOfIndels.put(qii, aln.getNumberOfIndels()); queryIndex2NumberOfMismatches.put(qii, aln.getNumberOfMismatches()); queryIndex2Position.put(qii, aln.getPosition()); queryIndex2QueryAlignedLength.put(qii, aln.getQueryAlignedLength()); queryIndex2QueryPosition.put(qii, aln.getQueryPosition()); queryIndex2TargetIndex.put(qii, aln.getTargetIndex()); queryIndex2MatchingReverseStrand.put(qii, aln.getMatchingReverseStrand()); queryIndex2TargetAlignedLength.put(qii, aln.getTargetAlignedLength()); queryIndex2QueryIndexOcc.put(qii, aln.getQueryIndexOccurrences()); queryIndex2Ambiguity.put(qii, aln.getAmbiguity()); } // // validate alignment data using values from getMafInput() below // // there are a total of 5 entries with ID 2857818 // 2 entries have score = 35 (the maximum score for this ID) // 3 entries are filtered b/c their scores are below 35 qii = 2857818; assertEquals(queryIndex2NumberOfHits.get(qii), 2); assertEquals((int) queryIndex2Score.get(qii), 35); assertEquals(queryIndex2Multiplicity.get(qii), 1); assertEquals(queryIndex2NumberOfIndels.get(qii), 0); assertEquals(queryIndex2NumberOfMismatches.get(qii), 0); assertEquals(queryIndex2Position.get(qii), 1614); // last entry added assertEquals(queryIndex2QueryAlignedLength.get(qii), 35); assertEquals(queryIndex2QueryPosition.get(qii), 0); assertEquals(queryIndex2TargetIndex.get(qii), 0); assertEquals(queryIndex2MatchingReverseStrand.get(qii), false); 
assertEquals(queryIndex2TargetAlignedLength.get(qii), 35); assertEquals(2, queryIndex2QueryIndexOcc.get(qii)); assertEquals(2, queryIndex2Ambiguity.get(qii)); // there are 5 entries with the score = 35 (the maximum score for this ID) // filtered due to ambiguity qii = 577287; assertEquals(queryIndex2NumberOfHits.get(qii), 0); assertEquals((int) queryIndex2Score.get(qii), 0); assertEquals(queryIndex2Multiplicity.get(qii), 0); assertEquals(queryIndex2NumberOfIndels.get(qii), 0); assertEquals(queryIndex2NumberOfMismatches.get(qii), 0); assertEquals(queryIndex2Position.get(qii), 0); assertEquals(queryIndex2QueryAlignedLength.get(qii), 0); assertEquals(queryIndex2QueryPosition.get(qii), 0); assertEquals(queryIndex2TargetIndex.get(qii), 0); assertEquals(queryIndex2MatchingReverseStrand.get(qii), false); assertEquals(queryIndex2TargetAlignedLength.get(qii), 0); assertEquals(0, queryIndex2QueryIndexOcc.get(qii)); assertEquals(0, queryIndex2Ambiguity.get(qii)); // reader.close(); }
/** * Test the parsing of sequence variations from Last MAF file. * * @throws IOException */ @Test public void testLastToCompactVariations() throws IOException { // test LastToCompact convert+filtering // read fake data and convert+filter final LastToCompactMode processor = new LastToCompactMode(); final int LAST_TO_COMPACT_M_PARAM = 2; processor.setAmbiguityThreshold(LAST_TO_COMPACT_M_PARAM); processor.setInputFile("test-results/alignments/last-to-compact/last-103-variations.maf"); processor.setOutputFile("test-results/alignments/last-to-compact/last-103-variations"); processor.setTargetReferenceIdsFilename( "test-results/alignments/last-to-compact/last-reference.compact-reads"); processor.setOnlyMafFile(true); processor.setNumberOfReads(2857822); processor.setLargestQueryIndex(2857822); processor.setSmallestQueryIndex(0); processor.setPropagateQueryIds(false); processor.setPropagateTargetIds(true); processor.setQualityFilterParameters( "threshold=1"); // allow everything to pass through, important to detect all variations in // this test. 
processor.execute(); // read compact alignment results final AlignmentReaderImpl reader = new AlignmentReaderImpl(processor.getOutputFile()); reader.readHeader(); assertEquals(2857822, reader.getNumberOfQueries()); assertEquals(1, reader.getNumberOfTargets()); int entryIndex = 0; while (reader.hasNext()) { final Alignments.AlignmentEntry alignmentEntry = reader.next(); System.out.println(entryIndex + " entry : " + alignmentEntry); switch (entryIndex) { case 0: assertEquals(2, alignmentEntry.getSequenceVariationsCount()); final Alignments.SequenceVariation var1 = alignmentEntry.getSequenceVariations(0); assertEquals("C", var1.getTo()); assertEquals("G", var1.getFrom()); assertEquals(1, var1.getPosition()); final Alignments.SequenceVariation var2 = alignmentEntry.getSequenceVariations(1); assertEquals("C", var2.getTo()); assertEquals("A", var2.getFrom()); assertEquals(11, var2.getPosition()); break; case 1: assertEquals(1, alignmentEntry.getSequenceVariationsCount()); final Alignments.SequenceVariation var1_0 = alignmentEntry.getSequenceVariations(0); assertEquals("---", var1_0.getTo()); assertEquals("TTT", var1_0.getFrom()); assertEquals(3, var1_0.getPosition()); break; case 2: assertEquals(2, alignmentEntry.getSequenceVariationsCount()); final Alignments.SequenceVariation var2_1 = alignmentEntry.getSequenceVariations(0); assertEquals("A", var2_1.getTo()); assertEquals("G", var2_1.getFrom()); assertEquals(2, var2_1.getPosition()); final Alignments.SequenceVariation var2_2 = alignmentEntry.getSequenceVariations(1); assertEquals("A", var2_2.getTo()); assertEquals("-", var2_2.getFrom()); assertEquals(29, var2_2.getPosition()); break; default: break; } entryIndex++; } assertEquals(4, entryIndex); }
public int getAlignmentEnd() { // LOG.info("getAlignmentEnd"); return entry.getPosition() + entry.getTargetAlignedLength(); }