@Test public void testLastToCompact1() throws IOException { // test LastToCompact convert+filtering // read fake data and convert+filter final LastToCompactMode processor = new LastToCompactMode(); final int LAST_TO_COMPACT_M_PARAM = 2; processor.setAmbiguityThreshold(LAST_TO_COMPACT_M_PARAM); processor.setInputFile("test-results/alignments/last-to-compact/last-101.maf"); processor.setOutputFile("test-results/alignments/last-to-compact/last-101.compact"); processor.setTargetReferenceIdsFilename( "test-results/alignments/last-to-compact/last-reference.compact-reads"); processor.setOnlyMafFile(true); processor.setNumberOfReads(2857819); processor.setLargestQueryIndex(2857819); processor.setSmallestQueryIndex(0); processor.setPropagateQueryIds(false); processor.setPropagateTargetIds(true); processor.execute(); // read compact alignment results final AlignmentReaderImpl reader = new AlignmentReaderImpl(processor.getOutputFile()); reader.readHeader(); assertEquals(2857819, reader.getNumberOfQueries()); assertEquals(1, reader.getNumberOfTargets()); assertTrue(reader.hasQueryIndexOccurrences()); // lookup tables final Int2IntOpenHashMap queryIndex2NumberOfHits = new Int2IntOpenHashMap(); final Int2FloatOpenHashMap queryIndex2Score = new Int2FloatOpenHashMap(); final Int2IntOpenHashMap queryIndex2Multiplicity = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2NumberOfIndels = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2NumberOfMismatches = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2Position = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2QueryAlignedLength = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2QueryPosition = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2TargetIndex = new Int2IntOpenHashMap(); final Int2BooleanOpenHashMap queryIndex2MatchingReverseStrand = new Int2BooleanOpenHashMap(); final Int2IntOpenHashMap queryIndex2TargetAlignedLength = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2QueryIndexOcc = new Int2IntOpenHashMap(); final Int2IntOpenHashMap queryIndex2Ambiguity = new Int2IntOpenHashMap(); // enter alignment data int qii; while (reader.hasNext()) { final Alignments.AlignmentEntry aln = reader.next(); qii = aln.getQueryIndex(); final int numHits = queryIndex2NumberOfHits.get(qii); queryIndex2NumberOfHits.put(qii, numHits + 1); queryIndex2Score.put(qii, aln.getScore()); queryIndex2Multiplicity.put(qii, aln.getMultiplicity()); queryIndex2NumberOfIndels.put(qii, aln.getNumberOfIndels()); queryIndex2NumberOfMismatches.put(qii, aln.getNumberOfMismatches()); queryIndex2Position.put(qii, aln.getPosition()); queryIndex2QueryAlignedLength.put(qii, aln.getQueryAlignedLength()); queryIndex2QueryPosition.put(qii, aln.getQueryPosition()); queryIndex2TargetIndex.put(qii, aln.getTargetIndex()); queryIndex2MatchingReverseStrand.put(qii, aln.getMatchingReverseStrand()); queryIndex2TargetAlignedLength.put(qii, aln.getTargetAlignedLength()); queryIndex2QueryIndexOcc.put(qii, aln.getQueryIndexOccurrences()); queryIndex2Ambiguity.put(qii, aln.getAmbiguity()); } // // validate alignment data using values from getMafInput() below // // there are a total of 5 entries with ID 2857818 // 2 entries have score = 35 (the maximum score for this ID) // 3 entries are filtered b/c their scores are below 35 qii = 2857818; assertEquals(queryIndex2NumberOfHits.get(qii), 2); assertEquals((int) queryIndex2Score.get(qii), 35); assertEquals(queryIndex2Multiplicity.get(qii), 1); assertEquals(queryIndex2NumberOfIndels.get(qii), 0); assertEquals(queryIndex2NumberOfMismatches.get(qii), 0); assertEquals(queryIndex2Position.get(qii), 1614); // last entry added assertEquals(queryIndex2QueryAlignedLength.get(qii), 35); assertEquals(queryIndex2QueryPosition.get(qii), 0); assertEquals(queryIndex2TargetIndex.get(qii), 0); assertEquals(queryIndex2MatchingReverseStrand.get(qii), false); assertEquals(queryIndex2TargetAlignedLength.get(qii), 35); assertEquals(2, queryIndex2QueryIndexOcc.get(qii)); assertEquals(2, queryIndex2Ambiguity.get(qii)); // there are 5 entries with the score = 35 (the maximum score for this ID) // filtered due to ambiguity qii = 577287; assertEquals(queryIndex2NumberOfHits.get(qii), 0); assertEquals((int) queryIndex2Score.get(qii), 0); assertEquals(queryIndex2Multiplicity.get(qii), 0); assertEquals(queryIndex2NumberOfIndels.get(qii), 0); assertEquals(queryIndex2NumberOfMismatches.get(qii), 0); assertEquals(queryIndex2Position.get(qii), 0); assertEquals(queryIndex2QueryAlignedLength.get(qii), 0); assertEquals(queryIndex2QueryPosition.get(qii), 0); assertEquals(queryIndex2TargetIndex.get(qii), 0); assertEquals(queryIndex2MatchingReverseStrand.get(qii), false); assertEquals(queryIndex2TargetAlignedLength.get(qii), 0); assertEquals(0, queryIndex2QueryIndexOcc.get(qii)); assertEquals(0, queryIndex2Ambiguity.get(qii)); // reader.close(); }
/** * Construct alignment blocks from the Goby alignment entry. This method uses the convention that * '=' denotes a match to the reference. * * <p>Conventions for storing sequence variations in Goby alignments are described <a * href="http://tinyurl.com/goby-sequence-variations">here</a> * * @param alignmentEntry The Goby alignment entry to use */ public void buildBlocks(Alignments.AlignmentEntry alignmentEntry) { ObjectArrayList<AlignmentBlock> blocks = new ObjectArrayList<AlignmentBlock>(); ObjectArrayList<AlignmentBlock> insertionBlocks = new ObjectArrayList<AlignmentBlock>(); int start = alignmentEntry.getPosition(); ByteArrayList bases = new ByteArrayList(); ByteArrayList scores = new ByteArrayList(); int readLength = alignmentEntry.getQueryLength(); byte[] readBases = new byte[readLength]; byte[] readQual = new byte[readLength]; Arrays.fill(readBases, (byte) '='); if (alignmentEntry.hasReadQualityScores()) { readQual = alignmentEntry.getReadQualityScores().toByteArray(); } else { Arrays.fill(readQual, (byte) 40); } int j = 0; int insertedBases = 0; int deletedBases = 0; final int leftPadding = alignmentEntry.getQueryPosition(); boolean showSoftClipped = PreferenceManager.getInstance().getAsBoolean(PreferenceManager.SAM_SHOW_SOFT_CLIPPED); if (showSoftClipped && entry.hasSoftClippedBasesLeft()) { int clipLength = entry.getSoftClippedBasesLeft().length(); addSoftClipBlock( blocks, Math.max(0, entry.getPosition() - clipLength), entry.getSoftClippedBasesLeft(), readQual, entry.hasSoftClippedQualityLeft(), entry.getSoftClippedQualityLeft().toByteArray(), 0); } for (Alignments.SequenceVariation var : alignmentEntry.getSequenceVariationsList()) { final String from = var.getFrom(); final int fromLength = from.length(); final String to = var.getTo(); final int toLength = from.length(); final int sequenceVariationLength = Math.max(fromLength, toLength); final ByteString toQuality = var.getToQuality(); if (hasReadInsertion(from)) { bases.clear(); scores.clear(); for (int i = 0; i < sequenceVariationLength; i++) { final char toChar = i >= toLength ? '-' : to.charAt(i); int size = toQuality.size(); final byte qual = size > 0 && i < size ? toQuality.byteAt(i) : 40; bases.add((byte) toChar); scores.add(qual); deletedBases++; } addBlock(insertionBlocks, alignmentEntry.getPosition() + var.getPosition(), bases, scores); bases.clear(); scores.clear(); } else if (!to.contains("-")) { for (int i = 0; i < toLength; i++) { final int offset = j + var.getPosition() + i - 1 + leftPadding - insertedBases; if (offset > 0 && offset < readBases.length) { readBases[offset] = (byte) to.charAt(i); if (i < toQuality.size()) { readQual[offset] = toQuality.byteAt(i); } } } } else { // has read deletion: insertedBases++; } } int pos = start; int matchLength = alignmentEntry.getQueryAlignedLength() - deletedBases; int endAlignmentRefPosition = matchLength + start; bases.clear(); scores.clear(); int maxIndex = Math.min(readBases.length, readQual.length); while (pos < endAlignmentRefPosition) { final int index = pos - start + leftPadding; if (index < maxIndex) { bases.add(readBases[index]); scores.add(readQual[index]); } else { break; } ++pos; } addBlock(blocks, start, bases, scores); blocks = introduceDeletions(blocks, entry); if (showSoftClipped && entry.hasSoftClippedBasesRight()) { int targetAlignedLength = entry.getTargetAlignedLength(); addSoftClipBlock( blocks, entry.getPosition() + targetAlignedLength, entry.getSoftClippedBasesRight(), readQual, entry.hasSoftClippedQualityRight(), entry.getSoftClippedQualityRight().toByteArray(), entry.getQueryAlignedLength() + entry.getSoftClippedBasesLeft().length()); } block = blocks.toArray(new AlignmentBlock[blocks.size()]); Arrays.sort(block, blockComparator); insertionBlock = insertionBlocks.toArray(new AlignmentBlock[insertionBlocks.size()]); Arrays.sort(insertionBlock, blockComparator); ObjectArrayList<GobyAlignment> list = null; if (alignmentEntry.hasSplicedForwardAlignmentLink() || alignmentEntry.hasSplicedBackwardAlignmentLink()) { // if has a forward link, store a reference to this alignment in the reader (which represents // the window scope) list = iterator.cacheSpliceComponent(this); if (list.size() > 1 && spliceListIsValid(list)) { final GobyAlignment spliceHeadAlignment = list.get(0); ObjectArrayList<AlignmentBlock> splicedBlocks = new ObjectArrayList<AlignmentBlock>(); splicedBlocks.addAll(ObjectArrayList.wrap(spliceHeadAlignment.block)); splicedBlocks.addAll(blocks); spliceHeadAlignment.block = splicedBlocks.toArray(new AlignmentBlock[splicedBlocks.size()]); ObjectArrayList<AlignmentBlock> splicedInsertionBlocks = new ObjectArrayList<AlignmentBlock>(); splicedInsertionBlocks.addAll(ObjectArrayList.wrap(spliceHeadAlignment.insertionBlock)); splicedInsertionBlocks.addAll(insertionBlocks); spliceHeadAlignment.insertionBlock = splicedInsertionBlocks.toArray(new AlignmentBlock[splicedInsertionBlocks.size()]); if (spliceHeadAlignment.gapTypes == null) { spliceHeadAlignment.gapTypes = new CharArrayList(10); } spliceHeadAlignment.gapTypes.add(SamAlignment.SKIPPED_REGION); // Since the previous alignment carries this information, we clear up block and // insertionBlock // in this alignment, but keep any softClips: this.block = keepSoftClips(block); this.insertionBlock = new AlignmentBlock[0]; } } }