@Test
  public void testLastToCompact1() throws IOException {

    // test LastToCompact convert+filtering

    // read fake data and convert+filter
    final LastToCompactMode processor = new LastToCompactMode();
    final int LAST_TO_COMPACT_M_PARAM = 2;
    processor.setAmbiguityThreshold(LAST_TO_COMPACT_M_PARAM);
    processor.setInputFile("test-results/alignments/last-to-compact/last-101.maf");
    processor.setOutputFile("test-results/alignments/last-to-compact/last-101.compact");
    processor.setTargetReferenceIdsFilename(
        "test-results/alignments/last-to-compact/last-reference.compact-reads");
    processor.setOnlyMafFile(true);
    processor.setNumberOfReads(2857819);
    processor.setLargestQueryIndex(2857819);
    processor.setSmallestQueryIndex(0);
    processor.setPropagateQueryIds(false);
    processor.setPropagateTargetIds(true);
    processor.execute();

    // read compact alignment results
    final AlignmentReaderImpl reader = new AlignmentReaderImpl(processor.getOutputFile());
    reader.readHeader();
    assertEquals(2857819, reader.getNumberOfQueries());
    assertEquals(1, reader.getNumberOfTargets());
    assertTrue(reader.hasQueryIndexOccurrences());

    // lookup tables
    final Int2IntOpenHashMap queryIndex2NumberOfHits = new Int2IntOpenHashMap();

    final Int2FloatOpenHashMap queryIndex2Score = new Int2FloatOpenHashMap();
    final Int2IntOpenHashMap queryIndex2Multiplicity = new Int2IntOpenHashMap();
    final Int2IntOpenHashMap queryIndex2NumberOfIndels = new Int2IntOpenHashMap();
    final Int2IntOpenHashMap queryIndex2NumberOfMismatches = new Int2IntOpenHashMap();
    final Int2IntOpenHashMap queryIndex2Position = new Int2IntOpenHashMap();
    final Int2IntOpenHashMap queryIndex2QueryAlignedLength = new Int2IntOpenHashMap();
    final Int2IntOpenHashMap queryIndex2QueryPosition = new Int2IntOpenHashMap();
    final Int2IntOpenHashMap queryIndex2TargetIndex = new Int2IntOpenHashMap();
    final Int2BooleanOpenHashMap queryIndex2MatchingReverseStrand = new Int2BooleanOpenHashMap();
    final Int2IntOpenHashMap queryIndex2TargetAlignedLength = new Int2IntOpenHashMap();
    final Int2IntOpenHashMap queryIndex2QueryIndexOcc = new Int2IntOpenHashMap();
    final Int2IntOpenHashMap queryIndex2Ambiguity = new Int2IntOpenHashMap();

    // enter alignment data
    int qii;
    while (reader.hasNext()) {

      final Alignments.AlignmentEntry aln = reader.next();
      qii = aln.getQueryIndex();

      final int numHits = queryIndex2NumberOfHits.get(qii);

      queryIndex2NumberOfHits.put(qii, numHits + 1);

      queryIndex2Score.put(qii, aln.getScore());
      queryIndex2Multiplicity.put(qii, aln.getMultiplicity());
      queryIndex2NumberOfIndels.put(qii, aln.getNumberOfIndels());
      queryIndex2NumberOfMismatches.put(qii, aln.getNumberOfMismatches());
      queryIndex2Position.put(qii, aln.getPosition());
      queryIndex2QueryAlignedLength.put(qii, aln.getQueryAlignedLength());
      queryIndex2QueryPosition.put(qii, aln.getQueryPosition());
      queryIndex2TargetIndex.put(qii, aln.getTargetIndex());
      queryIndex2MatchingReverseStrand.put(qii, aln.getMatchingReverseStrand());
      queryIndex2TargetAlignedLength.put(qii, aln.getTargetAlignedLength());
      queryIndex2QueryIndexOcc.put(qii, aln.getQueryIndexOccurrences());
      queryIndex2Ambiguity.put(qii, aln.getAmbiguity());
    }

    //
    // validate alignment data using values from getMafInput() below
    //

    // there are a total of 5 entries with ID 2857818
    // 2 entries have score = 35 (the maximum score for this ID)
    // 3 entries are filtered b/c their scores are below 35
    qii = 2857818;
    assertEquals(queryIndex2NumberOfHits.get(qii), 2);
    assertEquals((int) queryIndex2Score.get(qii), 35);
    assertEquals(queryIndex2Multiplicity.get(qii), 1);
    assertEquals(queryIndex2NumberOfIndels.get(qii), 0);
    assertEquals(queryIndex2NumberOfMismatches.get(qii), 0);
    assertEquals(queryIndex2Position.get(qii), 1614); // last entry added
    assertEquals(queryIndex2QueryAlignedLength.get(qii), 35);
    assertEquals(queryIndex2QueryPosition.get(qii), 0);
    assertEquals(queryIndex2TargetIndex.get(qii), 0);
    assertEquals(queryIndex2MatchingReverseStrand.get(qii), false);
    assertEquals(queryIndex2TargetAlignedLength.get(qii), 35);
    assertEquals(2, queryIndex2QueryIndexOcc.get(qii));
    assertEquals(2, queryIndex2Ambiguity.get(qii));

    // there are 5 entries with the score = 35 (the maximum score for this ID)
    // filtered due to ambiguity
    qii = 577287;
    assertEquals(queryIndex2NumberOfHits.get(qii), 0);
    assertEquals((int) queryIndex2Score.get(qii), 0);
    assertEquals(queryIndex2Multiplicity.get(qii), 0);
    assertEquals(queryIndex2NumberOfIndels.get(qii), 0);
    assertEquals(queryIndex2NumberOfMismatches.get(qii), 0);
    assertEquals(queryIndex2Position.get(qii), 0);
    assertEquals(queryIndex2QueryAlignedLength.get(qii), 0);
    assertEquals(queryIndex2QueryPosition.get(qii), 0);
    assertEquals(queryIndex2TargetIndex.get(qii), 0);
    assertEquals(queryIndex2MatchingReverseStrand.get(qii), false);
    assertEquals(queryIndex2TargetAlignedLength.get(qii), 0);
    assertEquals(0, queryIndex2QueryIndexOcc.get(qii));
    assertEquals(0, queryIndex2Ambiguity.get(qii));

    //
    reader.close();
  }
예제 #2
0
  /**
   * Construct alignment blocks from the Goby alignment entry. This method uses the convention that
   * '=' denotes a match to the reference.
   *
   * <p>Conventions for storing sequence variations in Goby alignments are described <a
   * href="http://tinyurl.com/goby-sequence-variations">here</a>
   *
   * @param alignmentEntry The Goby alignment entry to use
   */
  public void buildBlocks(Alignments.AlignmentEntry alignmentEntry) {

    ObjectArrayList<AlignmentBlock> blocks = new ObjectArrayList<AlignmentBlock>();
    ObjectArrayList<AlignmentBlock> insertionBlocks = new ObjectArrayList<AlignmentBlock>();

    int start = alignmentEntry.getPosition();
    ByteArrayList bases = new ByteArrayList();
    ByteArrayList scores = new ByteArrayList();
    int readLength = alignmentEntry.getQueryLength();

    byte[] readBases = new byte[readLength];
    byte[] readQual = new byte[readLength];
    Arrays.fill(readBases, (byte) '=');
    if (alignmentEntry.hasReadQualityScores()) {
      readQual = alignmentEntry.getReadQualityScores().toByteArray();
    } else {
      Arrays.fill(readQual, (byte) 40);
    }
    int j = 0;
    int insertedBases = 0;
    int deletedBases = 0;
    final int leftPadding = alignmentEntry.getQueryPosition();
    boolean showSoftClipped =
        PreferenceManager.getInstance().getAsBoolean(PreferenceManager.SAM_SHOW_SOFT_CLIPPED);
    if (showSoftClipped && entry.hasSoftClippedBasesLeft()) {
      int clipLength = entry.getSoftClippedBasesLeft().length();

      addSoftClipBlock(
          blocks,
          Math.max(0, entry.getPosition() - clipLength),
          entry.getSoftClippedBasesLeft(),
          readQual,
          entry.hasSoftClippedQualityLeft(),
          entry.getSoftClippedQualityLeft().toByteArray(),
          0);
    }
    for (Alignments.SequenceVariation var : alignmentEntry.getSequenceVariationsList()) {
      final String from = var.getFrom();
      final int fromLength = from.length();
      final String to = var.getTo();
      final int toLength = from.length();
      final int sequenceVariationLength = Math.max(fromLength, toLength);
      final ByteString toQuality = var.getToQuality();

      if (hasReadInsertion(from)) {
        bases.clear();
        scores.clear();
        for (int i = 0; i < sequenceVariationLength; i++) {
          final char toChar = i >= toLength ? '-' : to.charAt(i);
          int size = toQuality.size();
          final byte qual = size > 0 && i < size ? toQuality.byteAt(i) : 40;

          bases.add((byte) toChar);
          scores.add(qual);
          deletedBases++;
        }
        addBlock(insertionBlocks, alignmentEntry.getPosition() + var.getPosition(), bases, scores);
        bases.clear();
        scores.clear();
      } else if (!to.contains("-")) {
        for (int i = 0; i < toLength; i++) {
          final int offset = j + var.getPosition() + i - 1 + leftPadding - insertedBases;
          if (offset > 0 && offset < readBases.length) {
            readBases[offset] = (byte) to.charAt(i);
            if (i < toQuality.size()) {
              readQual[offset] = toQuality.byteAt(i);
            }
          }
        }
      } else {
        // has read deletion:
        insertedBases++;
      }
    }

    int pos = start;
    int matchLength = alignmentEntry.getQueryAlignedLength() - deletedBases;
    int endAlignmentRefPosition = matchLength + start;
    bases.clear();
    scores.clear();
    int maxIndex = Math.min(readBases.length, readQual.length);
    while (pos < endAlignmentRefPosition) {
      final int index = pos - start + leftPadding;
      if (index < maxIndex) {
        bases.add(readBases[index]);
        scores.add(readQual[index]);
      } else {
        break;
      }
      ++pos;
    }

    addBlock(blocks, start, bases, scores);
    blocks = introduceDeletions(blocks, entry);
    if (showSoftClipped && entry.hasSoftClippedBasesRight()) {

      int targetAlignedLength = entry.getTargetAlignedLength();
      addSoftClipBlock(
          blocks,
          entry.getPosition() + targetAlignedLength,
          entry.getSoftClippedBasesRight(),
          readQual,
          entry.hasSoftClippedQualityRight(),
          entry.getSoftClippedQualityRight().toByteArray(),
          entry.getQueryAlignedLength() + entry.getSoftClippedBasesLeft().length());
    }
    block = blocks.toArray(new AlignmentBlock[blocks.size()]);
    Arrays.sort(block, blockComparator);
    insertionBlock = insertionBlocks.toArray(new AlignmentBlock[insertionBlocks.size()]);
    Arrays.sort(insertionBlock, blockComparator);
    ObjectArrayList<GobyAlignment> list = null;

    if (alignmentEntry.hasSplicedForwardAlignmentLink()
        || alignmentEntry.hasSplicedBackwardAlignmentLink()) {
      // if has a forward link, store a reference to this alignment in the reader (which represents
      // the window scope)
      list = iterator.cacheSpliceComponent(this);
      if (list.size() > 1 && spliceListIsValid(list)) {

        final GobyAlignment spliceHeadAlignment = list.get(0);

        ObjectArrayList<AlignmentBlock> splicedBlocks = new ObjectArrayList<AlignmentBlock>();
        splicedBlocks.addAll(ObjectArrayList.wrap(spliceHeadAlignment.block));
        splicedBlocks.addAll(blocks);
        spliceHeadAlignment.block = splicedBlocks.toArray(new AlignmentBlock[splicedBlocks.size()]);

        ObjectArrayList<AlignmentBlock> splicedInsertionBlocks =
            new ObjectArrayList<AlignmentBlock>();
        splicedInsertionBlocks.addAll(ObjectArrayList.wrap(spliceHeadAlignment.insertionBlock));
        splicedInsertionBlocks.addAll(insertionBlocks);
        spliceHeadAlignment.insertionBlock =
            splicedInsertionBlocks.toArray(new AlignmentBlock[splicedInsertionBlocks.size()]);

        if (spliceHeadAlignment.gapTypes == null) {
          spliceHeadAlignment.gapTypes = new CharArrayList(10);
        }
        spliceHeadAlignment.gapTypes.add(SamAlignment.SKIPPED_REGION);

        // Since the previous alignment carries this information, we clear up block and
        // insertionBlock
        // in this alignment, but keep any softClips:
        this.block = keepSoftClips(block);
        this.insertionBlock = new AlignmentBlock[0];
      }
    }
  }