@Test(dataProvider = "LIBSTest")
  public void testLIBS(LIBSTest params) {
    final int locus = 44367788;

    SAMRecord read =
        ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength);
    read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength));
    read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength));
    read.setCigarString(params.cigar);

    // create the iterator by state with the fake reads and fake records
    li = makeLTBS(Arrays.asList(read), createTestReadProperties());
    final LIBS_position tester = new LIBS_position(read);

    while (li.hasNext()) {
      AlignmentContext alignmentContext = li.next();
      ReadBackedPileup p = alignmentContext.getBasePileup();
      Assert.assertTrue(p.getNumberOfElements() == 1);
      PileupElement pe = p.iterator().next();

      tester.stepForwardOnGenome();

      Assert.assertEquals(pe.isBeforeDeletedBase(), tester.isBeforeDeletedBase);
      Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart);
      Assert.assertEquals(pe.isAfterDeletedBase(), tester.isAfterDeletedBase);
      Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd);
      Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion);
      Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion);
      Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip);
      Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset());
    }
  }
  @Test
  public void testWholeIndelReadInIsolation() {
    final int firstLocus = 44367789;

    // create a test version of the Reads object
    ReadProperties readAttributes = createTestReadProperties();

    SAMRecord indelOnlyRead =
        ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76);
    indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
    indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
    indelOnlyRead.setCigarString("76I");

    List<SAMRecord> reads = Arrays.asList(indelOnlyRead);

    // create the iterator by state with the fake reads and fake records
    li = makeLTBS(reads, readAttributes);

    // Traditionally, reads that end with indels bleed into the pileup at the following locus.
    // Verify that the next pileup contains this read
    // and considers it to be an indel-containing read.
    Assert.assertTrue(
        li.hasNext(),
        "Should have found a whole-indel read in the normal base pileup without extended events enabled");
    AlignmentContext alignmentContext = li.next();
    Assert.assertEquals(
        alignmentContext.getLocation().getStart(),
        firstLocus,
        "Base pileup is at incorrect location.");
    ReadBackedPileup basePileup = alignmentContext.getBasePileup();
    Assert.assertEquals(basePileup.getReads().size(), 1, "Pileup is of incorrect size");
    Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect");
  }
  @Test
  public void testIndelsInRegularPileup() {
    final byte[] bases = new byte[] {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
    final byte[] indelBases =
        new byte[] {'A', 'A', 'A', 'A', 'C', 'T', 'A', 'A', 'A', 'A', 'A', 'A'};

    // create a test version of the Reads object
    ReadProperties readAttributes = createTestReadProperties();

    SAMRecord before = ArtificialSAMUtils.createArtificialRead(header, "before", 0, 1, 10);
    before.setReadBases(bases);
    before.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    before.setCigarString("10M");

    SAMRecord during = ArtificialSAMUtils.createArtificialRead(header, "during", 0, 2, 10);
    during.setReadBases(indelBases);
    during.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    during.setCigarString("4M2I6M");

    SAMRecord after = ArtificialSAMUtils.createArtificialRead(header, "after", 0, 3, 10);
    after.setReadBases(bases);
    after.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    after.setCigarString("10M");

    List<SAMRecord> reads = Arrays.asList(before, during, after);

    // create the iterator by state with the fake reads and fake records
    li = makeLTBS(reads, readAttributes);

    boolean foundIndel = false;
    while (li.hasNext()) {
      AlignmentContext context = li.next();
      ReadBackedPileup pileup = context.getBasePileup().getBaseFilteredPileup(10);
      for (PileupElement p : pileup) {
        if (p.isBeforeInsertion()) {
          foundIndel = true;
          Assert.assertEquals(p.getEventLength(), 2, "Wrong event length");
          Assert.assertEquals(p.getEventBases(), "CT", "Inserted bases are incorrect");
          break;
        }
      }
    }

    Assert.assertTrue(foundIndel, "Indel in pileup not found");
  }
  /**
   * Test to make sure that reads supporting only an indel (example cigar string: 76I) are
   * represented properly
   */
  @Test
  public void testWholeIndelReadRepresentedTest() {
    final int firstLocus = 44367788, secondLocus = firstLocus + 1;

    SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, secondLocus, 1);
    read1.setReadBases(Utils.dupBytes((byte) 'A', 1));
    read1.setBaseQualities(Utils.dupBytes((byte) '@', 1));
    read1.setCigarString("1I");

    List<SAMRecord> reads = Arrays.asList(read1);

    // create the iterator by state with the fake reads and fake records
    li = makeLTBS(reads, createTestReadProperties());

    while (li.hasNext()) {
      AlignmentContext alignmentContext = li.next();
      ReadBackedPileup p = alignmentContext.getBasePileup();
      Assert.assertTrue(p.getNumberOfElements() == 1);
      PileupElement pe = p.iterator().next();
      Assert.assertTrue(pe.isBeforeInsertion());
      Assert.assertFalse(pe.isAfterInsertion());
      Assert.assertEquals(pe.getEventBases(), "A");
    }

    SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, secondLocus, 10);
    read2.setReadBases(Utils.dupBytes((byte) 'A', 10));
    read2.setBaseQualities(Utils.dupBytes((byte) '@', 10));
    read2.setCigarString("10I");

    reads = Arrays.asList(read2);

    // create the iterator by state with the fake reads and fake records
    li = makeLTBS(reads, createTestReadProperties());

    while (li.hasNext()) {
      AlignmentContext alignmentContext = li.next();
      ReadBackedPileup p = alignmentContext.getBasePileup();
      Assert.assertTrue(p.getNumberOfElements() == 1);
      PileupElement pe = p.iterator().next();
      Assert.assertTrue(pe.isBeforeInsertion());
      Assert.assertFalse(pe.isAfterInsertion());
      Assert.assertEquals(pe.getEventBases(), "AAAAAAAAAA");
    }
  }
  @Test
  public void testXandEQOperators() {
    final byte[] bases1 = new byte[] {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
    final byte[] bases2 = new byte[] {'A', 'A', 'A', 'C', 'A', 'A', 'A', 'A', 'A', 'C'};

    // create a test version of the Reads object
    ReadProperties readAttributes = createTestReadProperties();

    SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header, "r1", 0, 1, 10);
    r1.setReadBases(bases1);
    r1.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    r1.setCigarString("10M");

    SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header, "r2", 0, 1, 10);
    r2.setReadBases(bases2);
    r2.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    r2.setCigarString("3=1X5=1X");

    SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header, "r3", 0, 1, 10);
    r3.setReadBases(bases2);
    r3.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    r3.setCigarString("3=1X5M1X");

    SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header, "r4", 0, 1, 10);
    r4.setReadBases(bases2);
    r4.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    r4.setCigarString("10M");

    List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);

    // create the iterator by state with the fake reads and fake records
    li = makeLTBS(reads, readAttributes);

    while (li.hasNext()) {
      AlignmentContext context = li.next();
      ReadBackedPileup pileup = context.getBasePileup();
      Assert.assertEquals(pileup.depthOfCoverage(), 4);
    }
  }
  /**
   * Test to make sure that reads supporting only an indel (example cigar string: 76I) do not
   * negatively influence the ordering of the pileup.
   */
  @Test
  public void testWholeIndelRead() {
    final int firstLocus = 44367788, secondLocus = firstLocus + 1;

    SAMRecord leadingRead =
        ArtificialSAMUtils.createArtificialRead(header, "leading", 0, firstLocus, 76);
    leadingRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
    leadingRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
    leadingRead.setCigarString("1M75I");

    SAMRecord indelOnlyRead =
        ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, secondLocus, 76);
    indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
    indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
    indelOnlyRead.setCigarString("76I");

    SAMRecord fullMatchAfterIndel =
        ArtificialSAMUtils.createArtificialRead(header, "fullMatch", 0, secondLocus, 76);
    fullMatchAfterIndel.setReadBases(Utils.dupBytes((byte) 'A', 76));
    fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte) '@', 76));
    fullMatchAfterIndel.setCigarString("75I1M");

    List<SAMRecord> reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel);

    // create the iterator by state with the fake reads and fake records
    li = makeLTBS(reads, createTestReadProperties());
    int currentLocus = firstLocus;
    int numAlignmentContextsFound = 0;

    while (li.hasNext()) {
      AlignmentContext alignmentContext = li.next();
      Assert.assertEquals(
          alignmentContext.getLocation().getStart(),
          currentLocus,
          "Current locus returned by alignment context is incorrect");

      if (currentLocus == firstLocus) {
        List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
        Assert.assertEquals(
            readsAtLocus.size(), 1, "Wrong number of reads at locus " + currentLocus);
        Assert.assertSame(
            readsAtLocus.get(0),
            leadingRead,
            "leadingRead absent from pileup at locus " + currentLocus);
      } else if (currentLocus == secondLocus) {
        List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
        Assert.assertEquals(
            readsAtLocus.size(), 2, "Wrong number of reads at locus " + currentLocus);
        Assert.assertSame(
            readsAtLocus.get(0),
            indelOnlyRead,
            "indelOnlyRead absent from pileup at locus " + currentLocus);
        Assert.assertSame(
            readsAtLocus.get(1),
            fullMatchAfterIndel,
            "fullMatchAfterIndel absent from pileup at locus " + currentLocus);
      }

      currentLocus++;
      numAlignmentContextsFound++;
    }

    Assert.assertEquals(
        numAlignmentContextsFound, 2, "Found incorrect number of alignment contexts");
  }
示例#7
0
文件: Align.java 项目: nh13/SRMA
  private static void updateSAM(
      SAMRecord rec,
      ReferenceSequence sequence,
      SAMProgramRecord programRecord,
      AlignHeapNode bestAlignHeapNode,
      SRMAUtil.Space space,
      String read,
      String qualities,
      String softClipStartBases,
      String softClipStartQualities,
      String softClipEndBases,
      String softClipEndQualities,
      boolean strand,
      boolean correctBases)
      throws Exception {
    AlignHeapNode curAlignHeapNode = null;
    AlignHeapNode prevAlignHeapNode = null;

    int alignmentStart = 0;
    int readIndex = -1;
    byte readBases[] = null;
    byte baseQualities[] = null;
    byte colorErrors[] = null;
    int i;
    int numEdits = 0;
    List<String> optFieldTags = new LinkedList<String>();
    List<Object> optFieldValues = new LinkedList<Object>();
    Object attr;

    // Debugging stuff
    String readName = rec.getReadName();

    if (null == bestAlignHeapNode) {
      // Do not modify the alignment
      return;
    }

    // To generate a new CIGAR
    List<CigarElement> cigarElements = null;
    CigarOperator prevCigarOperator = null, curCigarOperator = null;
    int prevCigarOperatorLength = 0;

    // TODO
    // setInferredInsertSize (invalidates paired end reads)
    // setMappingQuality (?)
    // setFlag
    // update base qualities for color space reads

    // clear attributes, but save some
    Align.clearAttributes(rec, optFieldTags, optFieldValues);

    readBases = new byte[read.length()];
    baseQualities = new byte[qualities.length()];
    for (i = 0; i < qualities.length(); i++) {
      // Must subtract 33 for PHRED scaling
      baseQualities[i] = (byte) (qualities.charAt(i) - 33);
    }

    if (strand) {
      readIndex = 0;
    } else {
      readIndex = read.length() - 1;
    }
    cigarElements = new LinkedList<CigarElement>();
    if (strand) { // reverse strand is the current position
      alignmentStart = bestAlignHeapNode.node.position;
    } else {
      alignmentStart = bestAlignHeapNode.startPosition;
    }

    assert null != bestAlignHeapNode;
    curAlignHeapNode = bestAlignHeapNode;

    while (null != curAlignHeapNode) {
      // Get the current cigar operator
      if (null != prevAlignHeapNode
          && CigarOperator.DELETION != prevCigarOperator
          && 1 < Math.abs(curAlignHeapNode.node.position - prevAlignHeapNode.node.position)) {
        curCigarOperator = CigarOperator.DELETION;
      } else {
        switch (curAlignHeapNode.node.type) {
          case Node.MISMATCH: // Fall through
          case Node.MATCH:
            curCigarOperator = CigarOperator.MATCH_OR_MISMATCH;
            break;
          case Node.INSERTION:
            // System.out.println("INS");
            curCigarOperator = CigarOperator.INSERTION;
            break;
          default:
            throw new Exception("Unknown node type");
        }
        if (space == SRMAUtil.Space.COLORSPACE || correctBases) {
          readBases[readIndex] = (byte) curAlignHeapNode.node.base;
          if (strand) {
            readIndex++;
          } else {
            readIndex--;
          }
          // count the number of mismatches
          switch (curAlignHeapNode.node.type) {
            case Node.MISMATCH:
            case Node.INSERTION:
              numEdits++;
              break;
            default:
              break;
          }
        } else {
          // count the number of mismatches
          switch (curAlignHeapNode.node.type) {
            case Node.MATCH:
              if (read.charAt(curAlignHeapNode.readOffset) != curAlignHeapNode.node.base) {
                numEdits++;
              }
              break;
            case Node.MISMATCH: // Fall through
              if (read.charAt(curAlignHeapNode.readOffset)
                  != sequence.getBases()[curAlignHeapNode.node.position - 1]) {
                numEdits++;
              }
              break;
            case Node.INSERTION:
              numEdits++;
              break;
            default:
              break;
          }
        }
      }
      if (prevCigarOperator != curCigarOperator) {
        // different cigar operator

        // add the previous cigar operator
        if (null != prevCigarOperator) {
          if (strand) { // reverse
            // append
            cigarElements.add(new CigarElement(prevCigarOperatorLength, prevCigarOperator));
          } else {
            // prepend
            cigarElements.add(0, new CigarElement(prevCigarOperatorLength, prevCigarOperator));
          }
        }

        // update prevCigarOperator
        prevCigarOperator = curCigarOperator;
        if (curCigarOperator == CigarOperator.DELETION) {
          // length of deletion
          prevCigarOperatorLength =
              Math.abs(curAlignHeapNode.node.position - prevAlignHeapNode.node.position) - 1;
          numEdits += prevCigarOperatorLength; // deletions
        } else {
          prevCigarOperatorLength = 1;
        }
      } else {
        // same cigar operator
        prevCigarOperatorLength++;
      }

      // Update
      if (CigarOperator.DELETION != curCigarOperator) {
        prevAlignHeapNode = curAlignHeapNode;
        curAlignHeapNode = curAlignHeapNode.prev;
      }
    }
    if (0 < prevCigarOperatorLength) {
      if (null == prevCigarOperator || CigarOperator.DELETION == prevCigarOperator) {
        throw new Exception("Ended with a null cigar operator or a deletion cigar operator");
      }
      if (strand) { // reverse
        // append
        cigarElements.add(new CigarElement(prevCigarOperatorLength, prevCigarOperator));
      } else {
        // prepend
        cigarElements.add(0, new CigarElement(prevCigarOperatorLength, prevCigarOperator));
      }
    }

    if (space == SRMAUtil.Space.COLORSPACE) { // color space, read bases already inferred
      // Get color error string
      colorErrors = new byte[read.length()];
      char prevBase = SRMAUtil.COLORSPACE_ADAPTOR;
      if (strand) { // reverse
        for (i = 0; i < read.length(); i++) {
          char nextBase = SRMAUtil.colorSpaceNextBase(prevBase, read.charAt(i));
          if (nextBase == SRMAUtil.getCompliment((char) readBases[read.length() - i - 1])) {
            colorErrors[i] = (byte) Alignment.GAP;
          } else {
            colorErrors[i] = (byte) read.charAt(i);
          }
          if (0 < i) {
            // qualities are assumed to be always in the same direction as the color errors
            baseQualities[read.length() - i] =
                getColorQuality(
                    colorErrors[i - 1],
                    colorErrors[i],
                    (byte) (qualities.charAt(i - 1) - 33),
                    (byte) (qualities.charAt(i) - 33));
          }
          prevBase = SRMAUtil.getCompliment((char) readBases[read.length() - i - 1]);
        }
        // last color
        baseQualities[0] = (byte) (qualities.charAt(read.length() - 1) - 33);
      } else {
        for (i = 0; i < read.length(); i++) {
          char nextBase = SRMAUtil.colorSpaceNextBase(prevBase, read.charAt(i));
          if (nextBase == readBases[i]) {
            colorErrors[i] = (byte) Alignment.GAP;
          } else {
            colorErrors[i] = (byte) read.charAt(i);
          }
          if (0 < i) {
            baseQualities[i - 1] =
                getColorQuality(
                    colorErrors[i - 1],
                    colorErrors[i],
                    (byte) (qualities.charAt(i - 1) - 33),
                    (byte) (qualities.charAt(i) - 33));
          }
          prevBase = (char) readBases[i];
        }
        // last color
        baseQualities[read.length() - 1] = (byte) (qualities.charAt(read.length() - 1) - 33);
      }
    } else if (correctBases) { // bases were corrected
      if (strand) {
        for (i = 0; i < read.length(); i++) {
          if (readBases[i] == (byte) read.charAt(read.length() - i - 1)) {
            baseQualities[i] = (byte) (qualities.charAt(read.length() - i - 1) - 33);
          } else {
            // TODO: how much to down-weight ?
            baseQualities[i] =
                (byte)
                    (SRMAUtil.QUAL2CHAR(
                            SRMAUtil.CHAR2QUAL(qualities.charAt(read.length() - i - 1))
                                - CORRECT_BASE_QUALITY_PENALTY)
                        - 33);
            if (baseQualities[i] <= 0) {
              baseQualities[i] = 1;
            }
          }
        }
      } else {
        for (i = 0; i < read.length(); i++) {
          if (readBases[i] == (byte) read.charAt(i)) {
            baseQualities[i] = (byte) (qualities.charAt(i) - 33);
          } else {
            // TODO: how much to down-weight ?
            baseQualities[i] =
                (byte)
                    (SRMAUtil.QUAL2CHAR(
                            SRMAUtil.CHAR2QUAL(qualities.charAt(i)) - CORRECT_BASE_QUALITY_PENALTY)
                        - 33);
            if (baseQualities[i] <= 0) {
              baseQualities[i] = 1;
            }
          }
        }
      }
      rec.setAttribute("XO", read);
      rec.setAttribute("XQ", qualities);
    } else { // bases not corrected
      readBases = new byte[read.length()];
      baseQualities = new byte[qualities.length()]; // qualities.length() == read.length()
      if (strand) { // reverse
        for (i = 0; i < read.length(); i++) {
          readBases[i] = (byte) read.charAt(read.length() - i - 1);
          baseQualities[i] = (byte) (qualities.charAt(read.length() - i - 1) - 33);
        }
      } else {
        for (i = 0; i < read.length(); i++) {
          readBases[i] = (byte) read.charAt(i);
          baseQualities[i] = (byte) (qualities.charAt(i) - 33);
        }
      }
    }

    // Add in soft-clipping
    if (null != softClipStartBases) { // prepend
      cigarElements.add(0, new CigarElement(softClipStartBases.length(), CigarOperator.S));

      byte tmpBases[] = new byte[readBases.length + softClipStartBases.length()];
      System.arraycopy(readBases, 0, tmpBases, softClipStartBases.length(), readBases.length);
      readBases = tmpBases;
      for (i = 0; i < softClipStartBases.length(); i++) {
        readBases[i] = (byte) softClipStartBases.charAt(i);
      }

      byte tmpQualities[] = new byte[baseQualities.length + softClipStartQualities.length()];
      System.arraycopy(
          baseQualities, 0, tmpQualities, softClipStartQualities.length(), baseQualities.length);
      baseQualities = tmpQualities;
      for (i = 0; i < softClipStartQualities.length(); i++) {
        baseQualities[i] = (byte) softClipStartQualities.charAt(i);
      }
    }
    if (null != softClipEndBases) { // append
      cigarElements.add(new CigarElement(softClipEndBases.length(), CigarOperator.S));

      byte tmpBases[] = new byte[readBases.length + softClipEndBases.length()];
      System.arraycopy(readBases, 0, tmpBases, 0, readBases.length);
      for (i = 0; i < softClipEndBases.length(); i++) {
        tmpBases[i + readBases.length] = (byte) softClipEndBases.charAt(i);
      }
      readBases = tmpBases;

      byte tmpQualities[] = new byte[baseQualities.length + softClipEndQualities.length()];
      System.arraycopy(baseQualities, 0, tmpQualities, 0, baseQualities.length);
      for (i = 0; i < softClipEndQualities.length(); i++) {
        tmpQualities[i + baseQualities.length] = (byte) softClipEndQualities.charAt(i);
      }
      baseQualities = tmpQualities;
    }

    // Update SAM record
    rec.setCigar(new Cigar(cigarElements));
    rec.setAlignmentStart(alignmentStart);
    rec.setReadBases(readBases);
    rec.setBaseQualities(baseQualities);
    // Reset saved attributes
    Align.resetAttributes(rec, optFieldTags, optFieldValues);
    // Set new attributes
    if (space == SRMAUtil.Space.COLORSPACE) {
      // set the XE attribute for colorError string
      rec.setAttribute("XE", new String(colorErrors));
    }
    rec.setAttribute("AS", bestAlignHeapNode.score);
    rec.setAttribute("XC", bestAlignHeapNode.alleleCoverageSum);
    rec.setAttribute("PG", programRecord.getId());
    rec.setAttribute("NM", numEdits);
  }