Esempio n. 1
0
  /**
   * Builds a new, base-less GATKSAMRecord modelled on an existing read.
   *
   * <p>The new record is constructed with the template's header and copies its reference index,
   * flags, mate reference index, mate alignment start and inferred insert size. Alignment start,
   * mapping quality and the read indexing bin are all set to 0. The cigar string, read bases and
   * base qualities are set to empty (non-null) values. Attributes are cleared, after which the
   * template's read group (when it has one) is attached.
   *
   * <p>Use this when a fresh, empty GATKSAMRecord derived from another GATKSAMRecord is needed.
   *
   * @param read the read whose header, flags, mate information and read group are copied
   * @return a new read with no bases, cigar or qualities
   */
  public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
    final GATKSAMRecord blank = new GATKSAMRecord(read.getHeader());

    // Placement: same reference as the template, but position and mapping quality zeroed.
    blank.setReferenceIndex(read.getReferenceIndex());
    blank.setAlignmentStart(0);
    blank.setMappingQuality(0);

    // Flags and mate information are taken from the template unchanged.
    blank.setFlags(read.getFlags());
    blank.setMateReferenceIndex(read.getMateReferenceIndex());
    blank.setMateAlignmentStart(read.getMateAlignmentStart());
    blank.setInferredInsertSize(read.getInferredInsertSize());

    // Empty, but never null, sequence-level fields.
    blank.setCigarString("");
    blank.setReadBases(new byte[0]);
    blank.setBaseQualities(new byte[0]);

    // Wipe attributes first, then re-attach the template's read group if there is one.
    final SAMReadGroupRecord templateGroup = read.getReadGroup();
    blank.clearAttributes();
    if (templateGroup != null) {
      blank.setReadGroup(new GATKSAMReadGroupRecord(templateGroup));
    }

    // The read indexing bin is deliberately the last thing set on the record.
    GATKBin.setReadIndexingBin(blank, 0);

    return blank;
  }
 /**
  * Creates an artificial 8-base read (bases ACGTACGT, every quality 30, cigar "8M") that is
  * flagged as paired and as a proper pair.
  *
  * @param fragmentSize value stored as the read's inferred insert size
  * @param mateStart value stored as the mate's alignment start
  * @return the configured artificial read
  */
 private GATKSAMRecord makeRead(final int fragmentSize, final int mateStart) {
   final byte[] readBases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};
   final byte[] baseQuals = {30, 30, 30, 30, 30, 30, 30, 30};

   final GATKSAMRecord artificial =
       ArtificialSAMUtils.createArtificialRead(readBases, baseQuals, "8M");

   // Pairing flags.
   artificial.setProperPairFlag(true);
   artificial.setReadPairedFlag(true);

   // Mate position and fragment length supplied by the caller.
   artificial.setMateAlignmentStart(mateStart);
   artificial.setInferredInsertSize(fragmentSize);

   return artificial;
 }
  /**
   * Checks the adaptor boundary reported by {@code get} for a series of reads built with
   * {@code makeRead(fragmentSize, mateStart)}.
   *
   * <p>Every case constructs its own fresh read inside its own scope, so no state is shared
   * between cases and nothing needs to be reset after a case finishes.
   *
   * @param get the adaptor-boundary function under test, supplied by the "AdaptorGetter" provider
   */
  @Test(dataProvider = "AdaptorGetter")
  public void testGetAdaptorBoundary(final GetAdaptorFunc get) {
    final int fragmentSize = 10;
    final int mateStart = 1000;
    final int startBeforeMate = mateStart - 2;
    final int startAfterMate = mateStart + 2;

    // Test case 1: positive strand, first read
    {
      final GATKSAMRecord read = makeRead(fragmentSize, mateStart);
      read.setAlignmentStart(startBeforeMate);
      read.setReadNegativeStrandFlag(false);
      read.setMateNegativeStrandFlag(true);
      final int boundary = get.getAdaptor(read);
      Assert.assertEquals(boundary, startBeforeMate + fragmentSize + 1);
    }

    // Test case 2: positive strand, second read
    {
      final GATKSAMRecord read = makeRead(fragmentSize, mateStart);
      read.setAlignmentStart(startAfterMate);
      read.setReadNegativeStrandFlag(false);
      read.setMateNegativeStrandFlag(true);
      final int boundary = get.getAdaptor(read);
      Assert.assertEquals(boundary, startAfterMate + fragmentSize + 1);
    }

    // Test case 3: negative strand, second read
    {
      final GATKSAMRecord read = makeRead(fragmentSize, mateStart);
      read.setAlignmentStart(startAfterMate);
      read.setReadNegativeStrandFlag(true);
      read.setMateNegativeStrandFlag(false);
      final int boundary = get.getAdaptor(read);
      Assert.assertEquals(boundary, mateStart - 1);
    }

    // Test case 4: negative strand, first read
    {
      final GATKSAMRecord read = makeRead(fragmentSize, mateStart);
      read.setAlignmentStart(startBeforeMate);
      read.setReadNegativeStrandFlag(true);
      read.setMateNegativeStrandFlag(false);
      final int boundary = get.getAdaptor(read);
      Assert.assertEquals(boundary, mateStart - 1);
    }

    // Test case 5: mate is mapped to another chromosome (test both strands).
    // Modelled here by an inferred insert size of 0; the same read is checked in both
    // orientations.
    {
      final GATKSAMRecord read = makeRead(fragmentSize, mateStart);
      read.setInferredInsertSize(0);

      read.setReadNegativeStrandFlag(true);
      read.setMateNegativeStrandFlag(false);
      final int negativeStrandBoundary = get.getAdaptor(read);
      Assert.assertEquals(negativeStrandBoundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);

      read.setReadNegativeStrandFlag(false);
      read.setMateNegativeStrandFlag(true);
      final int positiveStrandBoundary = get.getAdaptor(read);
      Assert.assertEquals(positiveStrandBoundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
    }

    // Test case 6: read is unmapped
    {
      final GATKSAMRecord read = makeRead(fragmentSize, mateStart);
      read.setReadUnmappedFlag(true);
      final int boundary = get.getAdaptor(read);
      Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
    }

    // Test case 7:  reads don't overlap and look like this:
    //    <--------|
    //                 |------>
    // The mate strand flag is not set explicitly in either half of this case.
    // first read:
    {
      final GATKSAMRecord read = makeRead(fragmentSize, mateStart);
      read.setAlignmentStart(980);
      read.setInferredInsertSize(20);
      read.setReadNegativeStrandFlag(true);
      final int boundary = get.getAdaptor(read);
      Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
    }

    // second read:
    {
      final GATKSAMRecord read = makeRead(fragmentSize, mateStart);
      read.setAlignmentStart(1000);
      read.setInferredInsertSize(20);
      read.setMateAlignmentStart(980);
      read.setReadNegativeStrandFlag(false);
      final int boundary = get.getAdaptor(read);
      Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
    }

    // Test case 8: read doesn't have proper pair flag set
    {
      final GATKSAMRecord read = makeRead(fragmentSize, mateStart);
      read.setReadPairedFlag(true);
      read.setProperPairFlag(false);
      Assert.assertEquals(get.getAdaptor(read), ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
    }

    // Test case 9: read and mate have same negative flag setting.
    // For each orientation of the read, opposite mate orientation must succeed and identical
    // mate orientation must fail.
    for (final boolean negFlag : new boolean[] {true, false}) {
      final GATKSAMRecord oppositeStrands = makeRead(fragmentSize, mateStart);
      oppositeStrands.setAlignmentStart(startBeforeMate);
      oppositeStrands.setReadPairedFlag(true);
      oppositeStrands.setProperPairFlag(true);
      oppositeStrands.setReadNegativeStrandFlag(negFlag);
      oppositeStrands.setMateNegativeStrandFlag(!negFlag);
      Assert.assertTrue(
          get.getAdaptor(oppositeStrands) != ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY,
          "Get adaptor should have succeeded");

      final GATKSAMRecord sameStrands = makeRead(fragmentSize, mateStart);
      sameStrands.setAlignmentStart(startBeforeMate);
      sameStrands.setReadPairedFlag(true);
      sameStrands.setProperPairFlag(true);
      sameStrands.setReadNegativeStrandFlag(negFlag);
      sameStrands.setMateNegativeStrandFlag(negFlag);
      Assert.assertEquals(
          get.getAdaptor(sameStrands),
          ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY,
          "Get adaptor should have failed for reads with bad alignment orientation");
    }
  }
  /**
   * Supplies the rows for the "HasWellDefinedFragmentSizeData" provider.
   *
   * <p>Each row is {@code {description, read, expected}}. A single valid paired read is built
   * first; every other row is a clone of it with exactly one aspect altered. The boolean is
   * presumably the expected "has well-defined fragment size" outcome (inferred from the provider
   * name; the consuming test is not shown here).
   *
   * @return the test rows as an array of {@code Object[]}
   * @throws Exception if cloning the base read fails
   */
  @DataProvider(name = "HasWellDefinedFragmentSizeData")
  public Object[][] makeHasWellDefinedFragmentSizeData() throws Exception {
    final List<Object[]> cases = new LinkedList<Object[]>();

    // Base read: paired, proper pair, both ends mapped, read on the positive strand at 100 with
    // a 50M cigar, mate on the negative strand at 130, insert size 80.
    final SAMFileHeader samHeader = ArtificialSAMUtils.createArtificialSamHeader();
    final GATKSAMRecord base =
        ArtificialSAMUtils.createArtificialRead(samHeader, "read1", 0, 10, 10);
    base.setReadPairedFlag(true);
    base.setProperPairFlag(true);
    base.setReadUnmappedFlag(false);
    base.setMateUnmappedFlag(false);
    base.setAlignmentStart(100);
    base.setCigarString("50M");
    base.setMateAlignmentStart(130);
    base.setInferredInsertSize(80);
    base.setFirstOfPairFlag(true);
    base.setReadNegativeStrandFlag(false);
    base.setMateNegativeStrandFlag(true);

    cases.add(new Object[] {"basic case", base.clone(), true});

    final GATKSAMRecord unpaired = (GATKSAMRecord) base.clone();
    unpaired.setReadPairedFlag(false);
    cases.add(new Object[] {"not paired", unpaired, false});

    // The proper pair flag is currently not required, so this row expects true; the expectation
    // would become false if that requirement were ever introduced.
    final GATKSAMRecord improperPair = (GATKSAMRecord) base.clone();
    improperPair.setProperPairFlag(false);
    cases.add(new Object[] {"not proper pair", improperPair, true});

    final GATKSAMRecord unmappedRead = (GATKSAMRecord) base.clone();
    unmappedRead.setReadUnmappedFlag(true);
    cases.add(new Object[] {"read is unmapped", unmappedRead, false});

    final GATKSAMRecord unmappedMate = (GATKSAMRecord) base.clone();
    unmappedMate.setMateUnmappedFlag(true);
    cases.add(new Object[] {"mate is unmapped", unmappedMate, false});

    final GATKSAMRecord bothPositive = (GATKSAMRecord) base.clone();
    bothPositive.setMateNegativeStrandFlag(false);
    cases.add(new Object[] {"read and mate both on positive strand", bothPositive, false});

    final GATKSAMRecord bothNegative = (GATKSAMRecord) base.clone();
    bothNegative.setReadNegativeStrandFlag(true);
    cases.add(new Object[] {"read and mate both on negative strand", bothNegative, false});

    final GATKSAMRecord zeroInsert = (GATKSAMRecord) base.clone();
    zeroInsert.setInferredInsertSize(0);
    cases.add(new Object[] {"insert size is 0", zeroInsert, false});

    final GATKSAMRecord startsPastMate = (GATKSAMRecord) base.clone();
    startsPastMate.setAlignmentStart(1000);
    cases.add(new Object[] {"positve read starts after mate end", startsPastMate, false});

    // Orientation flipped relative to the base read, with the mate moved far downstream.
    final GATKSAMRecord endsBeforeMate = (GATKSAMRecord) base.clone();
    endsBeforeMate.setReadNegativeStrandFlag(true);
    endsBeforeMate.setMateNegativeStrandFlag(false);
    endsBeforeMate.setMateAlignmentStart(1000);
    cases.add(
        new Object[] {"negative strand read ends before mate starts", endsBeforeMate, false});

    return cases.toArray(new Object[cases.size()][]);
  }