@Test(description = "Write SAM records with null SAMFileHeader")
  public void samNullHeaderRoundTrip() throws Exception {
    // Copies roundtrip.sam through a SAM writer after clearing the header reference on every
    // record, then checks that the written text equals the input text.
    final File input = new File(TEST_DATA_DIR, "roundtrip.sam");

    final File outputFile = File.createTempFile("nullheader-out", ".sam");
    outputFile.delete();
    outputFile.deleteOnExit();

    // The reader and the output stream are closed automatically, and the writer is closed in a
    // finally block, so nothing is left open when a record fails to write.
    try (SamReader reader = SamReaderFactory.makeDefault().open(input);
        FileOutputStream os = new FileOutputStream(outputFile)) {
      final SAMFileWriter writer =
          new SAMFileWriterFactory().makeSAMWriter(reader.getFileHeader(), false, os);
      try {
        for (final SAMRecord rec : reader) {
          rec.setHeader(null);
          writer.addAlignment(rec);
        }
      } finally {
        // Close the writer before the underlying stream is closed.
        writer.close();
      }
    }

    final String originalsam;
    try (InputStream is = new FileInputStream(input)) {
      originalsam = IOUtil.readFully(is);
    }

    final String writtensam;
    try (InputStream is = new FileInputStream(outputFile)) {
      writtensam = IOUtil.readFully(is);
    }

    Assert.assertEquals(writtensam, originalsam);
  }
  /**
   * Walks the locus iterator over a single synthetic read built from the test parameters and, at
   * every pileup, compares each flag and the offset of the pileup element against the values
   * tracked independently by a {@code LIBS_position}.
   */
  @Test(dataProvider = "LIBSTest")
  public void testLIBS(LIBSTest params) {
    final int locus = 44367788;
    final int readLength = params.readLength;

    final SAMRecord syntheticRead =
        ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, readLength);
    syntheticRead.setReadBases(Utils.dupBytes((byte) 'A', readLength));
    syntheticRead.setBaseQualities(Utils.dupBytes((byte) '@', readLength));
    syntheticRead.setCigarString(params.cigar);

    // Iterator under test, fed with just the synthetic read.
    li = makeLTBS(Arrays.asList(syntheticRead), createTestReadProperties());
    final LIBS_position expected = new LIBS_position(syntheticRead);

    while (li.hasNext()) {
      final ReadBackedPileup pileup = li.next().getBasePileup();
      Assert.assertTrue(pileup.getNumberOfElements() == 1);
      final PileupElement element = pileup.iterator().next();

      // Advance the reference tracker in lock-step with the iterator.
      expected.stepForwardOnGenome();

      Assert.assertEquals(element.isBeforeDeletedBase(), expected.isBeforeDeletedBase);
      Assert.assertEquals(element.isBeforeDeletionStart(), expected.isBeforeDeletionStart);
      Assert.assertEquals(element.isAfterDeletedBase(), expected.isAfterDeletedBase);
      Assert.assertEquals(element.isAfterDeletionEnd(), expected.isAfterDeletionEnd);
      Assert.assertEquals(element.isBeforeInsertion(), expected.isBeforeInsertion);
      Assert.assertEquals(element.isAfterInsertion(), expected.isAfterInsertion);
      Assert.assertEquals(element.isNextToSoftClip(), expected.isNextToSoftClip);
      Assert.assertEquals(element.getOffset(), expected.getCurrentReadOffset());
    }
  }
  /**
   * Compares two records based on their duplicate scores. A paired record is always preferred over
   * an unpaired one. Otherwise the record with the higher duplicate score is preferred, and ties
   * are broken by the canonical record name so that the ordering is deterministic.
   *
   * <p>If true is given to assumeMateCigar, then any score that can use the mate cigar to compute
   * the mate's score will return the score computed on both ends.
   *
   * <p>Different scoring strategies are allowed.
   *
   * @param rec1 the first record
   * @param rec2 the second record
   * @param scoringStrategy the strategy passed to {@code computeDuplicateScore} for both records
   * @param assumeMateCigar passed through to {@code computeDuplicateScore}
   * @return a value &lt;0 if rec1 is better than rec2, &gt;0 if rec2 is better, and 0 only if the
   *     scores and the canonical record names are both equal
   */
  public static int compare(
      final SAMRecord rec1,
      final SAMRecord rec2,
      final ScoringStrategy scoringStrategy,
      final boolean assumeMateCigar) {
    // Always prefer paired over non-paired. "Better" sorts first (negative result), matching the
    // rec2 - rec1 score comparison below; the previous code returned the opposite sign here.
    if (rec1.getReadPairedFlag() != rec2.getReadPairedFlag()) {
      return rec1.getReadPairedFlag() ? -1 : 1;
    }

    // Higher score is better, hence rec2 - rec1. Both operands are shorts promoted to int, so the
    // subtraction cannot overflow.
    int cmp =
        computeDuplicateScore(rec2, scoringStrategy, assumeMateCigar)
            - computeDuplicateScore(rec1, scoringStrategy, assumeMateCigar);

    // Finally, use the canonical record name. This is important because we cannot control the
    // order in which reads appear for reads that are comparable up to now (i.e. cmp == 0). We want
    // to choose among them deterministically.
    if (cmp == 0) {
      cmp = SAMUtils.getCanonicalRecordName(rec1).compareTo(SAMUtils.getCanonicalRecordName(rec2));
    }

    return cmp;
  }
  /**
   * Checks that a read whose cigar is a single 76-base insertion still shows up in the base pileup
   * at its start locus when it is the only read supplied to the iterator.
   */
  @Test
  public void testWholeIndelReadInIsolation() {
    final int firstLocus = 44367789;

    // Test version of the Reads object.
    final ReadProperties readProperties = createTestReadProperties();

    final SAMRecord insertionOnlyRead =
        ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76);
    insertionOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
    insertionOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
    insertionOnlyRead.setCigarString("76I");

    final List<SAMRecord> singleRead = Arrays.asList(insertionOnlyRead);

    // Iterator under test, fed with the fake read.
    li = makeLTBS(singleRead, readProperties);

    // Traditionally, reads that end with indels bleed into the pileup at the following locus.
    // The first pileup must therefore exist, sit at the expected locus, and hold exactly this read.
    Assert.assertTrue(
        li.hasNext(),
        "Should have found a whole-indel read in the normal base pileup without extended events enabled");

    final AlignmentContext firstContext = li.next();
    Assert.assertEquals(
        firstContext.getLocation().getStart(),
        firstLocus,
        "Base pileup is at incorrect location.");

    final ReadBackedPileup pileupAtLocus = firstContext.getBasePileup();
    Assert.assertEquals(pileupAtLocus.getReads().size(), 1, "Pileup is of incorrect size");
    Assert.assertSame(
        pileupAtLocus.getReads().get(0), insertionOnlyRead, "Read in pileup is incorrect");
  }
示例#5
0
 /**
  * Adds a record after looking up the reader it came from. If the record carries an "RG"
  * attribute, that attribute is first replaced by the value the reads data source returns from
  * {@code getOriginalReadGroupId} for it.
  *
  * @param samRecord the record to add; its "RG" attribute may be rewritten in place
  */
 public void addAlignment(SAMRecord samRecord) {
   final SAMReaderID readerId = toolkit.getReaderIDForRead(samRecord);

   final String currentReadGroupId = samRecord.getStringAttribute("RG");
   if (currentReadGroupId != null) {
     samRecord.setAttribute(
         "RG", toolkit.getReadsDataSource().getOriginalReadGroupId(currentReadGroupId));
   }

   addAlignment(samRecord, readerId);
 }
 /**
  * Builds the sort key of a record for the configured sort order; useful in error messages.
  *
  * @param rec the record to describe
  * @return {@code referenceName:alignmentStart} for coordinate order, the read name for queryname
  *     order, and {@code null} for unsorted or any other order
  */
 public String getSortKey(final SAMRecord rec) {
   final String key;
   switch (sortOrder) {
     case queryname:
       key = rec.getReadName();
       break;
     case coordinate:
       key = rec.getReferenceName() + ":" + rec.getAlignmentStart();
       break;
     case unsorted:
     default:
       key = null;
       break;
   }
   return key;
 }
示例#7
0
文件: Align.java 项目: nh13/SRMA
  /**
   * Appends the tag and value of every {@code saveTags} attribute present on the record to the two
   * given lists (kept in step with each other), then clears all attributes of the record.
   *
   * @param rec the record whose attributes are cleared
   * @param optFieldTags receives the tags that were present
   * @param optFieldValues receives the matching attribute values, in the same order
   */
  private static void clearAttributes(
      SAMRecord rec, List<String> optFieldTags, List<Object> optFieldValues) {
    for (ListIterator<String> tags = saveTags.listIterator(); tags.hasNext(); ) {
      final String tag = tags.next();
      final Object value = rec.getAttribute(tag);
      if (value == null) {
        continue; // tag not set on this record, nothing to remember
      }
      optFieldTags.add(tag);
      optFieldValues.add(value);
    }
    rec.clearAttributes();
  }
示例#8
0
    /**
     * Records the index information of one BAM record. Metadata is recorded for every record;
     * records without an alignment start contribute nothing else. A record on a reference other
     * than the one currently being indexed is rejected.
     *
     * @param rec The BAM record. Requires rec.getFileSource() is non-null.
     * @throws SAMException if the record's reference differs from the current reference, or (when
     *     the chunk is requested) if the record has no file source
     */
    public void processAlignment(final SAMRecord rec) {

      // metadata is collected even for records that are skipped below
      indexStats.recordMetaData(rec);

      final boolean hasCoordinates = rec.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START;
      if (!hasCoordinates) {
        return; // do nothing for records without coordinates, but count them
      }

      // the record must belong to the reference currently being indexed
      final int recordReference = rec.getReferenceIndex();
      if (recordReference != currentReference) {
        throw new SAMException(
            "Unexpected reference "
                + recordReference
                + " when constructing index for "
                + currentReference
                + " for record "
                + rec);
      }

      // Adapter exposing the record to the binning index builder.
      final BinningIndexBuilder.FeatureToBeIndexed feature =
          new BinningIndexBuilder.FeatureToBeIndexed() {
            @Override
            public int getStart() {
              return rec.getAlignmentStart();
            }

            @Override
            public int getEnd() {
              return rec.getAlignmentEnd();
            }

            @Override
            public Integer getIndexingBin() {
              // Use the bin stored on the record when there is one, otherwise compute it.
              final Integer storedBin = rec.getIndexingBin();
              if (storedBin != null) {
                return storedBin;
              }
              return rec.computeIndexingBin();
            }

            @Override
            public Chunk getChunk() {
              final SAMFileSource fileSource = rec.getFileSource();
              if (fileSource != null) {
                return ((BAMFileSpan) fileSource.getFilePointer()).getSingleChunk();
              }
              throw new SAMException(
                  "No source (virtual file offsets); needed for indexing on BAM Record " + rec);
            }
          };
      binningIndexBuilder.processFeature(feature);
    }
示例#9
0
 /**
  * Orders two records by reference index and then by alignment start. A record whose reference
  * index is -1 sorts before any record that has a reference; two such records compare equal.
  *
  * @return a negative, zero or positive value, following the usual comparator convention
  */
 private int compareCoordinates(final SAMRecord record1, final SAMRecord record2) {
   final int refIndex1 = record1.getReferenceIndex();
   final int refIndex2 = record2.getReferenceIndex();

   final boolean firstHasNoReference = refIndex1 == -1;
   final boolean secondHasNoReference = refIndex2 == -1;
   if (firstHasNoReference || secondHasNoReference) {
     if (firstHasNoReference && secondHasNoReference) {
       return 0;
     }
     return firstHasNoReference ? -1 : 1;
   }

   final int byReference = refIndex1 - refIndex2;
   return byReference != 0
       ? byReference
       : record1.getAlignmentStart() - record2.getAlignmentStart();
 }
    /**
     * Routes a record and its reference sequence to the matching collectors: unpaired reads go to
     * the unpaired collector only; paired reads go to the first- or second-of-pair collector and
     * additionally to the pair collector.
     */
    public void acceptRecord(final SAMRecordAndReference args) {
      final SAMRecord rec = args.getSamRecord();
      final ReferenceSequence ref = args.getReferenceSequence();

      if (!rec.getReadPairedFlag()) {
        unpairedCollector.addRecord(rec, ref);
        return;
      }

      if (rec.getFirstOfPairFlag()) {
        firstOfPairCollector.addRecord(rec, ref);
      } else {
        secondOfPairCollector.addRecord(rec, ref);
      }
      pairCollector.addRecord(rec, ref);
    }
示例#11
0
 /**
  * Asserts that {@code passesFilter} gives the expected answer for a record and interval. On a
  * mismatch the record and the filter are printed to standard output before the assertion fails.
  */
 private void checkPassesFilter(
     final boolean expected,
     final SAMRecord record,
     final String sequence,
     final int startPos,
     final int endPos,
     final boolean contained) {
   final boolean passes = passesFilter(record, sequence, startPos, endPos, contained);
   if (passes == expected) {
     return;
   }

   final String outcome = passes ? "passed" : "failed";
   System.out.println("Error: Record erroneously " + outcome + " filter.");
   System.out.println(" Record: " + record.getSAMString());

   final String mode = contained ? "contained" : "overlapping";
   System.out.println(
       " Filter: " + sequence + ":" + startPos + "-" + endPos + " (" + mode + ")");

   assertEquals(passes, expected);
 }
  /**
   * Calculates a score for the read: the sum of all base qualities that are at least 15.
   *
   * @param rec the record whose base qualities are summed
   * @return the sum of the qualifying base qualities, 0 if there are none
   */
  private static int getSumOfBaseQualities(final SAMRecord rec) {
    final byte[] qualities = rec.getBaseQualities();
    int total = 0;
    for (int i = 0; i < qualities.length; i++) {
      final byte quality = qualities[i];
      if (quality < 15) {
        continue; // below the threshold, does not count towards the score
      }
      total += quality;
    }
    return total;
  }
  /**
   * Tests that we can successfully merge the reads of two BAM files (Chromosome1to10.bam and
   * Chromosome5to9.bam): the merged iterator must yield 1500 reads in total, 200 for each
   * reference index from 4 to 8 and 100 for every other reference index.
   */
  @Test
  public void testMerging() {
    File INPUT[] = {
      new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome1to10.bam"),
      new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome5to9.bam")
    };
    final List<SAMFileReader> readers = new ArrayList<SAMFileReader>();
    try {
      final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
      for (final File inFile : INPUT) {
        IOUtil.assertFileIsReadable(inFile);
        final SAMFileReader in = new SAMFileReader(inFile);
        // Register the reader right away so that the finally block closes it on any failure.
        readers.add(in);
        // We are now checking for zero-length reads, so suppress complaint about that.
        in.setValidationStringency(ValidationStringency.SILENT);
        headers.add(in.getFileHeader());
      }
      final SamFileHeaderMerger headerMerger =
          new SamFileHeaderMerger(SAMFileHeader.SortOrder.unsorted, headers, true);
      final MergingSamRecordIterator iterator =
          new MergingSamRecordIterator(headerMerger, readers, false);
      // NOTE(review): the returned header is unused; the call is kept from the original in case
      // it is relied on for a side effect - confirm and remove if not.
      headerMerger.getMergedHeader();

      // count the total reads, and record read counts for each sequence
      final Map<Integer, Integer> seqCounts = new HashMap<Integer, Integer>();
      int totalCount = 0;

      while (iterator.hasNext()) {
        final Integer referenceIndex = iterator.next().getReferenceIndex();
        final Integer seenSoFar = seqCounts.get(referenceIndex);
        seqCounts.put(referenceIndex, seenSoFar == null ? 1 : seenSoFar + 1);
        ++totalCount;
      }
      assertEquals(totalCount, 1500);
      for (final Map.Entry<Integer, Integer> entry : seqCounts.entrySet()) {
        final int index = entry.getKey();
        if (index < 4 || index > 8) {
          // sequences present in only one input should have 100 reads
          assertEquals(entry.getValue().intValue(), 100);
        } else {
          // sequences 5 - 9 (indices 4 - 8) are in both inputs and should have 200 reads
          assertEquals(entry.getValue().intValue(), 200);
        }
      }
    } finally {
      // The original never closed the readers.
      for (final SAMFileReader reader : readers) {
        reader.close();
      }
    }
  }
示例#14
0
  /**
   * Finds the adaptor boundary around the read and returns the first base inside the adaptor that
   * is closest to the read boundary. For a read on the positive strand this is the first base
   * after the end of the fragment (Picard calls it 'insert'); for a read on the negative strand it
   * is the first base before the beginning of the fragment.
   *
   * <p>Two cases are handled:
   *
   * <p>1) The read is on the reverse strand:
   *
   * <p>{@literal <----------------------| * |--------------------->}
   *
   * <p>Here the adaptor boundary is at the mate start (minus one).
   *
   * <p>2) The read is on the forward strand:
   *
   * <p>{@literal |----------------------> * <----------------------|}
   *
   * <p>Here the adaptor boundary is at the start of the read plus the inferred insert size (plus
   * one).
   *
   * @param read the read being tested for the adaptor boundary
   * @return the reference coordinate of the adaptor boundary (effectively the first base IN the
   *     adaptor, closest to the read), or null if the inferred insert size is 0, the read is
   *     unmapped, or the boundary lies more than 8 bases outside the read's alignment
   */
  public static Integer getAdaptorBoundary(final SAMRecord read) {
    final int MAXIMUM_ADAPTOR_LENGTH = 8;

    // The inferred insert size can be negative if the mate is mapped before the read, so only its
    // magnitude is used.
    final int insertSize = Math.abs(read.getInferredInsertSize());

    // No adaptors in reads with mates in another chromosome or unmapped pairs.
    if (insertSize == 0 || read.getReadUnmappedFlag()) {
      return null;
    }

    final int candidate;
    if (read.getReadNegativeStrandFlag()) {
      candidate = read.getMateAlignmentStart() - 1; // case 1 (see header)
    } else {
      candidate = read.getAlignmentStart() + insertSize + 1; // case 2 (see header)
    }

    // Be conservative: do not let the boundary go beyond what we believe is the maximum size of
    // an adaptor on either side of the read.
    if (candidate < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) {
      return null;
    }
    if (candidate > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH) {
      return null;
    }

    return candidate;
  }
示例#15
0
文件: Align.java 项目: nh13/SRMA
  /**
   * Sets attributes on the record from two parallel lists: the i-th tag is set to the i-th value.
   *
   * @param rec the record to update
   * @param optFieldTags the attribute tags, in order
   * @param optFieldValues the attribute values, in the same order as the tags
   */
  private static void resetAttributes(
      SAMRecord rec, List<String> optFieldTags, List<Object> optFieldValues) {
    final ListIterator<String> tags = optFieldTags.listIterator();
    final ListIterator<Object> values = optFieldValues.listIterator();

    // The tag list drives the loop; one value is consumed for every tag.
    for (; tags.hasNext(); ) {
      final String tag = tags.next();
      final Object value = values.next();
      rec.setAttribute(tag, value);
    }
  }
      public void addRecord(final SAMRecord record, final ReferenceSequence ref) {
        if (record.isSecondaryOrSupplementary()) {
          // only want 1 count per read so skip non primary alignments
          return;
        }

        collectReadData(record, ref);
        collectQualityData(record, ref);
      }
示例#17
0
  /**
   * Checks whether the platform (PL) attribute of the read's read group contains 'name', ignoring
   * the case of the attribute. Assumes that 'name' is upper-cased.
   *
   * <p>The attribute is upper-cased with {@code Locale.ROOT} so that the result does not depend on
   * the default locale of the JVM (in a Turkish locale, for example, "illumina" would otherwise
   * upper-case to a string that does not contain "ILLUMINA").
   *
   * @param read the read to test
   * @param name the upper-cased platform name to look for
   * @return true if the read has a read group whose PL attribute, upper-cased, contains name;
   *     false if it does not, or if the read has no read group or no PL attribute
   */
  public static boolean isPlatformRead(SAMRecord read, String name) {
    final SAMReadGroupRecord readGroup = read.getReadGroup();
    if (readGroup == null) {
      return false;
    }

    final Object readPlatformAttr = readGroup.getAttribute("PL");
    if (readPlatformAttr == null) {
      return false;
    }

    // Substring match, not equality: "ILLUMINA" matches a PL of e.g. "illumina-hiseq".
    return readPlatformAttr.toString().toUpperCase(java.util.Locale.ROOT).contains(name);
  }
示例#18
0
  /**
   * Copies every record of INPUT to OUTPUT, assigning all of them to a single new read group built
   * from the RG* options. The output header is a clone of the input header whose read groups are
   * replaced by that one group and whose sort order is overridden when SORT_ORDER is set.
   *
   * @return 0 once all records have been written
   */
  protected int doWork() {
    IoUtil.assertFileIsReadable(INPUT);
    IoUtil.assertFileIsWritable(OUTPUT);

    final SAMFileReader reader = new SAMFileReader(INPUT);

    // The read group every record will be assigned to; optional fields only when supplied.
    final SAMReadGroupRecord readGroup = new SAMReadGroupRecord(RGID);
    readGroup.setLibrary(RGLB);
    readGroup.setPlatform(RGPL);
    readGroup.setSample(RGSM);
    readGroup.setPlatformUnit(RGPU);
    if (RGCN != null) {
      readGroup.setSequencingCenter(RGCN);
    }
    if (RGDS != null) {
      readGroup.setDescription(RGDS);
    }
    if (RGDT != null) {
      readGroup.setRunDate(RGDT);
    }

    final String creationMessage =
        String.format(
            "Created read group ID=%s PL=%s LB=%s SM=%s%n",
            readGroup.getId(),
            readGroup.getPlatform(),
            readGroup.getLibrary(),
            readGroup.getSample());
    log.info(creationMessage);

    // Output header: input header with the read groups replaced.
    final SAMFileHeader sourceHeader = reader.getFileHeader();
    final SAMFileHeader targetHeader = sourceHeader.clone();
    targetHeader.setReadGroups(Arrays.asList(readGroup));
    if (SORT_ORDER != null) {
      targetHeader.setSortOrder(SORT_ORDER);
    }

    // Records can be written as-is only when the sort order is unchanged.
    final boolean presorted = targetHeader.getSortOrder() == sourceHeader.getSortOrder();
    final SAMFileWriter writer =
        new SAMFileWriterFactory().makeSAMOrBAMWriter(targetHeader, presorted, OUTPUT);

    final ProgressLogger progressLogger = new ProgressLogger(log);
    for (final SAMRecord record : reader) {
      record.setAttribute(SAMTag.RG.name(), RGID);
      writer.addAlignment(record);
      progressLogger.record(record);
    }

    // cleanup
    reader.close();
    writer.close();
    return 0;
  }
示例#19
0
 /**
  * HACK: makes a copy of a read through {@code clone()}. Really, SAMRecord should provide a copy
  * constructor or a factory method.
  *
  * @param originalRead the read to copy; may be null
  * @return the clone, or null when originalRead is null
  * @throws IllegalStateException if the read does not support cloning
  */
 public static SAMRecord cloneSAMRecord(final SAMRecord originalRead) {
   SAMRecord copy = null;
   if (originalRead != null) {
     try {
       copy = (SAMRecord) originalRead.clone();
     } catch (final CloneNotSupportedException e) {
       throw new IllegalStateException(e);
     }
   }
   return copy;
 }
  /**
   * Returns the duplicate score computed from the given fragment. The per-read contributions are
   * capped at Short.MAX_VALUE/2 because the scores from two reads will be added, and the sum must
   * not overflow a short.
   *
   * <p>If true is given to assumeMateCigar, then any score that can use the mate cigar to compute
   * the mate's score will return the score computed on both ends.
   *
   * <p>The score is computed once per record and cached in the transient attribute
   * {@code Attr.DuplicateScore}. When a cached score is present it is returned as-is, without
   * looking at scoringStrategy or assumeMateCigar.
   *
   * @param record the record to score; its transient attribute is set on the first call
   * @param scoringStrategy the strategy used when no cached score exists
   * @param assumeMateCigar whether the mate's cigar may be used (only read by
   *     TOTAL_MAPPED_REFERENCE_LENGTH)
   * @return the cached or newly computed score
   */
  public static short computeDuplicateScore(
      final SAMRecord record,
      final ScoringStrategy scoringStrategy,
      final boolean assumeMateCigar) {
    Short storedScore = (Short) record.getTransientAttribute(Attr.DuplicateScore);

    if (storedScore == null) {
      short score = 0;
      switch (scoringStrategy) {
        case SUM_OF_BASE_QUALITIES:
          // two (very) long reads worth of high-quality bases can go over Short.MAX_VALUE/2
          // and risk overflow.
          score += (short) Math.min(getSumOfBaseQualities(record), Short.MAX_VALUE / 2);
          break;
        case TOTAL_MAPPED_REFERENCE_LENGTH:
          if (!record.getReadUnmappedFlag()) {
            // no need to remember the score since this scoring mechanism is symmetric
            score = (short) Math.min(record.getCigar().getReferenceLength(), Short.MAX_VALUE / 2);
          }
          // Each of the two contributions is capped at Short.MAX_VALUE/2, so their sum still
          // fits in a short.
          if (assumeMateCigar && record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
            score +=
                (short)
                    Math.min(
                        SAMUtils.getMateCigar(record).getReferenceLength(), Short.MAX_VALUE / 2);
          }
          break;
          // The RANDOM score gives the same score to both reads so that they get filtered together.
          // It is not critical to use the readName since the scores from both ends get added, but
          // it seems to be clearer this way.
        case RANDOM:
          // start with the low 14 bits of the hash of the read name, i.e. a number between 0 and
          // 16383 (Short.MAX_VALUE/2)
          score += (short) (hasher.hashUnencodedChars(record.getReadName()) & 0b11_1111_1111_1111);
          // Subtracting Short.MIN_VALUE/4 (-8192) adds 8192. The score can then be discounted in
          // case the read is not passing filters. We need to stay far from overflow so that when
          // we add the two scores from the two read mates we do not overflow, since that could
          // cause us to choose a failing read-pair instead of a passing one.
          // NOTE(review): the original comments described the masked value as lying between
          // Short.MIN_VALUE/4 and Short.MAX_VALUE/4 and the result as lying between 0 and
          // Short.MAX_VALUE/2, but the code as written yields 8192..24575 - confirm which is
          // intended.
          score -= Short.MIN_VALUE / 4;
          // last case of the switch: no break needed
      }

      // make sure that filter-failing records are heavily discounted. (the discount can happen
      // twice, once for each mate, so need to make sure we do not subtract more than
      // Short.MIN_VALUE overall.)
      score += record.getReadFailsVendorQualityCheckFlag() ? (short) (Short.MIN_VALUE / 2) : 0;

      // cache the score on the record so later calls return it directly
      storedScore = score;
      record.setTransientAttribute(Attr.DuplicateScore, storedScore);
    }

    return storedScore;
  }
示例#21
0
  /**
   * Counts the records returned by an overlap query for one 16K linear-index window of a
   * reference sequence.
   *
   * <p>The original computed the bounds with {@code >> 14} although its own comments described
   * {@code window * SIXTEEN_K} and {@code (window + 1) * SIXTEEN_K - 1}; the shift collapsed the
   * range (start 0 and stop -1 for any realistic window). The bounds now follow those comments.
   *
   * @param reference index of the reference sequence in the reader's header
   * @param window number of the linear-index window to query
   * @param reader the reader to query
   * @param expectedCount when -1, the name of every record found is printed to standard error; not
   *     otherwise used
   * @return the number of records returned by the query
   */
  private int countAlignmentsInWindow(
      int reference, int window, SAMFileReader reader, int expectedCount) {
    final int SIXTEEN_K = 1 << 14; // 1 << LinearIndex.BAM_LIDX_SHIFT
    // NOTE(review): these are the bounds the original comments spelled out; confirm whether the
    // query expects them shifted by one (1-based coordinates).
    final int start = window * SIXTEEN_K;
    final int stop = (window + 1) * SIXTEEN_K - 1;

    final String chr = reader.getFileHeader().getSequence(reference).getSequenceName();

    // get records for the entire linear index window
    final SAMRecordIterator iter = reader.queryOverlapping(chr, start, stop);
    int count = 0;
    try {
      while (iter.hasNext()) {
        final SAMRecord rec = iter.next();
        count++;
        if (expectedCount == -1) System.err.println(rec.getReadName());
      }
    } finally {
      // always release the iterator, even if iteration fails
      iter.close();
    }
    return count;
  }
示例#22
0
 /**
  * Records the index information of one BAM record. When the record has a reference that differs
  * from the current one, the indexer first advances to that reference. Requires a non-null value
  * for rec.getFileSource().
  *
  * @param rec The BAM record
  * @throws SAMException wrapping any exception raised while processing the record
  */
 public void processAlignment(final SAMRecord rec) {
   try {
     final int recordReference = rec.getReferenceIndex();
     final boolean hasReference = recordReference != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
     if (hasReference && recordReference != currentReference) {
       // a new reference begins: process any completed references first
       advanceToReference(recordReference);
     }
     indexBuilder.processAlignment(rec);
   } catch (final Exception cause) {
     throw new SAMException("Exception creating BAM index for record " + rec, cause);
   }
 }
示例#23
0
 /**
  * Decides whether a record is selected by an interval filter on the given sequence. A bound of
  * 0 means "no bound on that side".
  *
  * @param record the record to test; null never passes
  * @param sequence the reference name the record must be on
  * @param startPos lower bound of the interval, or 0 for none
  * @param endPos upper bound of the interval, or 0 for none
  * @param contained true to require the alignment to lie inside the interval, false to require
  *     only that it overlaps the interval
  * @return true if the record passes the filter
  */
 private boolean passesFilter(
     final SAMRecord record,
     final String sequence,
     final int startPos,
     final int endPos,
     final boolean contained) {
   if (record == null || !safeEquals(record.getReferenceName(), sequence)) {
     return false;
   }

   final int recordStart = record.getAlignmentStart();
   int recordEnd = record.getAlignmentEnd();
   if (recordStart <= 0) {
     // only unmapped records may lack a start position
     assertTrue(record.getReadUnmappedFlag());
     return false;
   }
   if (recordEnd <= 0) {
     // For indexing-only records, treat as single base alignment.
     assertTrue(record.getReadUnmappedFlag());
     recordEnd = recordStart;
   }

   final boolean hasLowerBound = startPos != 0;
   final boolean hasUpperBound = endPos != 0;
   if (contained) {
     final boolean startsTooEarly = hasLowerBound && recordStart < startPos;
     final boolean endsTooLate = hasUpperBound && recordEnd > endPos;
     return !startsTooEarly && !endsTooLate;
   }
   final boolean endsBeforeInterval = hasLowerBound && recordEnd < startPos;
   final boolean startsAfterInterval = hasUpperBound && recordStart > endPos;
   return !endsBeforeInterval && !startsAfterInterval;
 }
  /**
   * Test to make sure that reads supporting only an indel (example cigar string: 76I) are
   * represented properly. Two insertion-only reads (1I and 10I) are run through the iterator one
   * after the other; every pileup element must be flagged as before an insertion, not after one,
   * and must report all of the read's bases as event bases.
   */
  @Test
  public void testWholeIndelReadRepresentedTest() {
    final int firstLocus = 44367788, secondLocus = firstLocus + 1;

    final int[] insertionLengths = {1, 10};
    final String[] expectedEventBases = {"A", "AAAAAAAAAA"};

    for (int i = 0; i < insertionLengths.length; i++) {
      final int length = insertionLengths[i];

      final SAMRecord insertionRead =
          ArtificialSAMUtils.createArtificialRead(
              header, "read" + (i + 1), 0, secondLocus, length);
      insertionRead.setReadBases(Utils.dupBytes((byte) 'A', length));
      insertionRead.setBaseQualities(Utils.dupBytes((byte) '@', length));
      insertionRead.setCigarString(length + "I");

      final List<SAMRecord> reads = Arrays.asList(insertionRead);

      // create the iterator by state with the fake reads and fake records
      li = makeLTBS(reads, createTestReadProperties());

      while (li.hasNext()) {
        final ReadBackedPileup pileup = li.next().getBasePileup();
        Assert.assertTrue(pileup.getNumberOfElements() == 1);
        final PileupElement element = pileup.iterator().next();
        Assert.assertTrue(element.isBeforeInsertion());
        Assert.assertFalse(element.isAfterInsertion());
        Assert.assertEquals(element.getEventBases(), expectedEventBases[i]);
      }
    }
  }
示例#25
0
文件: Align.java 项目: nh13/SRMA
  /**
   * Resets the mate-related flags and fields of a paired record: the proper-pair, mate-unmapped
   * and mate-negative-strand flags are set to false, and the mate reference index, mate alignment
   * start and inferred insert size are reset. Unpaired records are left untouched.
   */
  private static void removeMateInfo(SAMRecord rec) {
    if (!rec.getReadPairedFlag()) {
      return;
    }

    // flags
    rec.setProperPairFlag(false); // not paired any more
    rec.setMateUnmappedFlag(false);
    rec.setMateNegativeStrandFlag(false);

    // positional entries
    rec.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
    rec.setMateAlignmentStart(0);
    rec.setInferredInsertSize(0);

    // TODO: remove tags and values that are mate pair inclined.
  }
  /**
   * Decides whether a read is filtered out: its aligned length (the read bases consumed by
   * non-soft-clip cigar elements) must be below {@code tooShort} and it must have at least the
   * required number of soft-clip blocks (1 when soft clips are not required on both ends,
   * otherwise 2). Consecutive soft-clip elements count as one block.
   *
   * @param read the read to examine
   * @return true if the read should be filtered out
   */
  public boolean filterOut(final SAMRecord read) {
    final int requiredSoftClipBlocks = doNotRequireSoftclipsOnBothEnds ? 1 : 2;

    int alignedBases = 0;
    int softClipBlockCount = 0;
    CigarOperator previous = null;

    for (final CigarElement element : read.getCigar().getCigarElements()) {
      final CigarOperator current = element.getOperator();
      if (current == CigarOperator.S) {
        // Treat consecutive S blocks as a single one
        if (previous != CigarOperator.S) {
          softClipBlockCount++;
        }
      } else if (current.consumesReadBases()) {
        // M, I, X, and EQ (S was already accounted for above)
        alignedBases += element.getLength();
      }
      previous = current;
    }

    final boolean shortEnough = alignedBases < tooShort;
    return shortEnough && softClipBlockCount >= requiredSoftClipBlocks;
  }
  /**
   * Piles up three overlapping reads, the middle one carrying a two-base insertion (4M2I6M), and
   * checks that some element of the quality-filtered base pileup is flagged as preceding an
   * insertion with event length 2 and event bases "CT".
   */
  @Test
  public void testIndelsInRegularPileup() {
    final byte[] plainBases = new byte[] {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
    final byte[] basesWithInsertion =
        new byte[] {'A', 'A', 'A', 'A', 'C', 'T', 'A', 'A', 'A', 'A', 'A', 'A'};

    // create a test version of the Reads object
    final ReadProperties readProperties = createTestReadProperties();

    final SAMRecord upstream = ArtificialSAMUtils.createArtificialRead(header, "before", 0, 1, 10);
    upstream.setReadBases(plainBases);
    upstream.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    upstream.setCigarString("10M");

    final SAMRecord withInsertion =
        ArtificialSAMUtils.createArtificialRead(header, "during", 0, 2, 10);
    withInsertion.setReadBases(basesWithInsertion);
    withInsertion.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    withInsertion.setCigarString("4M2I6M");

    final SAMRecord downstream = ArtificialSAMUtils.createArtificialRead(header, "after", 0, 3, 10);
    downstream.setReadBases(plainBases);
    downstream.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    downstream.setCigarString("10M");

    // create the iterator by state with the fake reads and fake records
    li = makeLTBS(Arrays.asList(upstream, withInsertion, downstream), readProperties);

    boolean sawInsertion = false;
    while (li.hasNext()) {
      final ReadBackedPileup filtered = li.next().getBasePileup().getBaseFilteredPileup(10);
      for (final PileupElement element : filtered) {
        if (!element.isBeforeInsertion()) {
          continue;
        }
        sawInsertion = true;
        Assert.assertEquals(element.getEventLength(), 2, "Wrong event length");
        Assert.assertEquals(element.getEventBases(), "CT", "Inserted bases are incorrect");
        break; // one hit per pileup is enough; later pileups are still checked
      }
    }

    Assert.assertTrue(sawInsertion, "Indel in pileup not found");
  }
  /**
   * Checks that reads whose cigars use the X and = (EQ) operators are piled up like reads using M:
   * four 10-base reads starting at the same position, with cigars 10M, 3=1X5=1X, 3=1X5M1X and 10M,
   * must give a depth of coverage of 4 at every locus.
   */
  @Test
  public void testXandEQOperators() {
    final byte[] bases1 = new byte[] {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
    final byte[] bases2 = new byte[] {'A', 'A', 'A', 'C', 'A', 'A', 'A', 'A', 'A', 'C'};

    // create a test version of the Reads object
    ReadProperties readAttributes = createTestReadProperties();

    SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header, "r1", 0, 1, 10);
    r1.setReadBases(bases1);
    r1.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    r1.setCigarString("10M");

    // r2 and r3 have 10 bases, so they get 10 qualities; the original supplied 12, leaving the
    // records with mismatched base and quality lengths.
    SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header, "r2", 0, 1, 10);
    r2.setReadBases(bases2);
    r2.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    r2.setCigarString("3=1X5=1X");

    SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header, "r3", 0, 1, 10);
    r3.setReadBases(bases2);
    r3.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    r3.setCigarString("3=1X5M1X");

    SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header, "r4", 0, 1, 10);
    r4.setReadBases(bases2);
    r4.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20});
    r4.setCigarString("10M");

    List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);

    // create the iterator by state with the fake reads and fake records
    li = makeLTBS(reads, readAttributes);

    while (li.hasNext()) {
      AlignmentContext context = li.next();
      ReadBackedPileup pileup = context.getBasePileup();
      Assert.assertEquals(pileup.depthOfCoverage(), 4);
    }
  }
  /**
   * Steps forward on the genome and refreshes the position flags (isBeforeDeletionStart,
   * isBeforeDeletedBase, isAfterDeletionEnd, isAfterDeletedBase, isBeforeInsertion,
   * isAfterInsertion, isNextToSoftClip) for the new position.
   *
   * @return false when done reading the read (no cigar operator is left), true otherwise
   * @throws IllegalStateException if the current cigar operator is not one of I, S, H, P, D, N, M,
   *     EQ or X
   */
  public boolean stepForwardOnGenome() {
    // Already past the last cigar operator.
    if (currentOperatorIndex == numOperators) return false;

    CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
    // The current operator is used up: move on to the next one, if there is one.
    if (currentPositionOnOperator >= curElement.getLength()) {
      if (++currentOperatorIndex == numOperators) return false;

      curElement = read.getCigar().getCigarElement(currentOperatorIndex);
      currentPositionOnOperator = 0;
    }

    // NOTE: several cases below fall through on purpose; the order of the labels matters.
    switch (curElement.getOperator()) {
      case I: // insertion w.r.t. the reference
        // An insertion seen before any M/EQ/X is not skipped: leave the switch and compute the
        // flags for it. No counter is advanced on this path.
        if (!sawMop) break;
        // otherwise fall through: the inserted bases are skipped like a soft clip
      case S: // soft clip
        // skip all read bases of the element at once, then fall through
        currentReadOffset += curElement.getLength();
      case H: // hard clip
      case P: // padding
        // these operators are skipped entirely: step again on the next operator
        currentOperatorIndex++;
        return stepForwardOnGenome();

      case D: // deletion w.r.t. the reference
      case N: // reference skip (looks and gets processed just like a "deletion", just different
        // logical meaning)
        // advances within the operator only; the read offset is left unchanged
        currentPositionOnOperator++;
        break;

      case M:
      case EQ:
      case X:
        // advances both the read offset and the position within the operator
        sawMop = true;
        currentReadOffset++;
        currentPositionOnOperator++;
        break;
      default:
        throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
    }

    // Where the new position sits relative to the cigar and to the current operator.
    final boolean isFirstOp = currentOperatorIndex == 0;
    final boolean isLastOp = currentOperatorIndex == numOperators - 1;
    final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
    final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();

    // The flags are derived through the isBeforeOp / isAfterOp helpers for the operator named in
    // each call.
    isBeforeDeletionStart =
        isBeforeOp(
            read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
    // also true inside a deletion, on any base but its last
    isBeforeDeletedBase =
        isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
    isAfterDeletionEnd =
        isAfterOp(
            read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
    // also true inside a deletion, on any base but its first
    isAfterDeletedBase =
        isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
    // also true while sitting on an insertion that precedes any M/EQ/X (see case I above)
    isBeforeInsertion =
        isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
            || (!sawMop && curElement.getOperator() == CigarOperator.I);
    isAfterInsertion =
        isAfterOp(
            read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
    isNextToSoftClip =
        isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
            || isAfterOp(
                read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);

    return true;
  }
 /**
  * Creates a position tracker for the given read and records how many cigar elements it has.
  *
  * @param read the read whose cigar will be walked
  */
 public LIBS_position(final SAMRecord read) {
   this.read = read;
   this.numOperators = read.getCigar().numCigarElements();
 }