Пример #1
0
  @Test
  public void testCountsFromLocusTraversal() {
    final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
    engine.setGenomeLocParser(genomeLocParser);

    final Collection<SAMReaderID> samFiles = new ArrayList<>();
    final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
    samFiles.add(readerID);

    final SAMDataSource dataSource =
        new SAMDataSource(
            samFiles,
            new ThreadAllocation(),
            null,
            genomeLocParser,
            false,
            SAMFileReader.ValidationStringency.STRICT,
            null,
            null,
            new ValidationExclusion(),
            new ArrayList<ReadFilter>(),
            new ArrayList<ReadTransformer>(),
            false,
            (byte) 30,
            false,
            true);

    engine.setReadsDataSource(dataSource);
    final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());

    final TraverseLociNano traverseLociNano = new TraverseLociNano(1);
    final DummyLocusWalker walker = new DummyLocusWalker();
    traverseLociNano.initialize(engine, walker, null);

    for (final Shard shard : dataSource.createShardIteratorOverAllReads(new LocusShardBalancer())) {
      final WindowMaker windowMaker =
          new WindowMaker(
              shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
      for (WindowMaker.WindowMakerIterator window : windowMaker) {
        final LocusShardDataProvider dataProvider =
            new LocusShardDataProvider(
                shard,
                shard.getReadProperties(),
                genomeLocParser,
                window.getLocus(),
                window,
                reference,
                new ArrayList<ReferenceOrderedDataSource>());
        traverseLociNano.traverse(walker, dataProvider, 0);
        dataProvider.close();
      }
      windowMaker.close();
    }

    // dataSource.close();
    Assert.assertEquals(
        engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
    Assert.assertEquals(
        engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
  }
Пример #2
0
  @Override
  public T traverse(
      final ActiveRegionWalker<M, T> walker, final LocusShardDataProvider dataProvider, T sum) {
    logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));

    final LocusView locusView = getLocusView(walker, dataProvider);
    final GenomeLocSortedSet initialIntervals = engine.getIntervals();

    final LocusReferenceView referenceView = new LocusReferenceView(walker, dataProvider);
    final int activeRegionExtension =
        walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
    final int maxRegionSize =
        walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();

    if (locusView
        .hasNext()) { // trivial optimization to avoid unnecessary processing when there's nothing
                      // here at all
      int minStart = Integer.MAX_VALUE;
      ActivityProfile profile =
          new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());

      ReferenceOrderedView referenceOrderedDataView =
          getReferenceOrderedView(walker, dataProvider, locusView);

      // We keep processing while the next reference location is within the interval
      GenomeLoc prevLoc = null;
      while (locusView.hasNext()) {
        final AlignmentContext locus = locusView.next();
        GenomeLoc location = locus.getLocation();

        if (prevLoc != null) {
          // fill in the active / inactive labels from the stop of the previous location to the
          // start of this location
          // TODO refactor to separate function
          for (int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++) {
            final GenomeLoc fakeLoc =
                engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
            if (initialIntervals == null || initialIntervals.overlaps(fakeLoc)) {
              profile.add(
                  fakeLoc,
                  new ActivityProfileResult(
                      walker.hasPresetActiveRegions()
                              && walker.presetActiveRegions.overlaps(fakeLoc)
                          ? 1.0
                          : 0.0));
            }
          }
        }

        dataProvider.getShard().getReadMetrics().incrementNumIterations();

        // create reference context. Note that if we have a pileup of "extended events", the context
        // will
        // hold the (longest) stretch of deleted reference bases (if deletions are present in the
        // pileup).
        final ReferenceContext refContext = referenceView.getReferenceContext(location);

        // Iterate forward to get all reference ordered data covering this location
        final RefMetaDataTracker tracker =
            referenceOrderedDataView.getReferenceOrderedDataAtLocus(
                locus.getLocation(), refContext);

        // Call the walkers isActive function for this locus and add them to the list to be
        // integrated later
        if (initialIntervals == null || initialIntervals.overlaps(location)) {
          profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
        }

        // Grab all the previously unseen reads from this pileup and add them to the massive read
        // list
        for (final PileupElement p : locus.getBasePileup()) {
          final GATKSAMRecord read = p.getRead();
          if (!myReads.contains(read)) {
            myReads.add(read);
          }

          // If this is the last pileup for this shard calculate the minimum alignment start so that
          // we know
          // which active regions in the work queue are now safe to process
          minStart = Math.min(minStart, read.getAlignmentStart());
        }

        prevLoc = location;

        printProgress(locus.getLocation());
      }

      updateCumulativeMetrics(dataProvider.getShard());

      // Take the individual isActive calls and integrate them into contiguous active regions and
      // add these blocks of work to the work queue
      // band-pass filter the list of isActive probabilities and turn into active regions
      final ActivityProfile bandPassFiltered = profile.bandPassFilter();
      final List<ActiveRegion> activeRegions =
          bandPassFiltered.createActiveRegions(activeRegionExtension, maxRegionSize);

      // add active regions to queue of regions to process
      // first check if can merge active regions over shard boundaries
      if (!activeRegions.isEmpty()) {
        if (!workQueue.isEmpty()) {
          final ActiveRegion last = workQueue.getLast();
          final ActiveRegion first = activeRegions.get(0);
          if (last.isActive == first.isActive
              && last.getLocation().contiguousP(first.getLocation())
              && last.getLocation().size() + first.getLocation().size() <= maxRegionSize) {
            workQueue.removeLast();
            activeRegions.remove(first);
            workQueue.add(
                new ActiveRegion(
                    last.getLocation().union(first.getLocation()),
                    first.isActive,
                    this.engine.getGenomeLocParser(),
                    activeRegionExtension));
          }
        }
        workQueue.addAll(activeRegions);
      }

      logger.debug(
          "Integrated "
              + profile.size()
              + " isActive calls into "
              + activeRegions.size()
              + " regions.");

      // now go and process all of the active regions
      sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig());
    }

    return sum;
  }