예제 #1
0
  /**
   * Distributes the pending reads that overlap {@code activeRegion} and then runs the walker's
   * map/reduce step on that region.
   *
   * <p>For every read in {@code reads}:
   *
   * <ul>
   *   <li>If the read overlaps the region's location, the region with the largest overlap among
   *       {@code activeRegion} and the regions in {@code workQueue} receives the read. Ties are
   *       won by the region examined last, i.e. the later entry in {@code workQueue}. When the
   *       walker wants non-primary reads, the read is additionally added to {@code activeRegion}
   *       (if it was not the best region) and to every other queued region whose extended
   *       location overlaps the read. The read is then considered placed.
   *   <li>Otherwise, if the read only overlaps the region's extended location and the walker
   *       wants non-primary reads, it is added to {@code activeRegion} but is NOT considered
   *       placed, so it stays in {@code reads}.
   * </ul>
   *
   * <p>All placed reads are removed from {@code reads} before the walker is invoked.
   *
   * @param activeRegion the region being processed; reads may be added to it
   * @param reads pending reads; mutated in place (placed reads are removed)
   * @param workQueue regions still waiting to be processed; reads may be added to them
   * @param sum the running reduce value
   * @param walker the walker whose {@code map} and {@code reduce} are invoked
   * @return {@code walker.reduce(walker.map(activeRegion, null), sum)}
   */
  private T processActiveRegion(
      final ActiveRegion activeRegion,
      final LinkedHashSet<GATKSAMRecord> reads,
      final Queue<ActiveRegion> workQueue,
      final T sum,
      final ActiveRegionWalker<M, T> walker) {
    final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
    for (final GATKSAMRecord read : reads) {
      final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc(read);
      if (activeRegion.getLocation().overlapsP(readLoc)) {
        // The region with the largest overlap is chosen as the primary region for the read.
        // The >= comparison means a tie is won by the region examined last (the later queue
        // entry), which the original author describes as the right-most region.
        long maxOverlap = activeRegion.getLocation().sizeOfOverlap(readLoc);
        ActiveRegion bestRegion = activeRegion;
        for (final ActiveRegion otherRegionToTest : workQueue) {
          // Compute the overlap once per candidate instead of once for the test and again for
          // the assignment.
          final long overlap = otherRegionToTest.getLocation().sizeOfOverlap(readLoc);
          if (overlap >= maxOverlap) {
            maxOverlap = overlap;
            bestRegion = otherRegionToTest;
          }
        }
        bestRegion.add(read);

        // The read is also added to every other region it overlaps. The original author calls
        // these non-primary placements; this method itself only calls add(read) and does not
        // record any primary / non-primary flag.
        if (walker.wantsNonPrimaryReads()) {
          if (!bestRegion.equals(activeRegion)) {
            activeRegion.add(read);
          }
          for (final ActiveRegion otherRegionToTest : workQueue) {
            if (!bestRegion.equals(otherRegionToTest)
                && otherRegionToTest.getExtendedLoc().overlapsP(readLoc)) {
              otherRegionToTest.add(read);
            }
          }
        }
        placedReads.add(read);
      } else if (activeRegion.getExtendedLoc().overlapsP(readLoc)
          && walker.wantsNonPrimaryReads()) {
        // Only the extended location overlaps: hand the read to this region but leave it in
        // `reads` (it is not recorded in placedReads).
        activeRegion.add(read);
      }
    }
    // Remove all the reads which have been placed into their active region.
    // WARNING: This hashset relies on reads being exactly equal when they are placed in the list as
    // when they are removed. So the ActiveRegionWalker can't modify the reads in any way.
    reads.removeAll(placedReads);

    logger.debug(
        ">> Map call with "
            + activeRegion.getReads().size()
            + " "
            + (activeRegion.isActive ? "active" : "inactive")
            + " reads @ "
            + activeRegion.getLocation()
            + " with full extent: "
            + activeRegion.getReferenceLoc());
    final M x = walker.map(activeRegion, null);
    return walker.reduce(x, sum);
  }
예제 #2
0
 /**
  * Produces the activity result for a single locus.
  *
  * <p>When the walker has preset active regions, the result is built from 1.0 if those regions
  * overlap {@code location} and from 0.0 otherwise; the walker's own {@code isActive} is not
  * consulted. Without preset regions the call is delegated to {@code walker.isActive}.
  *
  * @param walker the walker supplying either the preset regions or the activity computation
  * @param tracker reference-ordered data handed to {@code walker.isActive}
  * @param refContext reference context handed to {@code walker.isActive}
  * @param locus alignment context handed to {@code walker.isActive}
  * @param location the locus tested against the preset regions
  * @return the activity result for this locus
  */
 private final ActivityProfileResult walkerActiveProb(
     final ActiveRegionWalker<M, T> walker,
     final RefMetaDataTracker tracker,
     final ReferenceContext refContext,
     final AlignmentContext locus,
     final GenomeLoc location) {
   // No preset regions: let the walker decide.
   if (!walker.hasPresetActiveRegions()) {
     return walker.isActive(tracker, refContext, locus);
   }

   // Preset regions are all-or-nothing: fully active inside them, fully inactive outside.
   final double presetProbability;
   if (walker.presetActiveRegions.overlaps(location)) {
     presetProbability = 1.0;
   } else {
     presetProbability = 0.0;
   }
   return new ActivityProfileResult(presetProbability);
 }
예제 #3
0
  /**
   * Traverses one shard of loci.
   *
   * <p>The method walks every locus offered by the shard's locus view, records an activity result
   * for each locus that falls inside the engine intervals (all loci when the engine has no
   * intervals), collects previously unseen reads into {@code myReads}, turns the band-pass
   * filtered profile into active regions, appends those regions to {@code workQueue} (merging the
   * first new region into the last queued one when they are contiguous, share the same active
   * state and together fit in the maximum region size), and finally hands over to
   * {@code processActiveRegions}.
   *
   * @param walker the walker driving the traversal; its class is expected to carry an
   *     {@code ActiveRegionExtension} annotation
   * @param dataProvider provider for the shard being traversed
   * @param sum the running reduce value
   * @return {@code sum} unchanged when the locus view is empty, otherwise the value returned by
   *     {@code processActiveRegions}
   * @throws NullPointerException if the walker's class has no {@code ActiveRegionExtension}
   *     annotation
   */
  @Override
  public T traverse(
      final ActiveRegionWalker<M, T> walker, final LocusShardDataProvider dataProvider, T sum) {
    logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));

    final LocusView locusView = getLocusView(walker, dataProvider);
    final GenomeLocSortedSet initialIntervals = engine.getIntervals();

    final LocusReferenceView referenceView = new LocusReferenceView(walker, dataProvider);
    // Look the annotation up once; both settings are read from the same annotation instance.
    final ActiveRegionExtension extensionSettings =
        walker.getClass().getAnnotation(ActiveRegionExtension.class);
    final int activeRegionExtension = extensionSettings.extension();
    final int maxRegionSize = extensionSettings.maxRegion();

    // Trivial optimization to avoid unnecessary processing when there's nothing here at all.
    if (!locusView.hasNext()) {
      return sum;
    }

    int minStart = Integer.MAX_VALUE;
    final ActivityProfile profile =
        new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());

    final ReferenceOrderedView referenceOrderedDataView =
        getReferenceOrderedView(walker, dataProvider, locusView);

    // We keep processing while the locus view has further locations.
    GenomeLoc prevLoc = null;
    while (locusView.hasNext()) {
      final AlignmentContext locus = locusView.next();
      final GenomeLoc location = locus.getLocation();

      if (prevLoc != null) {
        // Fill in the labels for the positions the locus view skipped between the previous
        // location and this one.
        addUncoveredLociToProfile(profile, walker, initialIntervals, prevLoc, location);
      }

      dataProvider.getShard().getReadMetrics().incrementNumIterations();

      // Create reference context. Note that if we have a pileup of "extended events", the
      // context will hold the (longest) stretch of deleted reference bases (if deletions are
      // present in the pileup).
      final ReferenceContext refContext = referenceView.getReferenceContext(location);

      // Iterate forward to get all reference ordered data covering this location.
      final RefMetaDataTracker tracker =
          referenceOrderedDataView.getReferenceOrderedDataAtLocus(
              locus.getLocation(), refContext);

      // Call the walker's isActive function for this locus and add the result to the profile to
      // be integrated later.
      if (initialIntervals == null || initialIntervals.overlaps(location)) {
        profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
      }

      // Grab all the previously unseen reads from this pileup and add them to the massive read
      // list.
      for (final PileupElement p : locus.getBasePileup()) {
        final GATKSAMRecord read = p.getRead();
        if (!myReads.contains(read)) {
          myReads.add(read);
        }

        // Track the smallest alignment start over every read of every pileup in this shard
        // (not just the last pileup). The value is passed to processActiveRegions; per the
        // original author it determines which queued regions are now safe to process.
        minStart = Math.min(minStart, read.getAlignmentStart());
      }

      prevLoc = location;

      printProgress(locus.getLocation());
    }

    updateCumulativeMetrics(dataProvider.getShard());

    // Take the individual isActive calls and integrate them into contiguous active regions:
    // band-pass filter the list of isActive probabilities and turn it into active regions.
    final ActivityProfile bandPassFiltered = profile.bandPassFilter();
    final List<ActiveRegion> activeRegions =
        bandPassFiltered.createActiveRegions(activeRegionExtension, maxRegionSize);

    // Add active regions to the queue of regions to process.
    // First check whether the last queued region and the first new region can be merged over the
    // shard boundary.
    if (!activeRegions.isEmpty()) {
      if (!workQueue.isEmpty()) {
        final ActiveRegion last = workQueue.getLast();
        final ActiveRegion first = activeRegions.get(0);
        if (last.isActive == first.isActive
            && last.getLocation().contiguousP(first.getLocation())
            && last.getLocation().size() + first.getLocation().size() <= maxRegionSize) {
          // Replace both pieces with a single region spanning their union.
          workQueue.removeLast();
          activeRegions.remove(first);
          workQueue.add(
              new ActiveRegion(
                  last.getLocation().union(first.getLocation()),
                  first.isActive,
                  this.engine.getGenomeLocParser(),
                  activeRegionExtension));
        }
      }
      workQueue.addAll(activeRegions);
    }

    logger.debug(
        "Integrated "
            + profile.size()
            + " isActive calls into "
            + activeRegions.size()
            + " regions.");

    // Now go and process all of the active regions.
    return processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig());
  }

  /**
   * Adds one profile entry for each position strictly between the stop of {@code prevLoc} and the
   * start of {@code location}, using the contig of {@code prevLoc}.
   *
   * <p>A position is only added when {@code initialIntervals} is {@code null} or overlaps it. The
   * entry is built from 1.0 when the walker has preset active regions that overlap the position,
   * and from 0.0 otherwise.
   *
   * @param profile the profile receiving the entries
   * @param walker the walker whose preset active regions (if any) are consulted
   * @param initialIntervals the engine intervals, or {@code null} for no restriction
   * @param prevLoc the previously visited location
   * @param location the location currently being visited
   */
  private void addUncoveredLociToProfile(
      final ActivityProfile profile,
      final ActiveRegionWalker<M, T> walker,
      final GenomeLocSortedSet initialIntervals,
      final GenomeLoc prevLoc,
      final GenomeLoc location) {
    for (int pos = prevLoc.getStop() + 1; pos < location.getStart(); pos++) {
      final GenomeLoc gapLoc =
          engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), pos, pos);
      if (initialIntervals == null || initialIntervals.overlaps(gapLoc)) {
        profile.add(
            gapLoc,
            new ActivityProfileResult(
                walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(gapLoc)
                    ? 1.0
                    : 0.0));
      }
    }
  }