Ejemplo n.º 1
0
 /** Checks that the set reports size 0 before, and size 1 after, a single location is added. */
 @Test
 public void testAdd() {
   final GenomeLoc singleBase = genomeLocParser.createGenomeLoc(contigOneName, 0, 0);

   final int sizeBeforeAdd = mSortedSet.size();
   assertTrue(sizeBeforeAdd == 0);

   mSortedSet.add(singleBase);

   final int sizeAfterAdd = mSortedSet.size();
   assertTrue(sizeAfterAdd == 1);
 }
Ejemplo n.º 2
0
  /**
   * Loads three intervals (3-5, 10-12, 16-18) and checks sizeBeforeLoc at a series of single-base
   * probe positions, finishing with a probe past the last interval against the covered size.
   */
  @Test
  public void testSizeBeforeLoc() {
    mSortedSet.addAll(
        Arrays.asList(
            genomeLocParser.createGenomeLoc(contigOneName, 3, 5),
            genomeLocParser.createGenomeLoc(contigOneName, 10, 12),
            genomeLocParser.createGenomeLoc(contigOneName, 16, 18)));

    // each row is {probe position, expected sizeBeforeLoc}; rows run in the original call order
    final int[][] probes = {
      {2, 0}, {3, 0}, {4, 1}, {5, 2}, {6, 3},
      {10, 3}, {11, 4}, {12, 5}, {13, 6}, {15, 6},
      {16, 6}, {17, 7}, {18, 8}, {19, 9}, {50, 9},
    };
    for (final int[] probe : probes) {
      testSizeBeforeLocX(probe[0], probe[1]);
    }

    // a probe beyond every interval must see the whole covered size
    final int totalCovered = (int) mSortedSet.coveredSize();
    testSizeBeforeLocX(50, totalCovered);
  }
Ejemplo n.º 3
0
 /** Adds the same location twice; the test expects an IllegalArgumentException to be thrown. */
 @Test(expectedExceptions = IllegalArgumentException.class)
 public void testAddDuplicate() {
   final boolean startsAtZero = mSortedSet.size() == 0;
   assertTrue(startsAtZero);

   final GenomeLoc repeated = genomeLocParser.createGenomeLoc(contigOneName, 0, 0);
   mSortedSet.add(repeated);

   final boolean holdsOne = mSortedSet.size() == 1;
   assertTrue(holdsOne);

   // second insertion of the very same location
   mSortedSet.add(repeated);
 }
Ejemplo n.º 4
0
 /**
  * Adds 1-50 and then the overlapping 30-80 through add(); the test expects an
  * IllegalArgumentException to be thrown.
  */
 @Test(expectedExceptions = IllegalArgumentException.class)
 public void addThrowsException() {
   final boolean startsAtZero = mSortedSet.size() == 0;
   assertTrue(startsAtZero);

   final GenomeLoc first = genomeLocParser.createGenomeLoc(contigOneName, 1, 50);
   mSortedSet.add(first);

   final GenomeLoc overlapping = genomeLocParser.createGenomeLoc(contigOneName, 30, 80);
   mSortedSet.add(overlapping);
 }
Ejemplo n.º 5
0
 /**
  * Adds 1-50 with add() and the overlapping 30-80 with addRegion(), then checks that the set
  * still reports a single element.
  */
 @Test
 public void addRegion() {
   final boolean startsAtZero = mSortedSet.size() == 0;
   assertTrue(startsAtZero);

   final GenomeLoc first = genomeLocParser.createGenomeLoc(contigOneName, 1, 50);
   mSortedSet.add(first);

   final GenomeLoc overlapping = genomeLocParser.createGenomeLoc(contigOneName, 30, 80);
   mSortedSet.addRegion(overlapping);

   final int sizeAfterBoth = mSortedSet.size();
   assertTrue(sizeAfterBoth == 1);
 }
Ejemplo n.º 6
0
 /**
  * Builds a sorted set holding one location per sequence of the dictionary, each spanning from
  * position 1 to that sequence's length.
  *
  * @param dict the sequence dictionary whose sequences are turned into locations
  * @return a new GenomeLocSortedSet with one full-length GenomeLoc per sequence in {@code dict}
  */
 public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
   final GenomeLocParser locParser = new GenomeLocParser(dict);
   final GenomeLocSortedSet wholeSequences = new GenomeLocSortedSet(locParser);

   for (final SAMSequenceRecord sequence : dict.getSequences()) {
     final GenomeLoc fullSpan =
         locParser.createGenomeLoc(sequence.getSequenceName(), 1, sequence.getSequenceLength());
     wholeSequences.add(fullSpan);
   }

   return wholeSequences;
 }
Ejemplo n.º 7
0
 /**
  * Adds 1-100, subtracts every single-base location from 1 through 100 one at a time, and checks
  * that the resulting set is empty.
  */
 @Test
 public void deleteAllByRegion() {
   final GenomeLoc whole = genomeLocParser.createGenomeLoc(contigOneName, 1, 100);
   mSortedSet.add(whole);

   int position = 1;
   while (position <= 100) {
     final GenomeLoc singleBase = genomeLocParser.createGenomeLoc(contigOneName, position, position);
     final GenomeLocSortedSet toSubtract = new GenomeLocSortedSet(genomeLocParser, singleBase);
     mSortedSet = mSortedSet.subtractRegions(toSubtract);
     position++;
   }

   assertTrue(mSortedSet.isEmpty());
 }
Ejemplo n.º 8
0
 /**
  * Checks that getOverlapping returns the same list as getOverlappingFullSearch for the given
  * region, and that overlaps() is true exactly when that list is non-empty.
  *
  * @param intervals the interval set under test
  * @param region the query region
  */
 @Test(dataProvider = "GetOverlapping")
 public void testGetOverlapping(final GenomeLocSortedSet intervals, final GenomeLoc region) {
   final List<GenomeLoc> reference = intervals.getOverlappingFullSearch(region);
   final List<GenomeLoc> candidate = intervals.getOverlapping(region);
   Assert.assertEquals(candidate, reference);

   final boolean reportedOverlap = intervals.overlaps(region);
   final boolean anyReferenceHit = !reference.isEmpty();
   Assert.assertEquals(
       reportedOverlap,
       anyReferenceHit,
       "GenomeLocSortedSet.overlaps didn't return expected result");
 }
Ejemplo n.º 9
0
 /**
  * Adds a location on the sequence at dictionary index 2 first and one on contigOneName second,
  * then checks that the set lists the contigOneName location before the other one.
  */
 @Test
 public void addRegionsOutOfOrder() {
   final String contigTwoName = header.getSequenceDictionary().getSequence(2).getSequenceName();
   final boolean startsAtZero = mSortedSet.size() == 0;
   assertTrue(startsAtZero);

   final GenomeLoc onContigTwo = genomeLocParser.createGenomeLoc(contigTwoName, 1, 50);
   mSortedSet.add(onContigTwo);

   final GenomeLoc onContigOne = genomeLocParser.createGenomeLoc(contigOneName, 30, 80);
   mSortedSet.addRegion(onContigOne);

   final int sizeAfterBoth = mSortedSet.size();
   assertTrue(sizeAfterBoth == 2);

   final GenomeLoc listedFirst = mSortedSet.toList().get(0);
   assertTrue(listedFirst.getContig().equals(contigOneName));

   final GenomeLoc listedSecond = mSortedSet.toList().get(1);
   assertTrue(listedSecond.getContig().equals(contigTwoName));
 }
Ejemplo n.º 10
0
 /**
  * Adds 1-100, subtracts the single-base locations 1 through 49 one at a time, and checks that
  * exactly one location spanning 50-100 is left.
  */
 @Test
 public void deleteSomeByRegion() {
   final GenomeLoc whole = genomeLocParser.createGenomeLoc(contigOneName, 1, 100);
   mSortedSet.add(whole);

   int position = 1;
   while (position <= 49) {
     final GenomeLoc singleBase = genomeLocParser.createGenomeLoc(contigOneName, position, position);
     final GenomeLocSortedSet toSubtract = new GenomeLocSortedSet(genomeLocParser, singleBase);
     mSortedSet = mSortedSet.subtractRegions(toSubtract);
     position++;
   }

   assertTrue(!mSortedSet.isEmpty());
   final int remainingCount = mSortedSet.size();
   assertTrue(remainingCount == 1);

   final GenomeLoc survivor = mSortedSet.iterator().next();
   assertTrue(survivor.getStop() == 100);
   assertTrue(survivor.getStart() == 50);
 }
Ejemplo n.º 11
0
  /**
   * Subtracts 3-5, 10-12 and 16-18 from 1-20 and checks the covered sizes of the original, the
   * excluded and the remaining sets, then checks the four remaining pieces (1-2, 6-9, 13-15,
   * 19-20) in iteration order.
   */
  @Test
  public void substractComplexExample() {
    final GenomeLoc whole = genomeLocParser.createGenomeLoc(contigOneName, 1, 20);
    mSortedSet.add(whole);

    final GenomeLoc firstHole = genomeLocParser.createGenomeLoc(contigOneName, 3, 5);
    final GenomeLoc secondHole = genomeLocParser.createGenomeLoc(contigOneName, 10, 12);
    final GenomeLoc thirdHole = genomeLocParser.createGenomeLoc(contigOneName, 16, 18);
    final GenomeLocSortedSet toExclude =
        new GenomeLocSortedSet(genomeLocParser, Arrays.asList(firstHole, secondHole, thirdHole));

    final GenomeLocSortedSet remaining = mSortedSet.subtractRegions(toExclude);

    assertEquals(mSortedSet.coveredSize(), 20);
    assertEquals(toExclude.coveredSize(), 9);
    assertEquals(remaining.coveredSize(), 11);

    // pull all four pieces first, then compare, so the iterator is drained before any assertion
    final Iterator<GenomeLoc> pieces = remaining.iterator();
    final GenomeLoc[] actualPieces = new GenomeLoc[4];
    for (int i = 0; i < actualPieces.length; i++) {
      actualPieces[i] = pieces.next();
    }

    final int[][] expectedBounds = {{1, 2}, {6, 9}, {13, 15}, {19, 20}};
    for (int i = 0; i < expectedBounds.length; i++) {
      assertEquals(
          genomeLocParser.createGenomeLoc(contigOneName, expectedBounds[i][0], expectedBounds[i][1]),
          actualPieces[i]);
    }
  }
Ejemplo n.º 12
0
 /**
  * Builds the set from the header's sequence dictionary and checks that it holds
  * NUMBER_OF_CHROMOSOMES locations, each spanning 1 to CHROMOSOME_SIZE, with contig indices
  * counting up from 0 in iteration order.
  */
 @Test
 public void fromSequenceDictionary() {
   mSortedSet =
       GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary());

   final int locationCount = mSortedSet.size();
   assertTrue(locationCount == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES);

   int expectedContigIndex = 0;
   for (final GenomeLoc contigSpan : mSortedSet) {
     assertTrue(contigSpan.getStart() == 1);
     assertTrue(contigSpan.getStop() == GenomeLocSortedSetUnitTest.CHROMOSOME_SIZE);
     assertTrue(contigSpan.getContigIndex() == expectedContigIndex);
     expectedContigIndex++;
   }

   // the loop must have visited every chromosome
   assertTrue(expectedContigIndex == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES);
 }
Ejemplo n.º 13
0
 /**
  * Captures the current progress as a ProgressMeterData: elapsed time from the timer, the record
  * count passed in, and the amount of sequence processed so far.
  *
  * @param loc our current position. If null, the whole target size is reported as processed
  * @param nTotalRecordsProcessed the total number of records we've processed
  * @return a new ProgressMeterData built from the timer's elapsed time, {@code
  *     nTotalRecordsProcessed}, and either the target size (null {@code loc}) or the size of the
  *     regions being processed that lies before {@code loc}
  */
 private ProgressMeterData takeProgressSnapshot(
     final GenomeLoc loc, final long nTotalRecordsProcessed) {
   final long bpProcessed;
   if (loc != null) {
     bpProcessed = regionsBeingProcessed.sizeBeforeLoc(loc);
   } else {
     // no position means traversal has finished
     bpProcessed = targetSizeInBP;
   }

   return new ProgressMeterData(timer.getElapsedTime(), nTotalRecordsProcessed, bpProcessed);
 }
Ejemplo n.º 14
0
  /**
   * Holds 10-20 and 70-100, subtracts 15-75 (which cuts into both), and checks that the first two
   * locations left are 10-14 and 76-100, each with contig index 1.
   */
  @Test
  public void deleteSuperRegion() {
    final GenomeLoc lower = genomeLocParser.createGenomeLoc(contigOneName, 10, 20);
    final GenomeLoc upper = genomeLocParser.createGenomeLoc(contigOneName, 70, 100);
    mSortedSet.add(upper);
    mSortedSet.addRegion(lower);
    final int sizeBeforeDelete = mSortedSet.size();
    assertTrue(sizeBeforeDelete == 2);

    // subtract a region that reaches into both stored locations
    final GenomeLoc spanning = genomeLocParser.createGenomeLoc(contigOneName, 15, 75);
    final GenomeLocSortedSet toSubtract = new GenomeLocSortedSet(genomeLocParser, spanning);
    mSortedSet = mSortedSet.subtractRegions(toSubtract);

    final int[][] expectedBounds = {{10, 14}, {76, 100}};
    final Iterator<GenomeLoc> survivors = mSortedSet.iterator();
    for (final int[] bounds : expectedBounds) {
      final GenomeLoc survivor = survivors.next();
      assertTrue(survivor.getStart() == bounds[0]);
      assertTrue(survivor.getStop() == bounds[1]);
      assertTrue(survivor.getContigIndex() == 1);
    }
  }
Ejemplo n.º 15
0
  /**
   * Creates one LocusShard per piece of each interval, where an interval whose size exceeds
   * {@code maxShardSize} is cut into consecutive pieces of {@code maxShardSize} positions followed
   * by one final, possibly shorter, piece.
   *
   * @param readsDataSource the reads data source handed to every created shard
   * @param intervals the intervals to shard; also supplies the GenomeLocParser used for the pieces
   * @param maxShardSize the largest number of positions a single shard may span; must be at least 1
   * @return the shards in interval order, pieces of one interval in ascending start order
   * @throws IllegalArgumentException if {@code maxShardSize} is less than 1
   */
  public Iterable<Shard> createShardsOverIntervals(
      final SAMDataSource readsDataSource,
      final GenomeLocSortedSet intervals,
      final int maxShardSize) {
    // A non-positive shard size would make the split loop below request a piece whose stop lies
    // before its start and never advance the interval start, so reject it up front.
    if (maxShardSize < 1) {
      throw new IllegalArgumentException(
          "maxShardSize must be at least 1 but was " + maxShardSize);
    }

    final List<Shard> shards = new ArrayList<Shard>();

    for (GenomeLoc interval : intervals) {
      // peel full-size pieces off the front until what is left fits into a single shard
      while (interval.size() > maxShardSize) {
        final GenomeLoc piece =
            intervals
                .getGenomeLocParser()
                .createGenomeLoc(
                    interval.getContig(),
                    interval.getStart(),
                    interval.getStart() + maxShardSize - 1);
        shards.add(
            new LocusShard(
                intervals.getGenomeLocParser(),
                readsDataSource,
                Collections.singletonList(piece),
                null));

        // continue with the part of the interval that follows the piece just emitted
        interval =
            intervals
                .getGenomeLocParser()
                .createGenomeLoc(
                    interval.getContig(), interval.getStart() + maxShardSize, interval.getStop());
      }

      // the remainder (or the whole interval when it was small enough) becomes the last shard
      shards.add(
          new LocusShard(
              intervals.getGenomeLocParser(),
              readsDataSource,
              Collections.singletonList(interval),
              null));
    }

    return shards;
  }
Ejemplo n.º 16
0
 /**
  * Adds 49-100 with add() and then the overlapping 0-50 with addRegion(), and checks that the set
  * holds a single location spanning 0-100 with contig index 1.
  */
 @Test
 public void mergingOverlappingAbove() {
   final GenomeLoc lower = genomeLocParser.createGenomeLoc(contigOneName, 0, 50);
   final GenomeLoc upper = genomeLocParser.createGenomeLoc(contigOneName, 49, 100);

   final boolean startsAtZero = mSortedSet.size() == 0;
   assertTrue(startsAtZero);

   mSortedSet.add(upper);
   final boolean holdsOneAfterAdd = mSortedSet.size() == 1;
   assertTrue(holdsOneAfterAdd);

   mSortedSet.addRegion(lower);
   final boolean holdsOneAfterMerge = mSortedSet.size() == 1;
   assertTrue(holdsOneAfterMerge);

   final Iterator<GenomeLoc> contents = mSortedSet.iterator();
   final GenomeLoc merged = contents.next();
   assertEquals(merged.getStart(), 0);
   assertEquals(merged.getStop(), 100);
   assertEquals(merged.getContigIndex(), 1);
 }
Ejemplo n.º 17
0
  /**
   * Builds a ProgressMeter, optionally opens the performance log and writes its tab-separated
   * header line, caches the covered size of the intervals, creates the ProgressMeterDaemon and
   * calls start().
   *
   * @param performanceLogFile an optional file that receives a table of performance logs; when
   *     null no performance log is written
   * @param processingUnitName the name of the unit type being processed, suitable for saying X
   *     seconds per processingUnitName; must not be null
   * @param processingIntervals the intervals being processed; must not be null
   * @throws IllegalArgumentException if processingUnitName or processingIntervals is null
   */
  public ProgressMeter(
      final File performanceLogFile,
      final String processingUnitName,
      final GenomeLocSortedSet processingIntervals) {
    if (processingUnitName == null) {
      throw new IllegalArgumentException("processingUnitName cannot be null");
    }
    if (processingIntervals == null) {
      throw new IllegalArgumentException("Target intervals cannot be null");
    }

    this.processingUnitName = processingUnitName;
    this.regionsBeingProcessed = processingIntervals;

    if (performanceLogFile == null) {
      // no log requested
      this.performanceLog = null;
    } else {
      try {
        this.performanceLog = new PrintStream(new FileOutputStream(performanceLogFile));
        final List<String> columnNames =
            Arrays.asList(
                "elapsed.time",
                "units.processed",
                "processing.speed",
                "bp.processed",
                "bp.speed",
                "genome.fraction.complete",
                "est.total.runtime",
                "est.time.remaining");
        performanceLog.println(Utils.join("\t", columnNames));
      } catch (FileNotFoundException e) {
        throw new UserException.CouldNotCreateOutputFile(performanceLogFile, e);
      }
    }

    // computed once here and reused for later snapshots
    this.targetSizeInBP = processingIntervals.coveredSize();

    // create the daemon, then start the meter
    this.progressMeterDaemon = new ProgressMeterDaemon(this);
    start();
  }
Ejemplo n.º 18
0
 /**
  * Builds a new GenomeLocSortedSet backed by the given parser and adds every location of the list
  * to it through addAll.
  *
  * @param parser the GenomeLocParser handed to the new set
  * @param locs the locations to add
  * @return the newly created set containing {@code locs}
  */
 public static GenomeLocSortedSet createSetFromList(GenomeLocParser parser, List<GenomeLoc> locs) {
   final GenomeLocSortedSet populated = new GenomeLocSortedSet(parser);
   populated.addAll(locs);
   return populated;
 }
Ejemplo n.º 19
0
  /**
   * Loads the two-base intervals 10-11, 20-21, 30-31, 40-41 and 50-51 and checks overlaps() for a
   * fixed sequence of query ranges. The queries run in a fixed order on purpose (for example, an
   * exact match is queried right after a miss).
   */
  @Test
  public void overlap() {
    for (int start = 10; start <= 50; start += 10) {
      mSortedSet.add(genomeLocParser.createGenomeLoc(contigOneName, start, start + 1));
    }

    // each row is {query start, query stop, 1 if an overlap is expected else 0}
    final int[][] queries = {
      // in and around the first interval
      {9, 9, 0},
      {10, 10, 1},
      {11, 11, 1},
      {12, 12, 0},
      // ranges touching or spanning intervals
      {14, 20, 1},
      {11, 15, 1},
      {30, 40, 1},
      {51, 53, 1},
      // a miss between two intervals
      {12, 19, 0},
      // exact match right after the miss
      {40, 41, 1},
      // around the beginning of the intervals
      {5, 6, 0},
      {0, 10, 1},
      // around the end of the intervals
      {52, 53, 0},
      {51, 53, 1},
      {52, 53, 0},
    };

    for (final int[] query : queries) {
      final boolean overlapping =
          mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, query[0], query[1]));
      if (query[2] == 1) {
        assertTrue(overlapping);
      } else {
        assertFalse(overlapping);
      }
    }
  }
Ejemplo n.º 20
0
  /**
   * Traverses every locus offered by the shard's locus view, builds an activity profile from the
   * walker's per-locus results, turns that profile into active regions, queues them on the work
   * queue and finally processes queued regions.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>For each locus, positions strictly between the previous locus' stop and this locus'
   *       start are added to the profile with probability 1.0 when the walker has preset active
   *       regions overlapping that position and 0.0 otherwise. Only positions overlapping the
   *       engine intervals are added; all are added when the engine has no intervals.
   *   <li>The locus itself is added to the profile with the result of walkerActiveProb, under the
   *       same interval filter.
   *   <li>Reads of the locus pileup that are not yet in myReads are appended to it, and the
   *       smallest alignment start over all pileup reads seen is tracked.
   *   <li>After the loop the profile is band-pass filtered and converted to active regions. The
   *       first new region is merged with the last queued one when both have the same isActive
   *       flag, are contiguous and their combined size does not exceed the maximum region size.
   *   <li>processActiveRegions is called with the tracked minimum start and the contig of the
   *       data provider's locus.
   * </ol>
   *
   * @param walker the active region walker; its class is read for the ActiveRegionExtension
   *     annotation
   * @param dataProvider supplies the shard, the locus view and the reference / ROD views
   * @param sum the running reduce value
   * @return the value returned by processActiveRegions, or {@code sum} unchanged when the locus
   *     view has no loci
   */
  @Override
  public T traverse(
      final ActiveRegionWalker<M, T> walker, final LocusShardDataProvider dataProvider, T sum) {
    logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));

    final LocusView locusView = getLocusView(walker, dataProvider);
    final GenomeLocSortedSet initialIntervals = engine.getIntervals();

    final LocusReferenceView referenceView = new LocusReferenceView(walker, dataProvider);
    // NOTE(review): getAnnotation returns null when the walker class carries no (runtime-visible)
    // ActiveRegionExtension annotation, which would make the two lookups below throw a
    // NullPointerException - confirm every walker is annotated.
    final int activeRegionExtension =
        walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
    final int maxRegionSize =
        walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();

    if (locusView
        .hasNext()) { // trivial optimization to avoid unnecessary processing when there's nothing
                      // here at all
      // smallest alignment start over every pileup read seen below; stays MAX_VALUE if none
      int minStart = Integer.MAX_VALUE;
      ActivityProfile profile =
          new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());

      ReferenceOrderedView referenceOrderedDataView =
          getReferenceOrderedView(walker, dataProvider, locusView);

      // We keep processing while the next reference location is within the interval
      GenomeLoc prevLoc = null;
      while (locusView.hasNext()) {
        final AlignmentContext locus = locusView.next();
        GenomeLoc location = locus.getLocation();

        if (prevLoc != null) {
          // fill in the active / inactive labels from the stop of the previous location to the
          // start of this location
          // TODO refactor to separate function
          // NOTE(review): the gap positions are created on prevLoc's contig, so this presumably
          // relies on consecutive loci of a shard sharing a contig - confirm.
          for (int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++) {
            final GenomeLoc fakeLoc =
                engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
            if (initialIntervals == null || initialIntervals.overlaps(fakeLoc)) {
              profile.add(
                  fakeLoc,
                  new ActivityProfileResult(
                      walker.hasPresetActiveRegions()
                              && walker.presetActiveRegions.overlaps(fakeLoc)
                          ? 1.0
                          : 0.0));
            }
          }
        }

        dataProvider.getShard().getReadMetrics().incrementNumIterations();

        // create reference context. Note that if we have a pileup of "extended events", the context
        // will
        // hold the (longest) stretch of deleted reference bases (if deletions are present in the
        // pileup).
        final ReferenceContext refContext = referenceView.getReferenceContext(location);

        // Iterate forward to get all reference ordered data covering this location
        final RefMetaDataTracker tracker =
            referenceOrderedDataView.getReferenceOrderedDataAtLocus(
                locus.getLocation(), refContext);

        // Call the walkers isActive function for this locus and add them to the list to be
        // integrated later
        if (initialIntervals == null || initialIntervals.overlaps(location)) {
          profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
        }

        // Grab all the previously unseen reads from this pileup and add them to the massive read
        // list
        for (final PileupElement p : locus.getBasePileup()) {
          final GATKSAMRecord read = p.getRead();
          if (!myReads.contains(read)) {
            myReads.add(read);
          }

          // If this is the last pileup for this shard calculate the minimum alignment start so that
          // we know
          // which active regions in the work queue are now safe to process
          // (the minimum is in fact updated for every read of every pileup, not only the last one)
          minStart = Math.min(minStart, read.getAlignmentStart());
        }

        prevLoc = location;

        printProgress(locus.getLocation());
      }

      updateCumulativeMetrics(dataProvider.getShard());

      // Take the individual isActive calls and integrate them into contiguous active regions and
      // add these blocks of work to the work queue
      // band-pass filter the list of isActive probabilities and turn into active regions
      final ActivityProfile bandPassFiltered = profile.bandPassFilter();
      final List<ActiveRegion> activeRegions =
          bandPassFiltered.createActiveRegions(activeRegionExtension, maxRegionSize);

      // add active regions to queue of regions to process
      // first check if can merge active regions over shard boundaries
      if (!activeRegions.isEmpty()) {
        if (!workQueue.isEmpty()) {
          final ActiveRegion last = workQueue.getLast();
          final ActiveRegion first = activeRegions.get(0);
          if (last.isActive == first.isActive
              && last.getLocation().contiguousP(first.getLocation())
              && last.getLocation().size() + first.getLocation().size() <= maxRegionSize) {
            // replace the queued tail and the first new region by one region covering both
            workQueue.removeLast();
            activeRegions.remove(first);
            workQueue.add(
                new ActiveRegion(
                    last.getLocation().union(first.getLocation()),
                    first.isActive,
                    this.engine.getGenomeLocParser(),
                    activeRegionExtension));
          }
        }
        workQueue.addAll(activeRegions);
      }

      // note: activeRegions.size() below no longer counts a region that was merged above
      logger.debug(
          "Integrated "
              + profile.size()
              + " isActive calls into "
              + activeRegions.size()
              + " regions.");

      // now go and process all of the active regions
      sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig());
    }

    return sum;
  }
Ejemplo n.º 21
0
 /**
  * Asserts that sizeBeforeLoc, queried with the single-base location at {@code pos} on
  * contigOneName, equals {@code size}.
  *
  * @param pos the position of the single-base probe location
  * @param size the expected sizeBeforeLoc result
  */
 private void testSizeBeforeLocX(int pos, int size) {
   final GenomeLoc probe = genomeLocParser.createGenomeLoc(contigOneName, pos, pos);
   final String failureMessage = String.format("X pos=%d size=%d", pos, size);
   assertEquals(mSortedSet.sizeBeforeLoc(probe), size, failureMessage);
 }