@Test public void testAdd() { GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 0, 0); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); }
@Test public void testSizeBeforeLoc() { GenomeLoc r1 = genomeLocParser.createGenomeLoc(contigOneName, 3, 5); GenomeLoc r2 = genomeLocParser.createGenomeLoc(contigOneName, 10, 12); GenomeLoc r3 = genomeLocParser.createGenomeLoc(contigOneName, 16, 18); mSortedSet.addAll(Arrays.asList(r1, r2, r3)); testSizeBeforeLocX(2, 0); testSizeBeforeLocX(3, 0); testSizeBeforeLocX(4, 1); testSizeBeforeLocX(5, 2); testSizeBeforeLocX(6, 3); testSizeBeforeLocX(10, 3); testSizeBeforeLocX(11, 4); testSizeBeforeLocX(12, 5); testSizeBeforeLocX(13, 6); testSizeBeforeLocX(15, 6); testSizeBeforeLocX(16, 6); testSizeBeforeLocX(17, 7); testSizeBeforeLocX(18, 8); testSizeBeforeLocX(19, 9); testSizeBeforeLocX(50, 9); testSizeBeforeLocX(50, (int) mSortedSet.coveredSize()); }
@Test(expectedExceptions = IllegalArgumentException.class) public void testAddDuplicate() { assertTrue(mSortedSet.size() == 0); GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 0, 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); mSortedSet.add(g); }
@Test(expectedExceptions = IllegalArgumentException.class) public void addThrowsException() { assertTrue(mSortedSet.size() == 0); GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 1, 50); mSortedSet.add(g); GenomeLoc f = genomeLocParser.createGenomeLoc(contigOneName, 30, 80); mSortedSet.add(f); }
@Test public void addRegion() { assertTrue(mSortedSet.size() == 0); GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 1, 50); mSortedSet.add(g); GenomeLoc f = genomeLocParser.createGenomeLoc(contigOneName, 30, 80); mSortedSet.addRegion(f); assertTrue(mSortedSet.size() == 1); }
/** * create a list of genomic locations, given a reference sequence * * @param dict the sequence dictionary to create a collection from * @return the GenomeLocSet of all references sequences as GenomeLoc's */ public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) { GenomeLocParser parser = new GenomeLocParser(dict); GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet(parser); for (SAMSequenceRecord record : dict.getSequences()) { returnSortedSet.add( parser.createGenomeLoc(record.getSequenceName(), 1, record.getSequenceLength())); } return returnSortedSet; }
@Test public void deleteAllByRegion() { GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 100); mSortedSet.add(e); for (int x = 1; x < 101; x++) { GenomeLoc del = genomeLocParser.createGenomeLoc(contigOneName, x, x); mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser, del)); } assertTrue(mSortedSet.isEmpty()); }
@Test(dataProvider = "GetOverlapping") public void testGetOverlapping(final GenomeLocSortedSet intervals, final GenomeLoc region) { final List<GenomeLoc> expectedOverlapping = intervals.getOverlappingFullSearch(region); final List<GenomeLoc> actualOverlapping = intervals.getOverlapping(region); Assert.assertEquals(actualOverlapping, expectedOverlapping); Assert.assertEquals( intervals.overlaps(region), !expectedOverlapping.isEmpty(), "GenomeLocSortedSet.overlaps didn't return expected result"); }
@Test public void addRegionsOutOfOrder() { final String contigTwoName = header.getSequenceDictionary().getSequence(2).getSequenceName(); assertTrue(mSortedSet.size() == 0); GenomeLoc g = genomeLocParser.createGenomeLoc(contigTwoName, 1, 50); mSortedSet.add(g); GenomeLoc f = genomeLocParser.createGenomeLoc(contigOneName, 30, 80); mSortedSet.addRegion(f); assertTrue(mSortedSet.size() == 2); assertTrue(mSortedSet.toList().get(0).getContig().equals(contigOneName)); assertTrue(mSortedSet.toList().get(1).getContig().equals(contigTwoName)); }
@Test public void deleteSomeByRegion() { GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 100); mSortedSet.add(e); for (int x = 1; x < 50; x++) { GenomeLoc del = genomeLocParser.createGenomeLoc(contigOneName, x, x); mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser, del)); } assertTrue(!mSortedSet.isEmpty()); assertTrue(mSortedSet.size() == 1); GenomeLoc loc = mSortedSet.iterator().next(); assertTrue(loc.getStop() == 100); assertTrue(loc.getStart() == 50); }
@Test public void substractComplexExample() { GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 20); mSortedSet.add(e); GenomeLoc r1 = genomeLocParser.createGenomeLoc(contigOneName, 3, 5); GenomeLoc r2 = genomeLocParser.createGenomeLoc(contigOneName, 10, 12); GenomeLoc r3 = genomeLocParser.createGenomeLoc(contigOneName, 16, 18); GenomeLocSortedSet toExclude = new GenomeLocSortedSet(genomeLocParser, Arrays.asList(r1, r2, r3)); GenomeLocSortedSet remaining = mSortedSet.subtractRegions(toExclude); // logger.debug("Initial " + mSortedSet); // logger.debug("Exclude " + toExclude); // logger.debug("Remaining " + remaining); assertEquals(mSortedSet.coveredSize(), 20); assertEquals(toExclude.coveredSize(), 9); assertEquals(remaining.coveredSize(), 11); Iterator<GenomeLoc> it = remaining.iterator(); GenomeLoc p1 = it.next(); GenomeLoc p2 = it.next(); GenomeLoc p3 = it.next(); GenomeLoc p4 = it.next(); assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 1, 2), p1); assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 6, 9), p2); assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 13, 15), p3); assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 19, 20), p4); }
@Test public void fromSequenceDictionary() { mSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(this.header.getSequenceDictionary()); // we should have sequence assertTrue(mSortedSet.size() == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES); int seqNumber = 0; for (GenomeLoc loc : mSortedSet) { assertTrue(loc.getStart() == 1); assertTrue(loc.getStop() == GenomeLocSortedSetUnitTest.CHROMOSOME_SIZE); assertTrue(loc.getContigIndex() == seqNumber); ++seqNumber; } assertTrue(seqNumber == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES); }
/** * Creates a new ProgressData object recording a snapshot of our progress at this instant * * @param loc our current position. If null, assumes we are done traversing * @param nTotalRecordsProcessed the total number of records we've processed * @return */ private ProgressMeterData takeProgressSnapshot( final GenomeLoc loc, final long nTotalRecordsProcessed) { // null -> end of processing final long bpProcessed = loc == null ? targetSizeInBP : regionsBeingProcessed.sizeBeforeLoc(loc); return new ProgressMeterData(timer.getElapsedTime(), nTotalRecordsProcessed, bpProcessed); }
@Test public void deleteSuperRegion() { GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 10, 20); GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 70, 100); mSortedSet.add(g); mSortedSet.addRegion(e); assertTrue(mSortedSet.size() == 2); // now delete a region GenomeLoc d = genomeLocParser.createGenomeLoc(contigOneName, 15, 75); mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser, d)); Iterator<GenomeLoc> iter = mSortedSet.iterator(); GenomeLoc loc = iter.next(); assertTrue(loc.getStart() == 10); assertTrue(loc.getStop() == 14); assertTrue(loc.getContigIndex() == 1); loc = iter.next(); assertTrue(loc.getStart() == 76); assertTrue(loc.getStop() == 100); assertTrue(loc.getContigIndex() == 1); }
public Iterable<Shard> createShardsOverIntervals( final SAMDataSource readsDataSource, final GenomeLocSortedSet intervals, final int maxShardSize) { List<Shard> shards = new ArrayList<Shard>(); for (GenomeLoc interval : intervals) { while (interval.size() > maxShardSize) { shards.add( new LocusShard( intervals.getGenomeLocParser(), readsDataSource, Collections.singletonList( intervals .getGenomeLocParser() .createGenomeLoc( interval.getContig(), interval.getStart(), interval.getStart() + maxShardSize - 1)), null)); interval = intervals .getGenomeLocParser() .createGenomeLoc( interval.getContig(), interval.getStart() + maxShardSize, interval.getStop()); } shards.add( new LocusShard( intervals.getGenomeLocParser(), readsDataSource, Collections.singletonList(interval), null)); } return shards; }
@Test public void mergingOverlappingAbove() { GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 0, 50); GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 49, 100); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); mSortedSet.addRegion(e); assertTrue(mSortedSet.size() == 1); Iterator<GenomeLoc> iter = mSortedSet.iterator(); GenomeLoc loc = iter.next(); assertEquals(loc.getStart(), 0); assertEquals(loc.getStop(), 100); assertEquals(loc.getContigIndex(), 1); }
/** * Create a new ProgressMeter * * @param performanceLogFile an optional performance log file where a table of performance logs * will be written * @param processingUnitName the name of the unit type being processed, suitable for saying X * seconds per processingUnitName * @param processingIntervals the intervals being processed */ public ProgressMeter( final File performanceLogFile, final String processingUnitName, final GenomeLocSortedSet processingIntervals) { if (processingUnitName == null) throw new IllegalArgumentException("processingUnitName cannot be null"); if (processingIntervals == null) throw new IllegalArgumentException("Target intervals cannot be null"); this.processingUnitName = processingUnitName; this.regionsBeingProcessed = processingIntervals; // setup the performance logger output, if requested if (performanceLogFile != null) { try { this.performanceLog = new PrintStream(new FileOutputStream(performanceLogFile)); final List<String> pLogHeader = Arrays.asList( "elapsed.time", "units.processed", "processing.speed", "bp.processed", "bp.speed", "genome.fraction.complete", "est.total.runtime", "est.time.remaining"); performanceLog.println(Utils.join("\t", pLogHeader)); } catch (FileNotFoundException e) { throw new UserException.CouldNotCreateOutputFile(performanceLogFile, e); } } else { performanceLog = null; } // cached for performance reasons targetSizeInBP = processingIntervals.coveredSize(); // start up the timer progressMeterDaemon = new ProgressMeterDaemon(this); start(); }
/** * Create a sorted genome location set from a list of GenomeLocs. * * @param locs the list<GenomeLoc> * @return the sorted genome loc list */ public static GenomeLocSortedSet createSetFromList(GenomeLocParser parser, List<GenomeLoc> locs) { GenomeLocSortedSet set = new GenomeLocSortedSet(parser); set.addAll(locs); return set; }
@Test public void overlap() { for (int i = 1; i < 6; i++) { final int start = i * 10; mSortedSet.add(genomeLocParser.createGenomeLoc(contigOneName, start, start + 1)); } // test matches in and around interval assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 9, 9))); assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 10, 10))); assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 11, 11))); assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 12, 12))); // test matches spanning intervals assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 14, 20))); assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 11, 15))); assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 30, 40))); assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 51, 53))); // test miss assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 12, 19))); // test exact match after miss assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 40, 41))); // test matches at beginning of intervals assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 5, 6))); assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 0, 10))); // test matches at end of intervals assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 52, 53))); assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 51, 53))); assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 52, 53))); }
@Override public T traverse( final ActiveRegionWalker<M, T> walker, final LocusShardDataProvider dataProvider, T sum) { logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider)); final LocusView locusView = getLocusView(walker, dataProvider); final GenomeLocSortedSet initialIntervals = engine.getIntervals(); final LocusReferenceView referenceView = new LocusReferenceView(walker, dataProvider); final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension(); final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion(); if (locusView .hasNext()) { // trivial optimization to avoid unnecessary processing when there's nothing // here at all int minStart = Integer.MAX_VALUE; ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions()); ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); // We keep processing while the next reference location is within the interval GenomeLoc prevLoc = null; while (locusView.hasNext()) { final AlignmentContext locus = locusView.next(); GenomeLoc location = locus.getLocation(); if (prevLoc != null) { // fill in the active / inactive labels from the stop of the previous location to the // start of this location // TODO refactor to separate function for (int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++) { final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii); if (initialIntervals == null || initialIntervals.overlaps(fakeLoc)) { profile.add( fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0)); } } } dataProvider.getShard().getReadMetrics().incrementNumIterations(); // create reference context. Note that if we have a pileup of "extended events", the context // will // hold the (longest) stretch of deleted reference bases (if deletions are present in the // pileup). final ReferenceContext refContext = referenceView.getReferenceContext(location); // Iterate forward to get all reference ordered data covering this location final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus( locus.getLocation(), refContext); // Call the walkers isActive function for this locus and add them to the list to be // integrated later if (initialIntervals == null || initialIntervals.overlaps(location)) { profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location)); } // Grab all the previously unseen reads from this pileup and add them to the massive read // list for (final PileupElement p : locus.getBasePileup()) { final GATKSAMRecord read = p.getRead(); if (!myReads.contains(read)) { myReads.add(read); } // If this is the last pileup for this shard calculate the minimum alignment start so that // we know // which active regions in the work queue are now safe to process minStart = Math.min(minStart, read.getAlignmentStart()); } prevLoc = location; printProgress(locus.getLocation()); } updateCumulativeMetrics(dataProvider.getShard()); // Take the individual isActive calls and integrate them into contiguous active regions and // add these blocks of work to the work queue // band-pass filter the list of isActive probabilities and turn into active regions final ActivityProfile bandPassFiltered = profile.bandPassFilter(); final List<ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions(activeRegionExtension, maxRegionSize); // add active regions to queue of regions to process // first check if can merge active regions over shard boundaries if (!activeRegions.isEmpty()) { if (!workQueue.isEmpty()) { final ActiveRegion last = workQueue.getLast(); final ActiveRegion first = activeRegions.get(0); if (last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize) { workQueue.removeLast(); activeRegions.remove(first); workQueue.add( new ActiveRegion( last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension)); } } workQueue.addAll(activeRegions); } logger.debug( "Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions."); // now go and process all of the active regions sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig()); } return sum; }
private void testSizeBeforeLocX(int pos, int size) { GenomeLoc test = genomeLocParser.createGenomeLoc(contigOneName, pos, pos); assertEquals( mSortedSet.sizeBeforeLoc(test), size, String.format("X pos=%d size=%d", pos, size)); }