/**
 * Checks that {@code exonDB.target(String)} returns each reference interval when queried with
 * that interval's {@code toString()} value, and returns {@code null} for the unknown key
 * {@code "no-id"}.
 */
@Test(dependsOnMethods = "testCorrectInitialization")
public void testExonByName() {
  nonOverlappingExomeIntervals.forEach(
      expected -> Assert.assertEquals(exonDB.target(expected.toString()), expected));
  Assert.assertNull(exonDB.target("no-id"));
}
/**
 * Checks that {@code exonDB.exomeSize()} equals the sum of the sizes of all the reference
 * intervals.
 */
@Test(dependsOnMethods = {"testCorrectInitialization"})
public void testExomeSize() {
  final int expectedSize =
      nonOverlappingExomeIntervals.stream().mapToInt(SimpleInterval::size).sum();
  Assert.assertEquals(exonDB.exomeSize(), expectedSize);
}
/**
 * Composes the genomic-coordinate part (contig, start, end) of a target's information string.
 *
 * <p>The three fields are joined with {@code COLUMN_SEPARATOR}. When the collection reports no
 * location for the target, each field is replaced by {@code NO_VALUE_STRING}.
 *
 * @param index the index of a target within the collection.
 * @param collection the containing target collection.
 * @return never {@code null}.
 */
private static String coordinateComposer(
    final int index, final TargetCollection<Target> collection) {
  final SimpleInterval location = collection.location(index);
  if (location != null) {
    // Build the "<%s><sep><%d><sep><%d>" pattern first, then fill in the coordinates.
    final String pattern = String.join(COLUMN_SEPARATOR, "%s", "%d", "%d");
    return String.format(
        pattern, location.getContig(), location.getStart(), location.getEnd());
  }
  // No location available: emit a placeholder in each of the three columns.
  return String.join(COLUMN_SEPARATOR, NO_VALUE_STRING, NO_VALUE_STRING, NO_VALUE_STRING);
}
// returns all the intervals that overlap with the query. // (use the optimized version instead, unless you're testing it and need something to compare // against) protected ArrayList<T> getOverlappingIgnoringIndex(SimpleInterval query) { if (!contig.equals(query.getContig())) { // different contig, so we know no one'll overlap. return new ArrayList<T>(); } ArrayList<T> ret = new ArrayList<T>(); for (T v : vs) { // they are sorted by start location, so if this one starts too late // then all of the others will, too. if (v.getStart() > query.getEnd()) { break; } if (query.overlaps(v)) { ret.add(v); } } return ret; }
/**
 * Writes the row in the main matrix output file for a target and, if a row summary writer is
 * present, the corresponding row in the row summary output file.
 *
 * <p>The main row is the target information string followed by one transformed value per count
 * column. The summary row is the target information string, the sum of the counts, and that sum
 * divided by (number of count columns x target size), formatted with
 * {@code AVERAGE_DOUBLE_FORMAT}.
 *
 * @param countBuffer the counts for the target, one entry per column.
 * @param columnTotals per-column totals, passed together with each count to {@code transform};
 *     indexed in parallel with {@code countBuffer}.
 * @param index the index of target within the target collection.
 */
private void writeOutputRows(
    final int[] countBuffer, final long[] columnTotals, final int index) {
  final String transformedCounts =
      IntStream.range(0, countBuffer.length)
          .mapToObj(column -> transform.apply(countBuffer[column], columnTotals[column]))
          .collect(Collectors.joining(COLUMN_SEPARATOR));
  final String targetInfo = targetOutInfo.composeTargetOutInfoString(index, targetCollection);
  outputWriter.println(String.join(COLUMN_SEPARATOR, targetInfo, transformedCounts));

  if (rowSummaryOutputWriter == null) {
    // No row summary requested.
    return;
  }

  final long total = MathUtils.sum(countBuffer);
  final int targetSize = targetCollection.location(index).size();
  // Single-precision arithmetic is intentional here to keep the emitted values unchanged.
  final float denominator = (float) countColumns.columnCount() * targetSize;
  final String average = String.format(AVERAGE_DOUBLE_FORMAT, total / denominator);
  rowSummaryOutputWriter.println(
      String.join(COLUMN_SEPARATOR, targetInfo, Long.toString(total), average));
}
/** * Returns all the intervals that overlap with the query. The query doesn't *have* to be in the * same contig as the intervals we hold, but of course if it isn't you'll get an empty result. You * may modify the returned list. */ public ArrayList<T> getOverlapping(SimpleInterval query) { if (!contig.equals(query.getContig())) { // different contig, so we know no one'll overlap. return new ArrayList<T>(); } ArrayList<T> ret = new ArrayList<T>(); // use index to skip early non-overlapping entries. int idx = firstPotentiallyReaching(query.getStart()); if (idx < 0) { idx = 0; } for (; idx < vs.size(); idx++) { T v = vs.get(idx); // they are sorted by start location, so if this one starts too late // then all of the others will, too. if (v.getStart() > query.getEnd()) { break; } if (query.overlaps(v)) { ret.add(v); } } return ret; }
/** * Set expanded window boundaries, subject to cropping at contig boundaries * * <p>Allows the client to request a specific number of extra reference bases to include before * and after the bases within our interval. These extra bases will be returned by calls to {@link * #getBases} and {@link #iterator} in addition to the bases spanning our actual interval. * * <p>Note that the true window size may be smaller than requested due to cropping at contig * boundaries. Call {@link @numWindowLeadingBases} and {@link @numWindowTrailingBases} to get the * actual window dimensions. * * @param windowLeadingBases Number of extra reference bases to include before the start of our * interval. Must be >= 0. * @param windowTrailingBases Number of extra reference bases to include after the end of our * interval. Must be >= 0. */ public void setWindow(final int windowLeadingBases, final int windowTrailingBases) { if (windowLeadingBases < 0) throw new GATKException("Reference window starts after the current interval"); if (windowTrailingBases < 0) throw new GATKException("Reference window ends before the current interval"); if (interval == null || (windowLeadingBases == 0 && windowTrailingBases == 0)) { // the "windowless" case window = interval; } else { window = new SimpleInterval( interval.getContig(), calculateWindowStart(interval, windowLeadingBases), calculateWindowStop(interval, windowTrailingBases)); } // Changing the window size invalidates our cached query result cachedSequence = null; }
public static JavaPairRDD<GATKRead, Iterable<GATKVariant>> join( final JavaRDD<GATKRead> reads, final JavaRDD<GATKVariant> variants) { final JavaSparkContext ctx = new JavaSparkContext(reads.context()); final IntervalsSkipList<GATKVariant> variantSkipList = new IntervalsSkipList<>(variants.collect()); final Broadcast<IntervalsSkipList<GATKVariant>> variantsBroadcast = ctx.broadcast(variantSkipList); return reads.mapToPair( r -> { final IntervalsSkipList<GATKVariant> intervalsSkipList = variantsBroadcast.getValue(); if (SimpleInterval.isValid(r.getContig(), r.getStart(), r.getEnd())) { return new Tuple2<>(r, intervalsSkipList.getOverlapping(new SimpleInterval(r))); } else { // Sometimes we have reads that do not form valid intervals (reads that do not consume // any ref bases, eg CIGAR 61S90I // In those cases, we'll just say that nothing overlaps the read return new Tuple2<>(r, Collections.emptyList()); } }); }
public static JavaPairRDD<GATKRead, ReferenceBases> addBases( final ReferenceDataflowSource referenceDataflowSource, final JavaRDD<GATKRead> reads) { SerializableFunction<GATKRead, SimpleInterval> windowFunction = referenceDataflowSource.getReferenceWindowFunction(); JavaPairRDD<ReferenceShard, GATKRead> shardRead = reads.mapToPair( gatkRead -> { ReferenceShard shard = ReferenceShard.getShardNumberFromInterval(windowFunction.apply(gatkRead)); return new Tuple2<>(shard, gatkRead); }); JavaPairRDD<ReferenceShard, Iterable<GATKRead>> shardiRead = shardRead.groupByKey(); return shardiRead.flatMapToPair( in -> { List<Tuple2<GATKRead, ReferenceBases>> out = Lists.newArrayList(); Iterable<GATKRead> iReads = in._2(); // Apply the reference window function to each read to produce a set of intervals // representing // the desired reference bases for each read. final List<SimpleInterval> readWindows = StreamSupport.stream(iReads.spliterator(), false) .map(read -> windowFunction.apply(read)) .collect(Collectors.toList()); SimpleInterval interval = SimpleInterval.getSpanningInterval(readWindows); ReferenceBases bases = referenceDataflowSource.getReferenceBases(null, interval); for (GATKRead r : iReads) { final ReferenceBases subset = bases.getSubset(windowFunction.apply(r)); out.add(new Tuple2<>(r, subset)); } return out; }); }
/**
 * Determines the stop of the expanded reference window, bounded if necessary by the contig.
 *
 * <p>The contig length is read from the data source's sequence dictionary entry for the locus's
 * contig.
 *
 * @param locus The locus to expand.
 * @param windowTrailingBases number of bases to attempt to expand relative to the locus end (>=
 *     0)
 * @return The end of the expanded window; never greater than the contig length.
 */
private int calculateWindowStop(final SimpleInterval locus, final int windowTrailingBases) {
  final int sequenceLength =
      dataSource.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength();
  // Add in long arithmetic: end + trailing bases can exceed Integer.MAX_VALUE for very large
  // requests, and an int sum would wrap negative and slip past the contig-length clamp.
  final long requestedStop = (long) locus.getEnd() + windowTrailingBases;
  // The minimum is at most sequenceLength, so narrowing back to int is safe.
  return (int) Math.min(requestedStop, sequenceLength);
}
/**
 * Determines the start of the expanded reference window, never going below position 1.
 *
 * @param locus The locus to expand.
 * @param windowLeadingBases number of bases to attempt to expand relative to the locus start (>=
 *     0)
 * @return The start of the expanded window.
 */
private int calculateWindowStart(final SimpleInterval locus, final int windowLeadingBases) {
  final int requestedStart = locus.getStart() - windowLeadingBases;
  return requestedStart < 1 ? 1 : requestedStart;
}
/**
 * Get the number of extra bases of context after the end of our interval, as configured by a call
 * to {@link #setWindow} or at construction time.
 *
 * <p>Actual number of bases may be less than originally requested if the interval is near a
 * contig boundary. When no window is set, the answer is 0.
 *
 * @return number of extra bases of context after the end of our interval
 */
public int numWindowTrailingBases() {
  if (window == null) {
    return 0;
  }
  return window.getEnd() - interval.getEnd();
}
/**
 * Get the number of extra bases of context before the start of our interval, as configured by a
 * call to {@link #setWindow} or at construction time.
 *
 * <p>Actual number of bases may be less than originally requested if the interval is near a
 * contig boundary. When no window is set, the answer is 0.
 *
 * @return number of extra bases of context before the start of our interval
 */
public int numWindowLeadingBases() {
  if (window == null) {
    return 0;
  }
  return interval.getStart() - window.getStart();
}
/**
 * Supplies look-up cases built from the reference intervals.
 *
 * <p>Each row has three elements: a query interval, the interval expected back (or {@code null}),
 * and an integer. NOTE(review): the integer is presumably the expected index, with
 * {@code -i - 1} marking a miss at position {@code i}; confirm against the consuming tests.
 *
 * <p>Rows are produced in four groups, in this order:
 *
 * <ol>
 *   <li>each interval queried with itself;
 *   <li>each interval queried with the single position at its start;
 *   <li>each interval queried with the single position at its end;
 *   <li>for each consecutive pair, the gap before the second interval: from just after the first
 *       interval when both share a contig, otherwise from position 1 of the second's contig.
 * </ol>
 */
@DataProvider(name = "exonLookUpData")
public Object[][] exonLookUpData() {
  final List<Object[]> cases = new ArrayList<>();

  // Group 1: exact self-queries.
  int position = 0;
  for (final SimpleInterval target : nonOverlappingExomeIntervals) {
    cases.add(new Object[] {target, target, position});
    position++;
  }

  // Group 2: single-base queries at each interval's start.
  position = 0;
  for (final SimpleInterval target : nonOverlappingExomeIntervals) {
    final SimpleInterval startPoint =
        new SimpleInterval(target.getContig(), target.getStart(), target.getStart());
    cases.add(new Object[] {startPoint, target, position});
    position++;
  }

  // Group 3: single-base queries at each interval's end.
  position = 0;
  for (final SimpleInterval target : nonOverlappingExomeIntervals) {
    final SimpleInterval endPoint =
        new SimpleInterval(target.getContig(), target.getEnd(), target.getEnd());
    cases.add(new Object[] {endPoint, target, position});
    position++;
  }

  // Group 4: queries falling in the gap that precedes each interval but the first.
  final int intervalCount = nonOverlappingExomeIntervals.size();
  for (int following = 1; following < intervalCount; following++) {
    final SimpleInterval before = nonOverlappingExomeIntervals.get(following - 1);
    final SimpleInterval after = nonOverlappingExomeIntervals.get(following);
    final SimpleInterval gap;
    if (before.getContig().equals(after.getContig())) {
      gap =
          ExomeToolsTestUtils.createInterval(
              before.getContig(), before.getEnd() + 1, after.getStart() - 1);
    } else {
      gap = ExomeToolsTestUtils.createInterval(after.getContig(), 1, after.getStart() - 1);
    }
    cases.add(new Object[] {gap, null, -following - 1});
  }

  return cases.toArray(new Object[cases.size()][]);
}