@Test(dependsOnMethods = "testCorrectInitialization")
 public void testExonByName() {
   // Each known interval must be returned when looked up through its own toString() value.
   nonOverlappingExomeIntervals.forEach(
       interval -> Assert.assertEquals(exonDB.target(interval.toString()), interval));
   // A name that matches no target must produce null.
   Assert.assertNull(exonDB.target("no-id"));
 }
 @Test(dependsOnMethods = {"testCorrectInitialization"})
 public void testExomeSize() {
   // The expected exome size is the total of the sizes of the individual intervals.
   final int expectedSize =
       nonOverlappingExomeIntervals.stream().mapToInt(interval -> interval.size()).sum();
   Assert.assertEquals(exonDB.exomeSize(), expectedSize);
 }
 /**
  * Composes the genomic-coordinate portion of a target's information string.
  *
  * <p>The result consists of three columns joined by {@code COLUMN_SEPARATOR}: contig, start and
  * end. When the collection reports no location for the target, all three columns contain
  * {@code NO_VALUE_STRING} instead.
  *
  * @param index the index of a target within the collection.
  * @param collection the containing target collection.
  * @return never {@code null}.
  */
 private static String coordinateComposer(
     final int index, final TargetCollection<Target> collection) {
   final SimpleInterval interval = collection.location(index);
   if (interval != null) {
     // contig as text, start and end as integers.
     final String columnsFormat = String.join(COLUMN_SEPARATOR, "%s", "%d", "%d");
     return String.format(
         columnsFormat, interval.getContig(), interval.getStart(), interval.getEnd());
   }
   // No location available: emit the placeholder in each of the three columns.
   return String.join(COLUMN_SEPARATOR, NO_VALUE_STRING, NO_VALUE_STRING, NO_VALUE_STRING);
 }
 // Collects every held interval that overlaps the query by scanning from the first element,
 // without consulting the index. Prefer the optimized getOverlapping(); this version is meant
 // as a reference implementation to compare the optimized one against in tests.
 protected ArrayList<T> getOverlappingIgnoringIndex(SimpleInterval query) {
   final ArrayList<T> overlapping = new ArrayList<>();
   // A query on a different contig cannot overlap anything we hold.
   if (contig.equals(query.getContig())) {
     for (final T candidate : vs) {
       // Elements are sorted by start, so once one starts past the query's end all later
       // ones do as well.
       if (candidate.getStart() > query.getEnd()) {
         break;
       }
       if (query.overlaps(candidate)) {
         overlapping.add(candidate);
       }
     }
   }
   return overlapping;
 }
  /**
   * Emits a target's row to the main matrix output and, when a row summary writer is present,
   * the matching row to the row summary output.
   *
   * <p>Each matrix cell is produced by applying {@code transform} to a count and the column total
   * at the same position. The summary row carries the sum of the counts followed by that sum
   * divided by (number of count columns * target size).
   *
   * @param countBuffer the counts for the target.
   * @param columnTotals the column totals, indexed in parallel with {@code countBuffer}.
   * @param index the index of target within the target collection.
   */
  private void writeOutputRows(
      final int[] countBuffer, final long[] columnTotals, final int index) {
    // Transform every (count, column total) pair into its textual cell.
    final CharSequence[] cells = new CharSequence[countBuffer.length];
    for (int column = 0; column < cells.length; column++) {
      cells[column] = transform.apply(countBuffer[column], columnTotals[column]);
    }
    final String countString = String.join(COLUMN_SEPARATOR, cells);
    final String targetInfoString =
        targetOutInfo.composeTargetOutInfoString(index, targetCollection);

    outputWriter.println(String.join(COLUMN_SEPARATOR, targetInfoString, countString));

    // The row summary is optional.
    if (rowSummaryOutputWriter == null) {
      return;
    }

    final long countSum = MathUtils.sum(countBuffer);
    // NOTE(review): location(index) is dereferenced unguarded here; confirm it cannot be null
    // for the targets that reach this point.
    final int targetSize = targetCollection.location(index).size();
    // Single-precision arithmetic: the divisor is computed as a float.
    final float average = countSum / ((float) countColumns.columnCount() * targetSize);
    rowSummaryOutputWriter.println(
        String.join(
            COLUMN_SEPARATOR,
            targetInfoString,
            Long.toString(countSum),
            String.format(AVERAGE_DOUBLE_FORMAT, average)));
  }
 /**
  * Finds every held interval that overlaps the query.
  *
  * <p>The query may lie on a different contig than the intervals held here; in that case the
  * result is simply empty. The returned list is freshly created, so the caller is free to
  * modify it.
  */
 public ArrayList<T> getOverlapping(SimpleInterval query) {
   final ArrayList<T> hits = new ArrayList<>();
   if (!contig.equals(query.getContig())) {
     // Different contig: nothing can overlap.
     return hits;
   }
   // Let the index skip the leading entries that cannot overlap; a negative answer means we
   // start from the beginning.
   int position = Math.max(0, firstPotentiallyReaching(query.getStart()));
   while (position < vs.size()) {
     final T candidate = vs.get(position);
     // Entries are sorted by start, so the first one starting past the query's end ends the
     // search.
     if (candidate.getStart() > query.getEnd()) {
       break;
     }
     if (query.overlaps(candidate)) {
       hits.add(candidate);
     }
     position++;
   }
   return hits;
 }
  /**
   * Sets the expanded window around our interval, cropped at contig boundaries.
   *
   * <p>The client asks for a number of extra reference bases before and after the bases of our
   * interval. Those extra bases are then returned by {@link #getBases} and {@link #iterator}
   * together with the bases spanning the interval itself.
   *
   * <p>Because of cropping at contig boundaries the resulting window may be smaller than
   * requested. Use {@link #numWindowLeadingBases} and {@link #numWindowTrailingBases} to obtain
   * the actual window dimensions.
   *
   * <p>Calling this method discards the cached sequence.
   *
   * @param windowLeadingBases Number of extra reference bases to include before the start of our
   *     interval. Must be >= 0.
   * @param windowTrailingBases Number of extra reference bases to include after the end of our
   *     interval. Must be >= 0.
   */
  public void setWindow(final int windowLeadingBases, final int windowTrailingBases) {
    if (windowLeadingBases < 0) {
      throw new GATKException("Reference window starts after the current interval");
    }
    if (windowTrailingBases < 0) {
      throw new GATKException("Reference window ends before the current interval");
    }

    // Both values are non-negative here, so "not both zero" means at least one is positive.
    final boolean expansionRequested = windowLeadingBases > 0 || windowTrailingBases > 0;
    if (interval != null && expansionRequested) {
      window =
          new SimpleInterval(
              interval.getContig(),
              calculateWindowStart(interval, windowLeadingBases),
              calculateWindowStop(interval, windowTrailingBases));
    } else {
      // The "windowless" case: the window is the interval itself (possibly null).
      window = interval;
    }

    // A different window invalidates whatever query result we had cached.
    cachedSequence = null;
  }
  /**
   * Pairs every read with the variants that overlap it.
   *
   * <p>All variants are gathered with {@code collect()} into an {@link IntervalsSkipList}, which
   * is broadcast through the Spark context of {@code reads}. Each read is then queried against
   * the broadcast value. A read whose contig, start and end do not form a valid {@link
   * SimpleInterval} is paired with an empty list.
   *
   * @param reads the reads to pair with variants.
   * @param variants the variants to search for overlaps.
   * @return one pair per read, holding the read and its overlapping variants.
   */
  public static JavaPairRDD<GATKRead, Iterable<GATKVariant>> join(
      final JavaRDD<GATKRead> reads, final JavaRDD<GATKVariant> variants) {
    final JavaSparkContext sparkContext = new JavaSparkContext(reads.context());
    final Broadcast<IntervalsSkipList<GATKVariant>> variantsBroadcast =
        sparkContext.broadcast(new IntervalsSkipList<>(variants.collect()));

    return reads.mapToPair(
        read -> {
          final IntervalsSkipList<GATKVariant> variantIndex = variantsBroadcast.getValue();
          if (!SimpleInterval.isValid(read.getContig(), read.getStart(), read.getEnd())) {
            // Some reads do not form valid intervals (reads that do not consume any reference
            // bases, e.g. CIGAR 61S90I). For those we simply report that nothing overlaps.
            return new Tuple2<>(read, Collections.emptyList());
          }
          return new Tuple2<>(read, variantIndex.getOverlapping(new SimpleInterval(read)));
        });
  }
  /**
   * Pairs every read with the reference bases covering its reference window.
   *
   * <p>Reads are keyed by the {@link ReferenceShard} derived from their reference window and
   * grouped by that key. For each group, one reference query is made over the interval spanning
   * all windows of the group, and each read then receives the subset of those bases that matches
   * its own window.
   *
   * @param referenceDataflowSource supplies both the reference window function and the reference
   *     bases.
   * @param reads the reads to pair with reference bases.
   * @return one pair per read, holding the read and the reference bases for its window.
   */
  public static JavaPairRDD<GATKRead, ReferenceBases> addBases(
      final ReferenceDataflowSource referenceDataflowSource, final JavaRDD<GATKRead> reads) {
    final SerializableFunction<GATKRead, SimpleInterval> referenceWindow =
        referenceDataflowSource.getReferenceWindowFunction();

    // Key each read by the shard computed from its reference window.
    final JavaPairRDD<ReferenceShard, GATKRead> readsKeyedByShard =
        reads.mapToPair(
            read ->
                new Tuple2<>(
                    ReferenceShard.getShardNumberFromInterval(referenceWindow.apply(read)), read));

    final JavaPairRDD<ReferenceShard, Iterable<GATKRead>> readsGroupedByShard =
        readsKeyedByShard.groupByKey();

    return readsGroupedByShard.flatMapToPair(
        shardAndReads -> {
          final Iterable<GATKRead> shardReads = shardAndReads._2();

          // One window per read; their common span is fetched from the reference in one query.
          final List<SimpleInterval> windows =
              StreamSupport.stream(shardReads.spliterator(), false)
                  .map(referenceWindow::apply)
                  .collect(Collectors.toList());
          final SimpleInterval span = SimpleInterval.getSpanningInterval(windows);
          final ReferenceBases spanBases = referenceDataflowSource.getReferenceBases(null, span);

          // Hand each read the slice of the fetched bases that matches its own window.
          final List<Tuple2<GATKRead, ReferenceBases>> readsWithBases = Lists.newArrayList();
          for (final GATKRead read : shardReads) {
            readsWithBases.add(
                new Tuple2<>(read, spanBases.getSubset(referenceWindow.apply(read))));
          }
          return readsWithBases;
        });
  }
Example #10
0
 /**
  * Determines the stop of the expanded reference window, bounded if necessary by the contig.
  *
  * <p>The contig length is read from the sequence dictionary of {@code dataSource}. The requested
  * stop is computed in {@code long} arithmetic so that a very large {@code windowTrailingBases}
  * is clamped to the contig length instead of wrapping around to a negative position.
  *
  * @param locus The locus to expand.
  * @param windowTrailingBases number of bases to attempt to expand relative to the locus end (>=
  *     0)
  * @return The end of the expanded window.
  */
 private int calculateWindowStop(final SimpleInterval locus, final int windowTrailingBases) {
   final int sequenceLength =
       dataSource.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength();
   // int addition would silently overflow when end + trailing exceeds Integer.MAX_VALUE.
   final long requestedStop = (long) locus.getEnd() + windowTrailingBases;
   // The minimum never exceeds sequenceLength (an int), so narrowing back to int is lossless.
   return (int) Math.min(requestedStop, sequenceLength);
 }
Example #11
0
 /**
  * Computes where the expanded reference window begins, never going below position 1.
  *
  * @param locus The locus to expand.
  * @param windowLeadingBases number of bases to attempt to expand relative to the locus start (>=
  *     0)
  * @return The start of the expanded window.
  */
 private int calculateWindowStart(final SimpleInterval locus, final int windowLeadingBases) {
   final int requestedStart = locus.getStart() - windowLeadingBases;
   // Positions below 1 are cropped to 1.
   return requestedStart < 1 ? 1 : requestedStart;
 }
Example #12
0
 /**
  * Reports how many extra bases of context follow the end of our interval, as established by
  * {@link #setWindow} or at construction time.
  *
  * <p>The value may be lower than what was originally requested when the interval lies close to
  * a contig boundary. When no window is set, the answer is 0.
  *
  * @return number of extra bases of context after the end of our interval
  */
 public int numWindowTrailingBases() {
   if (window == null) {
     return 0;
   }
   return window.getEnd() - interval.getEnd();
 }
Example #13
0
 /**
  * Reports how many extra bases of context precede the start of our interval, as established by
  * {@link #setWindow} or at construction time.
  *
  * <p>The value may be lower than what was originally requested when the interval lies close to
  * a contig boundary. When no window is set, the answer is 0.
  *
  * @return number of extra bases of context before the start of our interval
  */
 public int numWindowLeadingBases() {
   if (window == null) {
     return 0;
   }
   return interval.getStart() - window.getStart();
 }
 /**
  * Builds the look-up test cases as rows of {query interval, expected interval, expected index}.
  *
  * <p>Rows are produced in four groups, in this order: queries equal to each interval, queries
  * on the first base of each interval, queries on the last base of each interval, and queries
  * covering the gap before each interval from the second one on. Gap rows expect {@code null}
  * and the index {@code -(i + 1)}, where {@code i} is the index of the interval following the gap.
  */
 @DataProvider(name = "exonLookUpData")
 public Object[][] exonLookUpData() {
   final int intervalCount = nonOverlappingExomeIntervals.size();
   final List<Object[]> rows = new ArrayList<>();

   // Group 1: the query is the interval itself.
   for (int idx = 0; idx < intervalCount; idx++) {
     final SimpleInterval target = nonOverlappingExomeIntervals.get(idx);
     rows.add(new Object[] {target, target, idx});
   }

   // Group 2: single-base query on the interval's first position.
   for (int idx = 0; idx < intervalCount; idx++) {
     final SimpleInterval target = nonOverlappingExomeIntervals.get(idx);
     final SimpleInterval firstBase =
         new SimpleInterval(target.getContig(), target.getStart(), target.getStart());
     rows.add(new Object[] {firstBase, target, idx});
   }

   // Group 3: single-base query on the interval's last position.
   for (int idx = 0; idx < intervalCount; idx++) {
     final SimpleInterval target = nonOverlappingExomeIntervals.get(idx);
     final SimpleInterval lastBase =
         new SimpleInterval(target.getContig(), target.getEnd(), target.getEnd());
     rows.add(new Object[] {lastBase, target, idx});
   }

   // Group 4: query over the gap that precedes each interval. On the same contig the gap starts
   // right after the previous interval; on a new contig it starts at position 1.
   for (int idx = 1; idx < intervalCount; idx++) {
     final SimpleInterval before = nonOverlappingExomeIntervals.get(idx - 1);
     final SimpleInterval after = nonOverlappingExomeIntervals.get(idx);
     final int gapStart = before.getContig().equals(after.getContig()) ? before.getEnd() + 1 : 1;
     final SimpleInterval gap =
         ExomeToolsTestUtils.createInterval(after.getContig(), gapStart, after.getStart() - 1);
     rows.add(new Object[] {gap, null, -(idx + 1)});
   }

   return rows.toArray(new Object[rows.size()][]);
 }