Exemplo n.º 1
0
  /**
   * Initialize the stratifications, evaluations, evaluation contexts, and reporting object.
   *
   * <p>The steps run in this order:
   *
   * <ol>
   *   <li>if {@code LIST} is set, hand off to {@code variantEvalUtils.listModulesAndExit()};
   *   <li>collect the comp and known tracks, adding a placeholder comp when none was given;
   *   <li>derive the evaluation and stratification sample names from the eval rods;
   *   <li>build the select expressions, stratifiers and evaluator classes, and check that they
   *       are compatible;
   *   <li>create the stratification states;
   *   <li>load the optional ancestral alignments and known-CNV inputs.
   * </ol>
   *
   * @throws UserException.BadArgumentValue if {@code intervalsFile} is set but none of the
   *     selected stratifiers is exactly an {@code IntervalStratification}
   * @throws ReviewedStingException if the ancestral alignments file cannot be found; the
   *     underlying {@link FileNotFoundException} is attached as the cause
   */
  public void initialize() {
    // Just list the modules, and exit quickly.
    if (LIST) {
      variantEvalUtils.listModulesAndExit();
    }

    // Maintain the full list of comps. A bound dbsnp track counts as both a comp and a known.
    comps.addAll(compsProvided);
    if (dbsnp.dbsnp.isBound()) {
      comps.add(dbsnp.dbsnp);
      knowns.add(dbsnp.dbsnp);
    }

    // Add a dummy comp track if none exists
    if (comps.isEmpty()) {
      comps.add(
          new RodBinding<VariantContext>(VariantContext.class, "none", "UNBOUND", "", new Tags()));
    }

    // Set up set of additional knowns: any comp whose name appears in KNOWN_NAMES
    for (final RodBinding<VariantContext> compRod : comps) {
      if (KNOWN_NAMES.contains(compRod.getName())) {
        knowns.add(compRod);
      }
    }

    // Now that we have all the rods categorized, determine the sample list from the eval rods.
    final Map<String, VCFHeader> vcfRods =
        GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), evals);
    final Set<String> vcfSamples =
        SampleUtils.getSampleList(
            vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);

    // Load the sample list, using an intermediate tree set to sort the samples
    final Set<String> allSampleNames = SampleUtils.getSamplesFromCommandLineInput(vcfSamples);
    sampleNamesForEvaluation.addAll(
        new TreeSet<String>(
            SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS)));
    // We are subsetting whenever at least one available sample was not selected for evaluation.
    isSubsettingSamples = !sampleNamesForEvaluation.containsAll(allSampleNames);

    if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) {
      sampleNamesForStratification.addAll(sampleNamesForEvaluation);
    }
    // The all-samples entry is always present, whether or not "Sample" was requested.
    sampleNamesForStratification.add(ALL_SAMPLE_NAME);

    // Initialize select expressions
    for (final VariantContextUtils.JexlVCMatchExp jexl :
        VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
      jexlExpressions.add(new SortableJexlVCMatchExp(jexl.name, jexl.exp));
    }

    // Initialize the set of stratifications and evaluations to use
    final List<VariantStratifier> stratificationObjects =
        variantEvalUtils.initializeStratificationObjects(
            NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
    final Set<Class<? extends VariantEvaluator>> evaluationClasses =
        variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);

    checkForIncompatibleEvaluatorsAndStratifiers(stratificationObjects, evaluationClasses);

    // Record which of the name-keyed stratifiers are active.
    for (final VariantStratifier vs : stratificationObjects) {
      if (vs.getName().equals("Filter")) {
        byFilterIsEnabled = true;
      } else if (vs.getName().equals("Sample")) {
        perSampleIsEnabled = true;
      }
    }

    // An intervals file is only meaningful together with the IntervalStratification stratifier.
    if (intervalsFile != null) {
      boolean hasIntervalStratification = false;
      for (final VariantStratifier vs : stratificationObjects) {
        if (vs.getClass().equals(IntervalStratification.class)) {
          hasIntervalStratification = true;
          break;
        }
      }
      if (!hasIntervalStratification) {
        throw new UserException.BadArgumentValue(
            "ST", "stratIntervals argument provided but -ST IntervalStratification not provided");
      }
    }

    // Initialize the evaluation contexts
    createStratificationStates(stratificationObjects, evaluationClasses);

    // Load ancestral alignments
    if (ancestralAlignmentsFile != null) {
      try {
        ancestralAlignments = new IndexedFastaSequenceFile(ancestralAlignmentsFile);
      } catch (FileNotFoundException e) {
        // Chain the original exception so the underlying I/O detail is not lost.
        throw new ReviewedStingException(
            String.format(
                "The ancestral alignments file, '%s', could not be found",
                ancestralAlignmentsFile.getAbsolutePath()),
            e);
      }
    }

    // initialize CNVs
    if (knownCNVsFile != null) {
      knownCNVsByContig = createIntervalTreeByContig(knownCNVsFile);
    }
  }
Exemplo n.º 2
0
/**
 * Tests for the read counters exposed through {@code ReadMetrics}.
 *
 * <p>One test increments a standalone {@code ReadMetrics} past {@link Integer#MAX_VALUE}. The
 * remaining tests write an artificial BAM once (see {@link #init()}), traverse it with a read,
 * locus or active-region traversal, and compare the engine's cumulative metrics with the number
 * of reads that were written.
 */
public class ReadMetricsUnitTest extends BaseTest {

  private static final int numReadsPerContig = 250000;
  private static final List<String> contigs = Arrays.asList("1", "2", "3");

  // Shared fixtures, populated once by init().
  private IndexedFastaSequenceFile reference;
  private SAMSequenceDictionary dictionary;
  private SAMFileHeader header;
  private GATKSAMReadGroupRecord readGroup;
  private GenomeLocParser genomeLocParser;
  private File testBAM;

  /**
   * Increments the reads-seen counter {@code Integer.MAX_VALUE + 1} times and checks that the
   * reported count equals that value.
   *
   * <p>Note that the loop body runs more than two billion times.
   */
  @Test
  public void testReadsSeenDoNotOverflowInt() {

    final ReadMetrics metrics = new ReadMetrics();

    final long moreThanMaxInt = ((long) Integer.MAX_VALUE) + 1L;

    for (long i = 0L; i < moreThanMaxInt; i++) {
      metrics.incrementNumReadsSeen();
    }

    Assert.assertEquals(metrics.getNumReadsSeen(), moreThanMaxInt);
    Assert.assertTrue(metrics.getNumReadsSeen() > (long) Integer.MAX_VALUE);

    logger.warn(String.format("%d %d %d", Integer.MAX_VALUE, moreThanMaxInt, Long.MAX_VALUE));
  }

  // Test the accuracy of the read metrics

  /**
   * Builds the shared fixtures: the reference and its dictionary, a coordinate-sorted header with
   * a single "test" read group, and a temporary BAM holding {@code numReadsPerContig} reads on
   * each contig, starting at positions 1..numReadsPerContig.
   *
   * @throws IOException if the temporary BAM file cannot be created
   */
  @BeforeClass
  private void init() throws IOException {
    reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
    dictionary = reference.getSequenceDictionary();
    genomeLocParser = new GenomeLocParser(dictionary);
    header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
    header.setSequenceDictionary(dictionary);
    header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
    readGroup = new GATKSAMReadGroupRecord(header.getReadGroup("test"));

    final List<GATKSAMRecord> reads = new ArrayList<>();
    for (final String contig : contigs) {
      for (int i = 1; i <= numReadsPerContig; i++) {
        reads.add(buildSAMRecord("read" + contig + "_" + i, contig, i));
      }
    }

    createBAM(reads);
  }

  /**
   * Writes {@code reads} to a new temporary BAM (stored in {@code testBAM}), requesting an index,
   * and registers the BAM and both candidate index paths for deletion on JVM exit.
   *
   * @param reads the records to write; the header of the first record is used for the file
   * @throws IOException if the temporary file cannot be created
   */
  private void createBAM(final List<GATKSAMRecord> reads) throws IOException {
    final String bamSuffix = ".bam";
    // NOTE(review): the temp-file prefix names a different test class; it looks like a copy/paste
    // leftover but is kept unchanged here.
    testBAM = File.createTempFile("TraverseActiveRegionsUnitTest", bamSuffix);
    testBAM.deleteOnExit();

    final SAMFileWriter out =
        new SAMFileWriterFactory()
            .setCreateIndex(true)
            .makeBAMWriter(reads.get(0).getHeader(), true, testBAM);
    try {
      for (final GATKSAMRecord read : reads) {
        out.addAlignment(read);
      }
    } finally {
      // Close the writer even if a record is rejected, so the file handle is not leaked.
      out.close();
    }

    // The index may be named either "<name>.bai" or "<name>.bam.bai"; cover both. Only the
    // trailing suffix is swapped, so a ".bam" elsewhere in the directory path is left alone.
    final String bamPath = testBAM.getAbsolutePath();
    final String pathWithoutSuffix = bamPath.substring(0, bamPath.length() - bamSuffix.length());
    new File(pathWithoutSuffix + ".bai").deleteOnExit();
    new File(bamPath + ".bai").deleteOnExit();
  }

  /**
   * Creates a one-base ("1M") record in the shared header and read group.
   *
   * <p>Copied from LocusViewTemplate.
   *
   * @param readName name to give the record
   * @param contig name of the contig; resolved to an index through the shared dictionary
   * @param alignmentStart alignment start to set on the record
   * @return the populated record
   */
  protected GATKSAMRecord buildSAMRecord(
      final String readName, final String contig, final int alignmentStart) {
    GATKSAMRecord record = new GATKSAMRecord(header);

    record.setReadName(readName);
    record.setReferenceIndex(dictionary.getSequenceIndex(contig));
    record.setAlignmentStart(alignmentStart);

    record.setCigarString("1M");
    record.setReadString("A");
    record.setBaseQualityString("A");
    record.setReadGroup(readGroup);

    return record;
  }

  /** Creates a fresh engine that uses the shared genome loc parser. */
  private GenomeAnalysisEngine createEngine() {
    final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
    engine.setGenomeLocParser(genomeLocParser);
    return engine;
  }

  /**
   * Creates a data source over the test BAM with the settings every test in this class uses.
   *
   * @param filters the read filters to install; pass an empty list for none
   * @return a new data source reading {@code testBAM}
   */
  private SAMDataSource createDataSource(final List<ReadFilter> filters) {
    final Collection<SAMReaderID> samFiles = new ArrayList<>();
    samFiles.add(new SAMReaderID(testBAM, new Tags()));

    return new SAMDataSource(
        samFiles,
        new ThreadAllocation(),
        null,
        genomeLocParser,
        false,
        SAMFileReader.ValidationStringency.STRICT,
        null,
        null,
        new ValidationExclusion(),
        filters,
        new ArrayList<ReadTransformer>(),
        false,
        (byte) 30,
        false,
        true);
  }

  /**
   * Runs a {@code TraverseReadsNano} traversal with a {@link DummyReadWalker} over every read
   * shard of {@code dataSource}.
   */
  private void traverseAllReads(final GenomeAnalysisEngine engine, final SAMDataSource dataSource) {
    final TraverseReadsNano traverseReadsNano = new TraverseReadsNano(1);
    final DummyReadWalker walker = new DummyReadWalker();
    traverseReadsNano.initialize(engine, walker, null);

    for (final Shard shard : dataSource.createShardIteratorOverAllReads(new ReadShardBalancer())) {
      final ReadShardDataProvider dataProvider =
          new ReadShardDataProvider(
              shard,
              engine.getGenomeLocParser(),
              dataSource.seek(shard),
              reference,
              new ArrayList<ReferenceOrderedDataSource>());
      traverseReadsNano.traverse(walker, dataProvider, 0);
      dataProvider.close();
    }
  }

  /** Number of reads written to the test BAM by {@link #init()}. */
  private static int expectedReadCount() {
    return contigs.size() * numReadsPerContig;
  }

  /** Asserts that both the reads-seen and iteration counters equal the number of reads written. */
  private void assertAllReadsCounted(final GenomeAnalysisEngine engine) {
    Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), expectedReadCount());
    Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), expectedReadCount());
  }

  /** Checks the cumulative counters after an unfiltered read traversal of the test BAM. */
  @Test
  public void testCountsFromReadTraversal() {
    final GenomeAnalysisEngine engine = createEngine();
    final SAMDataSource dataSource = createDataSource(new ArrayList<ReadFilter>());
    engine.setReadsDataSource(dataSource);

    traverseAllReads(engine, dataSource);

    assertAllReadsCounted(engine);
  }

  /** Checks the cumulative counters after a locus traversal of the test BAM. */
  @Test
  public void testCountsFromLocusTraversal() {
    final GenomeAnalysisEngine engine = createEngine();
    final SAMDataSource dataSource = createDataSource(new ArrayList<ReadFilter>());
    engine.setReadsDataSource(dataSource);
    final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());

    final TraverseLociNano traverseLociNano = new TraverseLociNano(1);
    final DummyLocusWalker walker = new DummyLocusWalker();
    traverseLociNano.initialize(engine, walker, null);

    for (final Shard shard : dataSource.createShardIteratorOverAllReads(new LocusShardBalancer())) {
      final WindowMaker windowMaker =
          new WindowMaker(
              shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
      for (final WindowMaker.WindowMakerIterator window : windowMaker) {
        final LocusShardDataProvider dataProvider =
            new LocusShardDataProvider(
                shard,
                shard.getReadProperties(),
                genomeLocParser,
                window.getLocus(),
                window,
                reference,
                new ArrayList<ReferenceOrderedDataSource>());
        traverseLociNano.traverse(walker, dataProvider, 0);
        dataProvider.close();
      }
      windowMaker.close();
    }

    assertAllReadsCounted(engine);
  }

  /**
   * Checks the cumulative counters after an active-region traversal restricted to the interval
   * 1..numReadsPerContig on each contig.
   */
  @Test
  public void testCountsFromActiveRegionTraversal() {
    final GenomeAnalysisEngine engine = createEngine();
    final SAMDataSource dataSource = createDataSource(new ArrayList<ReadFilter>());
    engine.setReadsDataSource(dataSource);
    final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());

    final List<GenomeLoc> intervals = new ArrayList<>(contigs.size());
    for (final String contig : contigs) {
      intervals.add(genomeLocParser.createGenomeLoc(contig, 1, numReadsPerContig));
    }

    final TraverseActiveRegions traverseActiveRegions = new TraverseActiveRegions();
    final DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
    traverseActiveRegions.initialize(engine, walker, null);

    for (final Shard shard :
        dataSource.createShardIteratorOverIntervals(
            new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer())) {
      final WindowMaker windowMaker =
          new WindowMaker(
              shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
      for (final WindowMaker.WindowMakerIterator window : windowMaker) {
        final LocusShardDataProvider dataProvider =
            new LocusShardDataProvider(
                shard,
                shard.getReadProperties(),
                genomeLocParser,
                window.getLocus(),
                window,
                reference,
                new ArrayList<ReferenceOrderedDataSource>());
        traverseActiveRegions.traverse(walker, dataProvider, 0);
        dataProvider.close();
      }
      windowMaker.close();
    }

    assertAllReadsCounted(engine);
  }

  /**
   * Runs a read traversal with an {@link EveryTenthReadFilter} installed and checks that the
   * count recorded under that filter's simple class name is one tenth of the reads written.
   */
  @Test
  public void testFilteredCounts() {
    final GenomeAnalysisEngine engine = createEngine();

    final List<ReadFilter> filters = new ArrayList<>();
    filters.add(new EveryTenthReadFilter());

    final SAMDataSource dataSource = createDataSource(filters);
    engine.setReadsDataSource(dataSource);

    traverseAllReads(engine, dataSource);

    Assert.assertEquals(
        (long)
            engine
                .getCumulativeMetrics()
                .getCountsByFilter()
                .get(EveryTenthReadFilter.class.getSimpleName()),
        expectedReadCount() / 10);
  }

  /** Locus walker whose map, reduceInit and reduce all return 0. */
  class DummyLocusWalker extends LocusWalker<Integer, Integer> {
    @Override
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
      return 0;
    }

    @Override
    public Integer reduceInit() {
      return 0;
    }

    @Override
    public Integer reduce(Integer value, Integer sum) {
      return 0;
    }
  }

  /** Read walker whose map, reduceInit and reduce all return 0. */
  class DummyReadWalker extends ReadWalker<Integer, Integer> {
    @Override
    public Integer map(
        ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
      return 0;
    }

    @Override
    public Integer reduceInit() {
      return 0;
    }

    @Override
    public Integer reduce(Integer value, Integer sum) {
      return 0;
    }
  }

  /**
   * Active-region walker that reports a state value of 0.0 at every locus and whose map,
   * reduceInit and reduce all return 0.
   */
  class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
    @Override
    public ActivityProfileState isActive(
        RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
      return new ActivityProfileState(ref.getLocus(), 0.0);
    }

    @Override
    public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
      return 0;
    }

    @Override
    public Integer reduceInit() {
      return 0;
    }

    @Override
    public Integer reduce(Integer value, Integer sum) {
      return 0;
    }
  }

  /**
   * Read filter whose {@code filterOut} returns {@code true} on every tenth call and {@code false}
   * otherwise. The call counter is per instance and is not synchronized.
   */
  private final class EveryTenthReadFilter extends ReadFilter {

    private int myCounter = 0;

    @Override
    public boolean filterOut(final SAMRecord record) {
      if (++myCounter == 10) {
        myCounter = 0;
        return true;
      }

      return false;
    }
  }
}