Пример #1
0
  @Test
  public void testCountsFromLocusTraversal() {
    final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
    engine.setGenomeLocParser(genomeLocParser);

    final Collection<SAMReaderID> samFiles = new ArrayList<>();
    final SAMReaderID readerID = new SAMReaderID(testBAM, new Tags());
    samFiles.add(readerID);

    final SAMDataSource dataSource =
        new SAMDataSource(
            samFiles,
            new ThreadAllocation(),
            null,
            genomeLocParser,
            false,
            SAMFileReader.ValidationStringency.STRICT,
            null,
            null,
            new ValidationExclusion(),
            new ArrayList<ReadFilter>(),
            new ArrayList<ReadTransformer>(),
            false,
            (byte) 30,
            false,
            true);

    engine.setReadsDataSource(dataSource);
    final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());

    final TraverseLociNano traverseLociNano = new TraverseLociNano(1);
    final DummyLocusWalker walker = new DummyLocusWalker();
    traverseLociNano.initialize(engine, walker, null);

    for (final Shard shard : dataSource.createShardIteratorOverAllReads(new LocusShardBalancer())) {
      final WindowMaker windowMaker =
          new WindowMaker(
              shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples);
      for (WindowMaker.WindowMakerIterator window : windowMaker) {
        final LocusShardDataProvider dataProvider =
            new LocusShardDataProvider(
                shard,
                shard.getReadProperties(),
                genomeLocParser,
                window.getLocus(),
                window,
                reference,
                new ArrayList<ReferenceOrderedDataSource>());
        traverseLociNano.traverse(walker, dataProvider, 0);
        dataProvider.close();
      }
      windowMaker.close();
    }

    // dataSource.close();
    Assert.assertEquals(
        engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
    Assert.assertEquals(
        engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
  }
Пример #2
0
 // ---------------------------------------------------------------------------------------------------------
 //
 // Public interface functions
 //
 // ---------------------------------------------------------------------------------------------------------
 @Requires({"toolkit != null", "UAC != null"})
 public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
   this(
       toolkit,
       UAC,
       Logger.getLogger(UnifiedGenotyperEngine.class),
       null,
       null,
       SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()),
       VariantContextUtils.DEFAULT_PLOIDY);
 }
Пример #3
0
  private void initializeVcfWriter() {
    final List<String> inputNames = Arrays.asList(validation.getName());

    // setup the header fields
    Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
    hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
    hInfo.add(
        new VCFFilterHeaderLine(
            "bootstrap",
            "This site used for genotype bootstrapping with ProduceBeagleInputWalker"));

    bootstrapVCFOutput.writeHeader(
        new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)));
  }
Пример #4
0
  public void initialize() {

    samples =
        SampleUtils.getSampleListWithVCFHeader(
            getToolkit(), Arrays.asList(variantCollection.variants.getName()));

    beagleWriter.print("marker alleleA alleleB");
    for (String sample : samples)
      beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));

    beagleWriter.println();

    if (bootstrapVCFOutput != null) {
      initializeVcfWriter();
    }

    if (VQSRCalibrationFile != null) {
      VQSRCalibrator = VQSRCalibrationCurve.readFromFile(VQSRCalibrationFile);
      logger.info("Read calibration curve");
      VQSRCalibrator.printInfo(logger);
    }
  }
  /** Initialize the stratifications, evaluations, evaluation contexts, and reporting object */
  public void initialize() {
    // Just list the modules, and exit quickly.
    if (LIST) {
      variantEvalUtils.listModulesAndExit();
    }

    // maintain the full list of comps
    comps.addAll(compsProvided);
    if (dbsnp.dbsnp.isBound()) {
      comps.add(dbsnp.dbsnp);
      knowns.add(dbsnp.dbsnp);
    }

    // Add a dummy comp track if none exists
    if (comps.size() == 0)
      comps.add(
          new RodBinding<VariantContext>(VariantContext.class, "none", "UNBOUND", "", new Tags()));

    // Set up set of additional knowns
    for (RodBinding<VariantContext> compRod : comps) {
      if (KNOWN_NAMES.contains(compRod.getName())) knowns.add(compRod);
    }

    // Now that we have all the rods categorized, determine the sample list from the eval rods.
    Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evals);
    Set<String> vcfSamples =
        SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);

    // Load the sample list
    sampleNamesForEvaluation.addAll(
        SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS));
    numSamples = NUM_SAMPLES > 0 ? NUM_SAMPLES : sampleNamesForEvaluation.size();

    if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) {
      sampleNamesForStratification.addAll(sampleNamesForEvaluation);
    }
    sampleNamesForStratification.add(ALL_SAMPLE_NAME);

    // Initialize select expressions
    for (VariantContextUtils.JexlVCMatchExp jexl :
        VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
      SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
      jexlExpressions.add(sjexl);
    }

    // Initialize the set of stratifications and evaluations to use
    stratificationObjects =
        variantEvalUtils.initializeStratificationObjects(
            this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
    Set<Class<? extends VariantEvaluator>> evaluationObjects =
        variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
    for (VariantStratifier vs : getStratificationObjects()) {
      if (vs.getName().equals("Filter")) byFilterIsEnabled = true;
      else if (vs.getName().equals("Sample")) perSampleIsEnabled = true;
    }

    if (intervalsFile != null) {
      boolean fail = true;
      for (final VariantStratifier vs : stratificationObjects) {
        if (vs.getClass().equals(IntervalStratification.class)) fail = false;
      }
      if (fail)
        throw new UserException.BadArgumentValue(
            "ST", "stratIntervals argument provided but -ST IntervalStratification not provided");
    }

    // Initialize the evaluation contexts
    evaluationContexts =
        variantEvalUtils.initializeEvaluationContexts(
            stratificationObjects, evaluationObjects, null, null);

    // Initialize report table
    report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects);

    // Load ancestral alignments
    if (ancestralAlignmentsFile != null) {
      try {
        ancestralAlignments = new IndexedFastaSequenceFile(ancestralAlignmentsFile);
      } catch (FileNotFoundException e) {
        throw new ReviewedStingException(
            String.format(
                "The ancestral alignments file, '%s', could not be found",
                ancestralAlignmentsFile.getAbsolutePath()));
      }
    }

    // initialize CNVs
    if (knownCNVsFile != null) {
      knownCNVsByContig = createIntervalTreeByContig(knownCNVsFile);
    }
  }
Пример #6
0
  /** Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */
  public void initialize() {
    // Get list of samples to include in the output
    List<String> rodNames = Arrays.asList(variantCollection.variants.getName());

    Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
    TreeSet<String> vcfSamples =
        new TreeSet<String>(
            SampleUtils.getSampleList(
                vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));

    Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
    Collection<String> samplesFromExpressions =
        SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);

    // first, add any requested samples
    samples.addAll(samplesFromFile);
    samples.addAll(samplesFromExpressions);
    samples.addAll(sampleNames);

    // if none were requested, we want all of them
    if (samples.isEmpty()) {
      samples.addAll(vcfSamples);
      NO_SAMPLES_SPECIFIED = true;
    }

    // now, exclude any requested samples
    Collection<String> XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles);
    samples.removeAll(XLsamplesFromFile);
    samples.removeAll(XLsampleNames);

    if (samples.size() == 0 && !NO_SAMPLES_SPECIFIED)
      throw new UserException(
          "All samples requested to be included were also requested to be excluded.");

    for (String sample : samples) logger.info("Including sample '" + sample + "'");

    // if user specified types to include, add these, otherwise, add all possible variant context
    // types to list of vc types to include
    if (TYPES_TO_INCLUDE.isEmpty()) {

      for (VariantContext.Type t : VariantContext.Type.values()) selectedTypes.add(t);

    } else {
      for (VariantContext.Type t : TYPES_TO_INCLUDE) selectedTypes.add(t);
    }
    // Initialize VCF header
    Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
    headerLines.add(new VCFHeaderLine("source", "SelectVariants"));

    if (KEEP_ORIGINAL_CHR_COUNTS) {
      headerLines.add(
          new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC"));
      headerLines.add(
          new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
      headerLines.add(
          new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
    }
    vcfWriter.writeHeader(new VCFHeader(headerLines, samples));

    for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) {
      // It's not necessary that the user supply select names for the JEXL expressions, since those
      // expressions will only be needed for omitting records.  Make up the select names here.
      selectNames.add(String.format("select-%d", i));
    }

    jexls = VariantContextUtils.initializeMatchExps(selectNames, SELECT_EXPRESSIONS);

    // Look at the parameters to decide which analysis to perform
    DISCORDANCE_ONLY = discordanceTrack.isBound();
    if (DISCORDANCE_ONLY)
      logger.info(
          "Selecting only variants discordant with the track: " + discordanceTrack.getName());

    CONCORDANCE_ONLY = concordanceTrack.isBound();
    if (CONCORDANCE_ONLY)
      logger.info(
          "Selecting only variants concordant with the track: " + concordanceTrack.getName());

    if (MENDELIAN_VIOLATIONS) {
      if (FAMILY_STRUCTURE_FILE != null) {
        try {
          for (final String line : new XReadLines(FAMILY_STRUCTURE_FILE)) {
            MendelianViolation mv =
                new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
            if (samples.contains(mv.getSampleChild())
                && samples.contains(mv.getSampleDad())
                && samples.contains(mv.getSampleMom())) mvSet.add(mv);
          }
        } catch (FileNotFoundException e) {
          throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e);
        }
        if (outMVFile != null)
          try {
            outMVFileStream = new PrintStream(outMVFile);
          } catch (FileNotFoundException e) {
            throw new UserException.CouldNotCreateOutputFile(
                outMVFile, "Can't open output file", e);
          }
      } else
        mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
    } else if (!FAMILY_STRUCTURE.isEmpty()) {
      mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
      MENDELIAN_VIOLATIONS = true;
    }

    SELECT_RANDOM_NUMBER = numRandom > 0;
    if (SELECT_RANDOM_NUMBER) {
      logger.info("Selecting " + numRandom + " variants at random from the variant track");
      variantArray = new RandomVariantStructure[numRandom];
    }

    SELECT_RANDOM_FRACTION = fractionRandom > 0;
    if (SELECT_RANDOM_FRACTION)
      logger.info(
          "Selecting approximately "
              + 100.0 * fractionRandom
              + "% of the variants at random from the variant track");
  }
Пример #7
0
  public void initialize() {
    for (final Tranche t : Tranche.readTranches(TRANCHES_FILE)) {
      if (t.ts >= TS_FILTER_LEVEL) {
        tranches.add(t);
      }
      logger.info(String.format("Read tranche " + t));
    }
    Collections.reverse(
        tranches); // this algorithm wants the tranches ordered from best (lowest truth sensitivity)
                   // to worst (highest truth sensitivity)

    for (final RodBinding rod : input) {
      inputNames.add(rod.getName());
    }

    if (IGNORE_INPUT_FILTERS != null) {
      ignoreInputFilterSet.addAll(Arrays.asList(IGNORE_INPUT_FILTERS));
    }

    // setup the header fields
    final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
    hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
    addVQSRStandardHeaderLines(hInfo);
    final TreeSet<String> samples = new TreeSet<String>();
    samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));

    if (tranches.size() >= 2) {
      for (int iii = 0; iii < tranches.size() - 1; iii++) {
        final Tranche t = tranches.get(iii);
        hInfo.add(
            new VCFFilterHeaderLine(
                t.name,
                String.format(
                    "Truth sensitivity tranche level for "
                        + t.model.toString()
                        + " model at VQS Lod: "
                        + t.minVQSLod
                        + " <= x < "
                        + tranches.get(iii + 1).minVQSLod)));
      }
    }
    if (tranches.size() >= 1) {
      hInfo.add(
          new VCFFilterHeaderLine(
              tranches.get(0).name + "+",
              String.format(
                  "Truth sensitivity tranche level for "
                      + tranches.get(0).model.toString()
                      + " model at VQS Lod < "
                      + tranches.get(0).minVQSLod)));
    } else {
      throw new UserException(
          "No tranches were found in the file or were above the truth sensitivity filter level "
              + TS_FILTER_LEVEL);
    }

    logger.info("Keeping all variants in tranche " + tranches.get(tranches.size() - 1));

    final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
    vcfWriter.writeHeader(vcfHeader);
  }