/**
   * Reads every VCF record from {@code source} into memory and returns the records together with
   * the file's header.
   *
   * <p>SHOULD ONLY BE USED FOR UNIT/INTEGRATION TESTING PURPOSES!
   *
   * @param source the file to read, must be in VCF4 format
   * @return a pair holding the parsed header and the list of decoded records in the order they
   *     were read; records that the codec decodes to {@code null} are omitted
   * @throws java.io.IOException if {@code source} cannot be opened or an I/O failure is reported
   *     while reading it
   */
  public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source)
      throws IOException {
    final List<VariantContext> vcs = new ArrayList<VariantContext>();
    final VCFCodec codec = new VCFCodec();
    final PositionalBufferedStream pbs =
        new PositionalBufferedStream(new FileInputStream(source));

    // Stays null until the codec has successfully wrapped the stream; the finally block uses
    // this to decide which handle it is responsible for closing.
    LineIterator vcfSource = null;
    try {
      vcfSource = codec.makeSourceFromStream(pbs);
      final VCFHeader vcfHeader = (VCFHeader) codec.readActualHeader(vcfSource);

      while (vcfSource.hasNext()) {
        final VariantContext vc = codec.decode(vcfSource);
        if (vc != null) {
          vcs.add(vc);
        }
      }

      return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
    } finally {
      if (vcfSource != null) {
        codec.close(vcfSource);
      } else {
        // The line source was never created, so nothing else will release the file handle.
        pbs.close();
      }
    }
  }
Exemplo n.º 2
0
  /**
   * Prepares the walker's state and writes the output VCF header.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Reads the tranches from {@code TRANCHES_FILE}, logging each one, and keeps those whose
   *       {@code ts} is at least {@code TS_FILTER_LEVEL}; the kept list is then reversed.
   *   <li>Records the name of every input binding and, when {@code IGNORE_INPUT_FILTERS} is set,
   *       the filters to ignore.
   *   <li>Builds the header lines: the fields gathered from the inputs, the VQSR standard lines,
   *       one FILTER line per pair of adjacent tranches, and one FILTER line named after the first
   *       tranche with a trailing {@code "+"}.
   *   <li>Writes the resulting header to {@code vcfWriter}.
   * </ol>
   *
   * @throws UserException if no tranche was kept after applying {@code TS_FILTER_LEVEL}
   */
  public void initialize() {
    for (final Tranche t : Tranche.readTranches(TRANCHES_FILE)) {
      if (t.ts >= TS_FILTER_LEVEL) {
        tranches.add(t);
      }
      // Plain concatenation on purpose: the tranche text is data, not a format pattern, so a
      // stray '%' in it must not be interpreted by String.format.
      logger.info("Read tranche " + t);
    }
    // this algorithm wants the tranches ordered from best (lowest truth sensitivity)
    // to worst (highest truth sensitivity)
    Collections.reverse(tranches);

    for (final RodBinding rod : input) {
      inputNames.add(rod.getName());
    }

    if (IGNORE_INPUT_FILTERS != null) {
      ignoreInputFilterSet.addAll(Arrays.asList(IGNORE_INPUT_FILTERS));
    }

    // setup the header fields
    final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
    hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
    addVQSRStandardHeaderLines(hInfo);
    final TreeSet<String> samples = new TreeSet<String>();
    samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));

    if (tranches.isEmpty()) {
      throw new UserException(
          "No tranches were found in the file or were above the truth sensitivity filter level "
              + TS_FILTER_LEVEL);
    }

    // One FILTER line per adjacent pair of tranches; the loop body never runs when only a
    // single tranche was kept.
    for (int iii = 0; iii < tranches.size() - 1; iii++) {
      final Tranche t = tranches.get(iii);
      final String description =
          "Truth sensitivity tranche level for "
              + t.model.toString()
              + " model at VQS Lod: "
              + t.minVQSLod
              + " <= x < "
              + tranches.get(iii + 1).minVQSLod;
      hInfo.add(new VCFFilterHeaderLine(t.name, description));
    }

    // Open-ended FILTER line for everything below the first tranche's minimum VQS Lod.
    final Tranche first = tranches.get(0);
    final String firstDescription =
        "Truth sensitivity tranche level for "
            + first.model.toString()
            + " model at VQS Lod < "
            + first.minVQSLod;
    hInfo.add(new VCFFilterHeaderLine(first.name + "+", firstDescription));

    logger.info("Keeping all variants in tranche " + tranches.get(tranches.size() - 1));

    final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
    vcfWriter.writeHeader(vcfHeader);
  }