Ejemplo n.º 1
0
  /**
   * Gets the header fields from all VCF rods input by the user
   *
   * @param toolkit GATK engine
   * @param rodNames names of rods to use, or null if we should use all possible ones
   * @return a set of all fields
   */
  public static Set<VCFHeaderLine> getHeaderFields(
      GenomeAnalysisEngine toolkit, Collection<String> rodNames) {

    // sorted accumulator for the header lines gathered from every selected rod
    final Set<VCFHeaderLine> headerLines = new TreeSet<VCFHeaderLine>();

    final List<ReferenceOrderedDataSource> rods = toolkit.getRodDataSources();
    for (final ReferenceOrderedDataSource rod : rods) {
      // a null rodNames means "no restriction": every rod is considered
      final boolean selected = rodNames == null || rodNames.contains(rod.getName());
      if (!selected) {
        continue;
      }

      // only rods whose record type is VariantContext are read for a VCF header
      if (!rod.getRecordType().equals(VariantContext.class)) {
        continue;
      }

      // a rod without a header contributes nothing
      final VCFHeader vcfHeader = (VCFHeader) rod.getHeader();
      if (vcfHeader != null) {
        headerLines.addAll(vcfHeader.getMetaDataInSortedOrder());
      }
    }

    return headerLines;
  }
Ejemplo n.º 2
0
  /**
   * Loads the tranches, records the input rod names and ignored filters, and writes the output
   * VCF header.
   *
   * <p>Only tranches read from {@code TRANCHES_FILE} whose {@code ts} is at least
   * {@code TS_FILTER_LEVEL} are kept. The header combines the header lines of the input rods, the
   * VQSR standard lines, and one FILTER line per kept tranche boundary.
   *
   * @throws UserException if no tranche passes the {@code TS_FILTER_LEVEL} threshold
   */
  public void initialize() {
    for (final Tranche t : Tranche.readTranches(TRANCHES_FILE)) {
      if (t.ts >= TS_FILTER_LEVEL) {
        tranches.add(t);
      }
      // plain concatenation: the tranche text must never be interpreted as a format string
      logger.info("Read tranche " + t);
    }
    // this algorithm wants the tranches ordered from best (lowest truth sensitivity)
    // to worst (highest truth sensitivity)
    Collections.reverse(tranches);

    for (final RodBinding rod : input) {
      inputNames.add(rod.getName());
    }

    if (IGNORE_INPUT_FILTERS != null) {
      ignoreInputFilterSet.addAll(Arrays.asList(IGNORE_INPUT_FILTERS));
    }

    // setup the header fields
    final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
    hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
    addVQSRStandardHeaderLines(hInfo);
    final TreeSet<String> samples = new TreeSet<String>();
    samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));

    // without at least one tranche there is nothing to filter against
    if (tranches.isEmpty()) {
      throw new UserException(
          "No tranches were found in the file or were above the truth sensitivity filter level "
              + TS_FILTER_LEVEL);
    }

    // one FILTER line per pair of adjacent tranches; the loop is a no-op for a single tranche
    for (int iii = 0; iii < tranches.size() - 1; iii++) {
      final Tranche t = tranches.get(iii);
      hInfo.add(
          new VCFFilterHeaderLine(
              t.name,
              "Truth sensitivity tranche level for "
                  + t.model.toString()
                  + " model at VQS Lod: "
                  + t.minVQSLod
                  + " <= x < "
                  + tranches.get(iii + 1).minVQSLod));
    }

    // FILTER line for everything scoring below the first tranche's minimum VQS Lod
    final Tranche first = tranches.get(0);
    hInfo.add(
        new VCFFilterHeaderLine(
            first.name + "+",
            "Truth sensitivity tranche level for "
                + first.model.toString()
                + " model at VQS Lod < "
                + first.minVQSLod));

    logger.info("Keeping all variants in tranche " + tranches.get(tranches.size() - 1));

    final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
    vcfWriter.writeHeader(vcfHeader);
  }