Ejemplo n.º 1
0
  /**
   * Collects the VCF headers of the engine's ROD data sources, keyed by ROD name.
   *
   * @param toolkit GATK engine whose ROD data sources are inspected
   * @param rodNames names of rods to use, or null if all rods should be considered
   * @return a map from rod name to its header; only rods whose header is a {@link VCFHeader}
   *     are included
   */
  public static Map<String, VCFHeader> getVCFHeadersFromRods(
      GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
    final Map<String, VCFHeader> data = new HashMap<>();

    for (final ReferenceOrderedDataSource source : toolkit.getRodDataSources()) {
      // ignore the rod if it's not in our list
      if (rodNames != null && !rodNames.contains(source.getName())) {
        continue;
      }

      // Fetch the header once; instanceof is false for null, so no separate null check is needed.
      final Object header = source.getHeader();
      if (header instanceof VCFHeader) {
        data.put(source.getName(), (VCFHeader) header);
      }
    }

    return data;
  }
Ejemplo n.º 2
0
  /**
   * Builds the VCF header line that records how this GATK run was invoked.
   *
   * <p>The line's attributes are, in insertion order: the walker name ({@code ID}), the GATK
   * version ({@code Version}), the current date as text ({@code Date}), the same instant in
   * milliseconds since the epoch ({@code Epoch}), and an approximate command line string
   * ({@code CommandLineOptions}).
   *
   * @param engine the GATK engine that supplies the walker name and builds the command line
   *     string; must not be null
   * @param argumentSources the argument values provided to the GATK, used for converting to a
   *     command line string. Should come from the data in the parsing engine. Must not be null;
   *     may be empty, in which case the command line is expected to be the empty string.
   * @return VCF header line describing this run of the GATK
   * @throws IllegalArgumentException if {@code engine} or {@code argumentSources} is null
   */
  public static VCFHeaderLine getCommandLineArgumentHeaderLine(
      final GenomeAnalysisEngine engine, final Collection<Object> argumentSources) {
    if (engine == null) {
      throw new IllegalArgumentException("engine cannot be null");
    }
    if (argumentSources == null) {
      throw new IllegalArgumentException("argumentSources cannot be null");
    }

    // LinkedHashMap keeps the attributes in the order they are added below.
    final Map<String, String> lineFields = new LinkedHashMap<>();
    lineFields.put("ID", engine.getWalkerName());
    lineFields.put("Version", CommandLineGATK.getVersionNumber());

    // A single Date instance so the textual and epoch forms describe the same instant.
    final Date now = new Date();
    lineFields.put("Date", now.toString());
    lineFields.put("Epoch", String.valueOf(now.getTime()));

    final Object[] sources = argumentSources.toArray();
    final String commandLine = engine.createApproximateCommandLineArgumentString(sources);
    lineFields.put("CommandLineOptions", commandLine);

    return new VCFSimpleHeaderLine(GATK_COMMAND_LINE_KEY, lineFields);
  }
Ejemplo n.º 3
0
  /**
   * Gathers the header metadata lines from the VCF rods supplied to the engine.
   *
   * <p>Only rods whose record type is {@code VariantContext} and whose header is non-null
   * contribute lines.
   *
   * @param toolkit GATK engine whose ROD data sources are inspected
   * @param rodNames names of rods to use, or null if all rods should be considered
   * @return the union of the contributing headers' metadata lines, held in a sorted set
   */
  public static Set<VCFHeaderLine> getHeaderFields(
      GenomeAnalysisEngine toolkit, Collection<String> rodNames) {

    // union of the header lines of every selected rod; the TreeSet sorts and de-duplicates
    final Set<VCFHeaderLine> merged = new TreeSet<>();

    for (final ReferenceOrderedDataSource rod : toolkit.getRodDataSources()) {
      // skip rods the caller did not ask for (a null list means "use all")
      final boolean selected = rodNames == null || rodNames.contains(rod.getName());
      if (!selected) {
        continue;
      }

      // only rods producing VariantContext records are treated as carrying a VCF header
      if (!rod.getRecordType().equals(VariantContext.class)) {
        continue;
      }

      final VCFHeader rodHeader = (VCFHeader) rod.getHeader();
      if (rodHeader == null) {
        continue;
      }
      merged.addAll(rodHeader.getMetaDataInSortedOrder());
    }

    return merged;
  }