public static Map<String, VCFHeader> getVCFHeadersFromRods( GenomeAnalysisEngine toolkit, Collection<String> rodNames) { Map<String, VCFHeader> data = new HashMap<String, VCFHeader>(); // iterate to get all of the sample names List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources(); for (ReferenceOrderedDataSource source : dataSources) { // ignore the rod if it's not in our list if (rodNames != null && !rodNames.contains(source.getName())) continue; if (source.getHeader() != null && source.getHeader() instanceof VCFHeader) data.put(source.getName(), (VCFHeader) source.getHeader()); } return data; }
/**
 * Builds the VCF header line that records how this GATK run was invoked.
 *
 * <p>The line is keyed by {@code GATK_COMMAND_LINE_KEY} and carries, in this order, the
 * attributes {@code ID} (walker name), {@code Version}, {@code Date}, {@code Epoch} and
 * {@code CommandLineOptions}.
 *
 * @param engine the GATK engine that holds the walker name, GATK version, and other information
 * @param argumentSources contains information on the argument values provided to the GATK for
 *     converting to a command line string. Should be provided from the data in the parsing
 *     engine. Can be empty in which case the command line will be the empty string.
 * @return VCF header line describing this run of the GATK.
 * @throws IllegalArgumentException if {@code engine} or {@code argumentSources} is null
 */
public static VCFHeaderLine getCommandLineArgumentHeaderLine(
        final GenomeAnalysisEngine engine, final Collection<Object> argumentSources) {
    if (engine == null) {
        throw new IllegalArgumentException("engine cannot be null");
    }
    if (argumentSources == null) {
        throw new IllegalArgumentException("argumentSources cannot be null");
    }

    // LinkedHashMap: the attributes are handed to the header line in insertion order
    final Map<String, String> lineAttributes = new LinkedHashMap<>();

    final String walkerName = engine.getWalkerName();
    lineAttributes.put("ID", walkerName);

    final String gatkVersion = CommandLineGATK.getVersionNumber();
    lineAttributes.put("Version", gatkVersion);

    // a single timestamp feeds both the readable date and the epoch value,
    // so the two attributes always describe the same instant
    final Date now = new Date();
    lineAttributes.put("Date", now.toString());
    lineAttributes.put("Epoch", String.valueOf(now.getTime()));

    final Object[] sources = argumentSources.toArray();
    final String commandLine = engine.createApproximateCommandLineArgumentString(sources);
    lineAttributes.put("CommandLineOptions", commandLine);

    return new VCFSimpleHeaderLine(GATK_COMMAND_LINE_KEY, lineAttributes);
}
/** * Gets the header fields from all VCF rods input by the user * * @param toolkit GATK engine * @param rodNames names of rods to use, or null if we should use all possible ones * @return a set of all fields */ public static Set<VCFHeaderLine> getHeaderFields( GenomeAnalysisEngine toolkit, Collection<String> rodNames) { // keep a map of sample name to occurrences encountered TreeSet<VCFHeaderLine> fields = new TreeSet<VCFHeaderLine>(); // iterate to get all of the sample names List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources(); for (ReferenceOrderedDataSource source : dataSources) { // ignore the rod if it's not in our list if (rodNames != null && !rodNames.contains(source.getName())) continue; if (source.getRecordType().equals(VariantContext.class)) { VCFHeader header = (VCFHeader) source.getHeader(); if (header != null) fields.addAll(header.getMetaDataInSortedOrder()); } } return fields; }