public void initialize() { if (variants.size() != 2) { throw new UserException.BadArgumentValue( "variant", "this tool requires exactly 2 input variant files"); } source1 = variants.get(0).getName(); source2 = variants.get(1).getName(); if (SET_KEY.toLowerCase().equals("null")) SET_KEY = null; if (STATUS_KEY.toLowerCase().equals("null")) STATUS_KEY = null; // for now, INFO and FORMAT fields are not propagated to the output VCF (so they aren't put into // the header) Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>(); if (SET_KEY != null) headerLines.add( new VCFInfoHeaderLine( SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record")); if (STATUS_KEY != null) headerLines.add( new VCFInfoHeaderLine( STATUS_KEY, 1, VCFHeaderLineType.String, "Extent to which records match")); final VCFHeader vcfHeader = new VCFHeader(headerLines, Collections.<String>emptySet()); baseWriter.writeHeader(vcfHeader); writer = VariantContextWriterFactory.sortOnTheFly(baseWriter, ACTIVE_WINDOW); }
/**
 * Prepares the two collaborators used by this tool: an on-the-fly sorting wrapper around
 * {@code baseWriter} (stored in {@code vcfwriter}) and a {@code VariantOverlapAnnotator} built
 * from the dbSNP binding and the toolkit's genome-loc parser.
 */
public void initialize() {
  // NOTE(review): presumably the maximum start-position distance the sorting writer buffers
  // over - confirm against VariantContextWriterFactory.sortOnTheFly.
  final int sortDistance = 40;
  // NOTE(review): presumably tells the wrapper not to take ownership of baseWriter - confirm.
  final boolean thirdSortArgument = false;

  vcfwriter =
      VariantContextWriterFactory.sortOnTheFly(baseWriter, sortDistance, thirdSortArgument);

  variantOverlapAnnotator =
      new VariantOverlapAnnotator(
          dbsnp.dbsnp,
          getToolkit().getGenomeLocParser());
}
@Override protected int execute() throws Exception { BasicConfigurator.configure(); logger.setLevel(Level.INFO); final ReferenceSequenceFile ref; try { ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile); } catch (Exception e) { throw new UserException("Couldn't load provided reference sequence file " + refFile, e); } variant = parseVariantList(variant); Comparator<Pair<Integer, File>> positionComparator = new PositionComparator(); Queue<Pair<Integer, File>> priorityQueue; if (assumeSorted) priorityQueue = new LinkedList<>(); else priorityQueue = new PriorityQueue<>(10000, positionComparator); FileType fileType = null; for (File file : variant) { // if it returns a valid type, it will be the same for all files fileType = fileExtensionCheck(file, fileType); if (fileType == FileType.INVALID) return 1; if (assumeSorted) { priorityQueue.add(new Pair<>(0, file)); } else { if (!file.exists()) { throw new UserException(String.format("File %s doesn't exist", file.getAbsolutePath())); } FeatureReader<VariantContext> reader = getFeatureReader(fileType, file); Iterator<VariantContext> it = reader.iterator(); if (!it.hasNext()) { System.err.println( String.format("File %s is empty. 
This file will be ignored", file.getAbsolutePath())); continue; } VariantContext vc = it.next(); int firstPosition = vc.getStart(); reader.close(); priorityQueue.add(new Pair<>(firstPosition, file)); } } FileOutputStream outputStream = new FileOutputStream(outputFile); EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY); IndexCreator idxCreator = GATKVCFUtils.makeIndexCreator( variant_index_type, variant_index_parameter, outputFile, ref.getSequenceDictionary()); final VariantContextWriter outputWriter = VariantContextWriterFactory.create( outputFile, outputStream, ref.getSequenceDictionary(), idxCreator, options); boolean firstFile = true; int count = 0; while (!priorityQueue.isEmpty()) { count++; File file = priorityQueue.remove().getSecond(); if (!file.exists()) { throw new UserException(String.format("File %s doesn't exist", file.getAbsolutePath())); } FeatureReader<VariantContext> reader = getFeatureReader(fileType, file); if (count % 10 == 0) System.out.print(count); else System.out.print("."); if (firstFile) { VCFHeader header = (VCFHeader) reader.getHeader(); outputWriter.writeHeader(header); firstFile = false; } Iterator<VariantContext> it = reader.iterator(); while (it.hasNext()) { VariantContext vc = it.next(); outputWriter.add(vc); } reader.close(); } System.out.println(); outputWriter.close(); return 0; }