/** * Read all of the VCF records from source into memory, returning the header and the * VariantContexts * * <p>SHOULD ONLY BE USED FOR UNIT/INTEGRATION TESTING PURPOSES! * * @param source the file to read, must be in VCF4 format * @return * @throws java.io.IOException */ public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException { // read in the features final List<VariantContext> vcs = new ArrayList<VariantContext>(); final VCFCodec codec = new VCFCodec(); PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source)); final LineIterator vcfSource = codec.makeSourceFromStream(pbs); try { final VCFHeader vcfHeader = (VCFHeader) codec.readActualHeader(vcfSource); while (vcfSource.hasNext()) { final VariantContext vc = codec.decode(vcfSource); if (vc != null) vcs.add(vc); } return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs); } finally { codec.close(vcfSource); } }
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker == null) { // For some reason RodWalkers get map calls with null trackers return 1; } final List<VariantContext> VCs = tracker.getValues(input, context.getLocation()); final List<VariantContext> recals = tracker.getValues(recal, context.getLocation()); for (final VariantContext vc : VCs) { if (VariantDataManager.checkVariationClass(vc, MODE) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()))) { final VariantContext recalDatum = getMatchingRecalVC(vc, recals); if (recalDatum == null) { throw new UserException( "Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc); } final String lodString = recalDatum.getAttributeAsString(VariantRecalibrator.VQS_LOD_KEY, null); if (lodString == null) { throw new UserException( "Encountered a malformed record in the input recal file. There is no lod for the record at: " + vc); } final double lod; try { lod = Double.valueOf(lodString); } catch (NumberFormatException e) { throw new UserException( "Encountered a malformed record in the input recal file. The lod is unreadable for the record at: " + vc); } VariantContextBuilder builder = new VariantContextBuilder(vc); String filterString = null; // Annotate the new record with its VQSLOD and the worst performing annotation builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod); builder.attribute( VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY)); if (recalDatum.hasAttribute(VariantRecalibrator.POSITIVE_LABEL_KEY)) builder.attribute(VariantRecalibrator.POSITIVE_LABEL_KEY, true); if (recalDatum.hasAttribute(VariantRecalibrator.NEGATIVE_LABEL_KEY)) builder.attribute(VariantRecalibrator.NEGATIVE_LABEL_KEY, true); for (int i = tranches.size() - 1; i >= 0; i--) { final Tranche tranche = tranches.get(i); if (lod >= tranche.minVQSLod) { if (i == tranches.size() - 1) { filterString = VCFConstants.PASSES_FILTERS_v4; } else { filterString = tranche.name; } break; } } if (filterString == null) { filterString = tranches.get(0).name + "+"; } if (filterString.equals(VCFConstants.PASSES_FILTERS_v4)) { builder.passFilters(); } else { builder.filters(filterString); } vcfWriter.add(builder.make()); } else { // valid VC but not compatible with this mode, so just emit the variant untouched vcfWriter.add(vc); } } return 1; // This value isn't used for anything }
public void initialize() { for (final Tranche t : Tranche.readTranches(TRANCHES_FILE)) { if (t.ts >= TS_FILTER_LEVEL) { tranches.add(t); } logger.info(String.format("Read tranche " + t)); } Collections.reverse( tranches); // this algorithm wants the tranches ordered from best (lowest truth sensitivity) // to worst (highest truth sensitivity) for (final RodBinding rod : input) { inputNames.add(rod.getName()); } if (IGNORE_INPUT_FILTERS != null) { ignoreInputFilterSet.addAll(Arrays.asList(IGNORE_INPUT_FILTERS)); } // setup the header fields final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames)); addVQSRStandardHeaderLines(hInfo); final TreeSet<String> samples = new TreeSet<String>(); samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)); if (tranches.size() >= 2) { for (int iii = 0; iii < tranches.size() - 1; iii++) { final Tranche t = tranches.get(iii); hInfo.add( new VCFFilterHeaderLine( t.name, String.format( "Truth sensitivity tranche level for " + t.model.toString() + " model at VQS Lod: " + t.minVQSLod + " <= x < " + tranches.get(iii + 1).minVQSLod))); } } if (tranches.size() >= 1) { hInfo.add( new VCFFilterHeaderLine( tranches.get(0).name + "+", String.format( "Truth sensitivity tranche level for " + tranches.get(0).model.toString() + " model at VQS Lod < " + tranches.get(0).minVQSLod))); } else { throw new UserException( "No tranches were found in the file or were above the truth sensitivity filter level " + TS_FILTER_LEVEL); } logger.info("Keeping all variants in tranche " + tranches.get(tranches.size() - 1)); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); }