public boolean useValidation(VariantContext validation, ReferenceContext ref) { if (goodSite(validation)) { // if using record keeps us below expected proportion, use it logger.debug( String.format( "boot: %d, test: %d, total: %d", bootstrapSetSize, testSetSize, bootstrapSetSize + testSetSize + 1)); if ((bootstrapSetSize + 1.0) / (1.0 + bootstrapSetSize + testSetSize) <= bootstrap) { if (bootstrapVCFOutput != null) { bootstrapVCFOutput.add( new VariantContextBuilder(validation).filters(BOOTSTRAP_FILTER).make()); } bootstrapSetSize++; return true; } else { if (bootstrapVCFOutput != null) { bootstrapVCFOutput.add(validation); } testSetSize++; return false; } } else { if (validation != null && bootstrapVCFOutput != null) { bootstrapVCFOutput.add(validation); } return false; } }
/**
 * Reports the number of processed records and, when selecting a fixed number of random variants,
 * writes the retained variants to the VCF writer.
 *
 * <p>The retained variants are read from {@code variantArray} in circular order starting at
 * {@code positionToAdd}, visiting {@code numRandom} slots.
 *
 * @param result the value reported in the "records processed" log message
 */
public void onTraversalDone(Integer result) {
  logger.info(result + " records processed.");

  if (!SELECT_RANDOM_NUMBER) {
    return;
  }

  int positionToPrint = positionToAdd;
  for (int i = 0; i < numRandom; i++) {
    // Array slots are null until something is stored in them; skip unfilled slots instead of
    // failing with a NullPointerException when fewer than numRandom variants were retained.
    if (variantArray[positionToPrint] != null) {
      vcfWriter.add(variantArray[positionToPrint].vc);
    }
    positionToPrint = nextCircularPosition(positionToPrint);
  }
}
private void initializeVcfWriter() { final List<String> inputNames = Arrays.asList(validation.getName()); // setup the header fields Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); hInfo.add( new VCFFilterHeaderLine( "bootstrap", "This site used for genotype bootstrapping with ProduceBeagleInputWalker")); bootstrapVCFOutput.writeHeader( new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames))); }
/** * Subset VC record if necessary and emit the modified record (provided it satisfies criteria for * printing) * * @param tracker the ROD tracker * @param ref reference information * @param context alignment info * @return 1 if the record was printed to the output file, 0 if otherwise */ @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker == null) return 0; Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation()); if (vcs == null || vcs.size() == 0) { return 0; } for (VariantContext vc : vcs) { if (MENDELIAN_VIOLATIONS) { boolean foundMV = false; for (MendelianViolation mv : mvSet) { if (mv.isViolation(vc)) { foundMV = true; // System.out.println(vc.toString()); if (outMVFile != null) outMVFileStream.format( "MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + "childG=%s childGL=%s\n", vc.getChr(), vc.getStart(), vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getChromosomeCount(vc.getAlternateAllele(0)), mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(), vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), vc.getGenotype(mv.getSampleChild()).toBriefString(), vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString()); } } if (!foundMV) break; } if (DISCORDANCE_ONLY) { Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation()); if (!isDiscordant(vc, compVCs)) return 0; } if (CONCORDANCE_ONLY) { Collection<VariantContext> compVCs = tracker.getValues(concordanceTrack, context.getLocation()); if (!isConcordant(vc, compVCs)) return 0; } if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic()) continue; if 
(alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic()) continue; if (!selectedTypes.contains(vc.getType())) continue; VariantContext sub = subsetRecord(vc, samples); if ((sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED)) { for (VariantContextUtils.JexlVCMatchExp jexl : jexls) { if (!VariantContextUtils.match(sub, jexl)) { return 0; } } if (SELECT_RANDOM_NUMBER) { randomlyAddVariant(++variantNumber, sub, ref.getBase()); } else if (!SELECT_RANDOM_FRACTION || (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { vcfWriter.add(sub); } } } return 1; }
/** Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */ public void initialize() { // Get list of samples to include in the output List<String> rodNames = Arrays.asList(variantCollection.variants.getName()); Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); TreeSet<String> vcfSamples = new TreeSet<String>( SampleUtils.getSampleList( vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions); // first, add any requested samples samples.addAll(samplesFromFile); samples.addAll(samplesFromExpressions); samples.addAll(sampleNames); // if none were requested, we want all of them if (samples.isEmpty()) { samples.addAll(vcfSamples); NO_SAMPLES_SPECIFIED = true; } // now, exclude any requested samples Collection<String> XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles); samples.removeAll(XLsamplesFromFile); samples.removeAll(XLsampleNames); if (samples.size() == 0 && !NO_SAMPLES_SPECIFIED) throw new UserException( "All samples requested to be included were also requested to be excluded."); for (String sample : samples) logger.info("Including sample '" + sample + "'"); // if user specified types to include, add these, otherwise, add all possible variant context // types to list of vc types to include if (TYPES_TO_INCLUDE.isEmpty()) { for (VariantContext.Type t : VariantContext.Type.values()) selectedTypes.add(t); } else { for (VariantContext.Type t : TYPES_TO_INCLUDE) selectedTypes.add(t); } // Initialize VCF header Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); headerLines.add(new VCFHeaderLine("source", "SelectVariants")); if (KEEP_ORIGINAL_CHR_COUNTS) { headerLines.add( new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC")); 
headerLines.add( new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF")); headerLines.add( new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN")); } vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) { // It's not necessary that the user supply select names for the JEXL expressions, since those // expressions will only be needed for omitting records. Make up the select names here. selectNames.add(String.format("select-%d", i)); } jexls = VariantContextUtils.initializeMatchExps(selectNames, SELECT_EXPRESSIONS); // Look at the parameters to decide which analysis to perform DISCORDANCE_ONLY = discordanceTrack.isBound(); if (DISCORDANCE_ONLY) logger.info( "Selecting only variants discordant with the track: " + discordanceTrack.getName()); CONCORDANCE_ONLY = concordanceTrack.isBound(); if (CONCORDANCE_ONLY) logger.info( "Selecting only variants concordant with the track: " + concordanceTrack.getName()); if (MENDELIAN_VIOLATIONS) { if (FAMILY_STRUCTURE_FILE != null) { try { for (final String line : new XReadLines(FAMILY_STRUCTURE_FILE)) { MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD); if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom())) mvSet.add(mv); } } catch (FileNotFoundException e) { throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e); } if (outMVFile != null) try { outMVFileStream = new PrintStream(outMVFile); } catch (FileNotFoundException e) { throw new UserException.CouldNotCreateOutputFile( outMVFile, "Can't open output file", e); } } else mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); } else if (!FAMILY_STRUCTURE.isEmpty()) { mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); MENDELIAN_VIOLATIONS = true; } SELECT_RANDOM_NUMBER = numRandom > 0; 
if (SELECT_RANDOM_NUMBER) { logger.info("Selecting " + numRandom + " variants at random from the variant track"); variantArray = new RandomVariantStructure[numRandom]; } SELECT_RANDOM_FRACTION = fractionRandom > 0; if (SELECT_RANDOM_FRACTION) logger.info( "Selecting approximately " + 100.0 * fractionRandom + "% of the variants at random from the variant track"); }