/** * Checks if vc has a variant call for (at least one of) the samples. * * @param vc the variant rod VariantContext. Here, the variant is the dataset you're looking for * discordances to (e.g. HapMap) * @param compVCs the comparison VariantContext (discordance * @return */ private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) { if (vc == null) return false; // if we're not looking at specific samples then the absence of a compVC means discordance if (NO_SAMPLES_SPECIFIED) return (compVCs == null || compVCs.isEmpty()); // check if we find it in the variant rod Map<String, Genotype> genotypes = vc.getGenotypes(samples); for (Genotype g : genotypes.values()) { if (sampleHasVariant(g)) { // There is a variant called (or filtered with not exclude filtered option set) that is not // HomRef for at least one of the samples. if (compVCs == null) return true; // Look for this sample in the all vcs of the comp ROD track. boolean foundVariant = false; for (VariantContext compVC : compVCs) { if (sampleHasVariant(compVC.getGenotype(g.getSampleName()))) { foundVariant = true; break; } } // if (at least one sample) was not found in all VCs of the comp ROD, we have discordance if (!foundVariant) return true; } } return false; // we only get here if all samples have a variant in the comp rod. }
/** * Compares the covariate report lists. * * @param diffs map where to annotate the difference. * @param other the argument collection to compare against. * @param thisRole the name for this argument collection that makes sense to the user. * @param otherRole the name for the other argument collection that makes sense to the end user. * @return <code>true</code> if a difference was found. */ @Requires("diffs != null && other != null && thisRole != null && otherRole != null") private boolean compareRequestedCovariates( final Map<String, String> diffs, final RecalibrationArgumentCollection other, final String thisRole, final String otherRole) { final Set<String> beforeNames = new HashSet<>(this.COVARIATES.length); final Set<String> afterNames = new HashSet<>(other.COVARIATES.length); Utils.addAll(beforeNames, this.COVARIATES); Utils.addAll(afterNames, other.COVARIATES); final Set<String> intersect = new HashSet<>(Math.min(beforeNames.size(), afterNames.size())); intersect.addAll(beforeNames); intersect.retainAll(afterNames); String diffMessage = null; if (intersect.size() == 0) { // In practice this is not possible due to required covariates but... diffMessage = String.format( "There are no common covariates between '%s' and '%s'" + " recalibrator reports. Covariates in '%s': {%s}. Covariates in '%s': {%s}.", thisRole, otherRole, thisRole, Utils.join(", ", this.COVARIATES), otherRole, Utils.join(",", other.COVARIATES)); } else if (intersect.size() != beforeNames.size() || intersect.size() != afterNames.size()) { beforeNames.removeAll(intersect); afterNames.removeAll(intersect); diffMessage = String.format( "There are differences in the set of covariates requested in the" + " '%s' and '%s' recalibrator reports. " + " Exclusive to '%s': {%s}. 
Exclusive to '%s': {%s}.", thisRole, otherRole, thisRole, Utils.join(", ", beforeNames), otherRole, Utils.join(", ", afterNames)); } if (diffMessage != null) { diffs.put("covariate", diffMessage); return true; } else { return false; } }
/**
 * Records in a map any difference found in a simple-valued report argument between this and
 * another {@link RecalibrationArgumentCollection} instance.
 *
 * <p>When the two values differ, a new entry is added whose key is the name of the argument in
 * the report file and whose value is a message explaining the difference to the end user. A
 * {@code null} value is rendered as an empty string in that message. Two {@code null} values are
 * considered equal.
 *
 * <p>This method is not expected to throw any exception.
 *
 * @param diffs where to annotate the differences.
 * @param name the name of the report argument to compare.
 * @param thisValue this argument collection value for that argument.
 * @param otherValue the other collection value for that argument.
 * @param thisRole the name used to refer to this RAC report that makes sense to the end user.
 * @param otherRole the name used to refer to the other RAC report that makes sense to the end
 *     user.
 * @param <T> the argument Object value type.
 * @return <code>true</code> if a difference has been spotted, thus <code>diffs</code> has been
 *     modified.
 */
private <T> boolean compareSimpleReportArgument(
    final Map<String, String> diffs,
    final String name,
    final T thisValue,
    final T otherValue,
    final String thisRole,
    final String otherRole) {

  // Null-safe equality: two nulls match, a null never matches a non-null value.
  final boolean same = (thisValue == null) ? (otherValue == null) : thisValue.equals(otherValue);
  if (same) {
    return false;
  }

  final Object shownThis = (thisValue == null) ? "" : thisValue;
  final Object shownOther = (otherValue == null) ? "" : otherValue;
  final String message =
      String.format(
          "differences between '%s' {%s} and '%s' {%s}.",
          thisRole, shownThis, otherRole, shownOther);
  diffs.put(name, message);
  return true;
}
/**
 * Output the finalized report.
 *
 * <p>For every evaluation context, each evaluator is finalized and its data-point fields (as
 * returned by {@code AnalysisModuleScanner.getData()}) are read reflectively and copied into the
 * report:
 *
 * <ul>
 *   <li>a field holding a {@code TableType} gets its own sub-table named
 *       {@code <EvaluatorSimpleName>.<fieldName>}, created on first use, with one report row per
 *       state key and table row;
 *   <li>any other field is written as a single cell of the evaluator's own table, in the row of
 *       the current state key.
 * </ul>
 *
 * Finally the report is printed to {@code out}.
 *
 * @param result an integer that doesn't get used for anything
 * @throws StingException if a data-point field cannot be read reflectively; the original
 *     {@link IllegalAccessException} is attached as the cause
 */
public void onTraversalDone(Integer result) {
  logger.info("Finalizing variant report");

  for (StateKey stateKey : evaluationContexts.keySet()) {
    NewEvaluationContext nec = evaluationContexts.get(stateKey);
    final String stateName = stateKey.toString();

    for (VariantEvaluator ve : nec.getEvaluationClassList().values()) {
      ve.finalizeEvaluation();

      final String evaluatorName = ve.getClass().getSimpleName();
      AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
      Map<Field, DataPoint> datamap = scanner.getData();

      for (Map.Entry<Field, DataPoint> entry : datamap.entrySet()) {
        final Field field = entry.getKey();
        try {
          field.setAccessible(true);
          // Read the field once; the value is needed for the type test and for the output.
          final Object value = field.get(ve);

          if (value instanceof TableType) {
            final TableType t = (TableType) value;
            final Object[] rowKeys = t.getRowKeys();
            final Object[] columnKeys = t.getColumnKeys();
            final String subTableName = evaluatorName + "." + field.getName();

            GATKReportTable table;
            if (!report.hasTable(subTableName)) {
              // First time this sub-table is seen: create it and declare all of its columns.
              final DataPoint dataPointAnn = entry.getValue();
              report.addTable(subTableName, dataPointAnn.description());
              table = report.getTable(subTableName);

              table.addPrimaryKey("entry", false);
              table.addColumn(subTableName, subTableName);
              for (VariantStratifier vs : stratificationObjects) {
                table.addColumn(vs.getName(), "unknown");
              }
              table.addColumn("row", "unknown");
              for (Object columnKey : columnKeys) {
                table.addColumn(columnKey.toString(), 0.0);
              }
            } else {
              table = report.getTable(subTableName);
            }

            for (int row = 0; row < rowKeys.length; row++) {
              // Row keys are converted with toString(), like column keys, instead of being cast
              // to String; a null key is kept as null.
              final Object rowKey = rowKeys[row];
              final String r = (rowKey == null) ? null : rowKey.toString();
              final String rowStateKey = stateName + r;

              for (VariantStratifier vs : stratificationObjects) {
                final String columnName = vs.getName();
                table.set(rowStateKey, columnName, stateKey.get(columnName));
              }

              for (int col = 0; col < columnKeys.length; col++) {
                table.set(rowStateKey, columnKeys[col].toString(), t.getCell(row, col));
                table.set(rowStateKey, "row", r);
              }
            }
          } else {
            GATKReportTable table = report.getTable(evaluatorName);

            for (VariantStratifier vs : stratificationObjects) {
              final String columnName = vs.getName();
              table.set(stateName, columnName, stateKey.get(columnName));
            }

            table.set(stateName, field.getName(), value);
          }
        } catch (IllegalAccessException e) {
          // Attach the original exception as the cause so its stack trace is not lost.
          // initCause is used because only the single-argument constructor is visible here.
          final StingException wrapped = new StingException("IllegalAccessException: " + e);
          wrapped.initCause(e);
          throw wrapped;
        }
      }
    }
  }

  report.print(out);
}
/** Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */ public void initialize() { // Get list of samples to include in the output List<String> rodNames = Arrays.asList(variantCollection.variants.getName()); Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); TreeSet<String> vcfSamples = new TreeSet<String>( SampleUtils.getSampleList( vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions); // first, add any requested samples samples.addAll(samplesFromFile); samples.addAll(samplesFromExpressions); samples.addAll(sampleNames); // if none were requested, we want all of them if (samples.isEmpty()) { samples.addAll(vcfSamples); NO_SAMPLES_SPECIFIED = true; } // now, exclude any requested samples Collection<String> XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles); samples.removeAll(XLsamplesFromFile); samples.removeAll(XLsampleNames); if (samples.size() == 0 && !NO_SAMPLES_SPECIFIED) throw new UserException( "All samples requested to be included were also requested to be excluded."); for (String sample : samples) logger.info("Including sample '" + sample + "'"); // if user specified types to include, add these, otherwise, add all possible variant context // types to list of vc types to include if (TYPES_TO_INCLUDE.isEmpty()) { for (VariantContext.Type t : VariantContext.Type.values()) selectedTypes.add(t); } else { for (VariantContext.Type t : TYPES_TO_INCLUDE) selectedTypes.add(t); } // Initialize VCF header Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); headerLines.add(new VCFHeaderLine("source", "SelectVariants")); if (KEEP_ORIGINAL_CHR_COUNTS) { headerLines.add( new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC")); 
headerLines.add( new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF")); headerLines.add( new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN")); } vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) { // It's not necessary that the user supply select names for the JEXL expressions, since those // expressions will only be needed for omitting records. Make up the select names here. selectNames.add(String.format("select-%d", i)); } jexls = VariantContextUtils.initializeMatchExps(selectNames, SELECT_EXPRESSIONS); // Look at the parameters to decide which analysis to perform DISCORDANCE_ONLY = discordanceTrack.isBound(); if (DISCORDANCE_ONLY) logger.info( "Selecting only variants discordant with the track: " + discordanceTrack.getName()); CONCORDANCE_ONLY = concordanceTrack.isBound(); if (CONCORDANCE_ONLY) logger.info( "Selecting only variants concordant with the track: " + concordanceTrack.getName()); if (MENDELIAN_VIOLATIONS) { if (FAMILY_STRUCTURE_FILE != null) { try { for (final String line : new XReadLines(FAMILY_STRUCTURE_FILE)) { MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD); if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom())) mvSet.add(mv); } } catch (FileNotFoundException e) { throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e); } if (outMVFile != null) try { outMVFileStream = new PrintStream(outMVFile); } catch (FileNotFoundException e) { throw new UserException.CouldNotCreateOutputFile( outMVFile, "Can't open output file", e); } } else mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); } else if (!FAMILY_STRUCTURE.isEmpty()) { mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); MENDELIAN_VIOLATIONS = true; } SELECT_RANDOM_NUMBER = numRandom > 0; 
if (SELECT_RANDOM_NUMBER) { logger.info("Selecting " + numRandom + " variants at random from the variant track"); variantArray = new RandomVariantStructure[numRandom]; } SELECT_RANDOM_FRACTION = fractionRandom > 0; if (SELECT_RANDOM_FRACTION) logger.info( "Selecting approximately " + 100.0 * fractionRandom + "% of the variants at random from the variant track"); }