/**
 * Folds the observations carried by {@code recalInfo} into the updatable recalibration tables.
 *
 * <p>Every read offset that {@code recalInfo} does not mark as skipped is visited once per
 * {@link EventType}. For each such (offset, event type) pair the quality score table is updated
 * under the first two covariate keys plus the event type's ordinal, and then every further
 * covariate table (index 2 and above) is updated as well, provided its key at that offset is
 * non-negative.
 *
 * @param recalInfo data structure holding information about the recalibration values for a single
 *     read; must not be null
 */
@Requires("recalInfo != null")
public void updateDataForRead(final ReadRecalibrationInfo recalInfo) {
  final GATKSAMRecord record = recalInfo.getRead();
  final ReadCovariates covariateValues = recalInfo.getCovariatesValues();
  final RecalibrationTables updatableTables = getUpdatableRecalibrationTables();
  final NestedIntegerArray<RecalDatum> qualTable = updatableTables.getQualityScoreTable();

  for (int pos = 0; pos < record.getReadBases().length; pos++) {
    // Offsets flagged by recalInfo contribute nothing to any table.
    if (recalInfo.skip(pos)) {
      continue;
    }

    for (final EventType event : EventType.values()) {
      final int[] keySet = covariateValues.getKeySet(pos, event);
      final int eventOrdinal = event.ordinal();
      final byte baseQual = recalInfo.getQual(event, pos);
      final double errorFraction = recalInfo.getErrorFraction(event, pos);

      // keySet[0] and keySet[1] index the quality score table
      // (presumably read group and reported quality -- confirm against ReadCovariates).
      RecalUtils.incrementDatumOrPutIfNecessary(
          qualTable, baseQual, errorFraction, keySet[0], keySet[1], eventOrdinal);

      // Remaining covariates each have their own table at the same index as their key.
      for (int tableIdx = 2; tableIdx < covariates.length; tableIdx++) {
        // A negative key means there is no value to record for this covariate here.
        if (keySet[tableIdx] >= 0) {
          RecalUtils.incrementDatumOrPutIfNecessary(
              updatableTables.getTable(tableIdx),
              baseQual,
              errorFraction,
              keySet[0],
              keySet[1],
              keySet[tableIdx],
              eventOrdinal);
        }
      }
    }
  }
}
/**
 * Builds a recalibration report from this object's argument table, quantization info,
 * recalibration tables and requested covariates, using the {@code SORT_BY_ALL_COLUMNS} setting of
 * the argument collection.
 *
 * <p>The returned report can then be written to a stream via GATKReport.print(PrintStream).
 *
 * @return newly created recalibration report
 */
public GATKReport createGATKReport() {
  final GATKReport recalibrationReport =
      RecalUtils.createRecalibrationGATKReport(
          argumentTable,
          quantizationInfo,
          recalibrationTables,
          requestedCovariates,
          RAC.SORT_BY_ALL_COLUMNS);
  return recalibrationReport;
}
/**
 * Merges the tables of another recalibration report into this one.
 *
 * <p>For every table index of this report, the table at the same index in {@code other} is
 * combined into this report's table through {@code RecalUtils.combineTables}. Only indexes
 * {@code 0 .. numTables() - 1} of this report are visited.
 *
 * <p>Note: this method itself does not recompute quantized qualities (or any other derived
 * value). It is intended for folding a series of reports together, so callers are expected to
 * recalculate derived values once, after the last report has been combined, which also keeps
 * this method fast.
 *
 * <p>NOTE(review): earlier documentation stated that the reported empirical quality is
 * nevertheless recalculated; if so, that happens inside {@code RecalUtils.combineTables}, which
 * is not visible here -- confirm.
 *
 * @param other the recalibration report to combine with this one
 */
public void combine(final RecalibrationReport other) {
  for (int idx = 0; idx < recalibrationTables.numTables(); idx++) {
    final NestedIntegerArray<RecalDatum> destination = recalibrationTables.getTable(idx);
    final NestedIntegerArray<RecalDatum> source = other.recalibrationTables.getTable(idx);
    RecalUtils.combineTables(destination, source);
  }
}
/**
 * Merges all of the tables from {@code toMerge} into this set of tables.
 *
 * <p>Tables are paired up by position and each pair is combined with
 * {@code RecalUtils.combineTables}, with this object's table as the first argument.
 *
 * @param toMerge the set of tables to merge into this one; must contain the same number of
 *     tables as this set
 * @return this object, to allow call chaining
 * @throws IllegalArgumentException if the two sets do not contain the same number of tables
 */
public RecalibrationTables combine(final RecalibrationTables toMerge) {
  if (numTables() != toMerge.numTables()) {
    throw new IllegalArgumentException(
        "Attempting to merge RecalibrationTables with different sizes");
  }

  for (int position = 0; position < numTables(); position++) {
    final NestedIntegerArray<RecalDatum> mine = this.allTables.get(position);
    final NestedIntegerArray<RecalDatum> theirs = toMerge.allTables.get(position);
    RecalUtils.combineTables(mine, theirs);
  }

  return this;
}
/**
 * Loads a recalibration report from a file and rebuilds the in-memory recalibration state.
 *
 * <p>In order, the constructor: reads the report; restores the argument collection from the
 * argument table; restores the quantization info from the quantized table; instantiates and
 * initializes the covariates (required ones first, then optional ones); allocates the
 * recalibration tables sized by the number of read groups; and finally parses the read group,
 * quality score and all-covariates report tables into those recalibration tables.
 *
 * @param recalFile the recalibration report file to read
 * @param allReadGroups the read groups used to size the recalibration tables and to initialize
 *     the read group covariates
 */
public RecalibrationReport(final File recalFile, final SortedSet<String> allReadGroups) {
  final GATKReport report = new GATKReport(recalFile);

  argumentTable = report.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
  RAC = initializeArgumentCollectionTable(argumentTable);

  final GATKReportTable quantizationReportTable =
      report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
  quantizationInfo = initializeQuantizationTable(quantizationReportTable);

  // Instantiate the required and optional covariates described by the argument collection.
  final Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariatePair =
      RecalUtils.initializeCovariates(RAC);
  final ArrayList<Covariate> required = covariatePair.getFirst();
  final ArrayList<Covariate> optional = covariatePair.getSecond();

  final int totalCovariates = required.size() + optional.size();
  requestedCovariates = new Covariate[totalCovariates];
  optionalCovariateIndexes = new HashMap<String, Integer>(optional.size());

  // Required covariates occupy the leading slots of requestedCovariates.
  int slot = 0;
  for (final Covariate requiredCovariate : required) {
    requestedCovariates[slot] = requiredCovariate;
    slot++;
  }

  // Optional covariates follow, and are additionally registered by name.
  for (final Covariate optionalCovariate : optional) {
    requestedCovariates[slot] = optionalCovariate;

    // Use the class name without its "Covariate" part so it can be matched with the GATKReport.
    final String simpleClassName = optionalCovariate.getClass().getSimpleName();
    final String shortName = simpleClassName.split("Covariate")[0];

    // NOTE(review): the offset 2 presumably equals the number of required covariates -- confirm.
    optionalCovariateIndexes.put(shortName, slot - 2);
    slot++;
  }

  // Initialize any covariate member variables using the shared argument collection.
  for (final Covariate requested : requestedCovariates) {
    requested.initialize(RAC);
  }

  recalibrationTables = new RecalibrationTables(requestedCovariates, allReadGroups.size());

  initializeReadGroupCovariates(allReadGroups);

  parseReadGroupTable(
      report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE),
      recalibrationTables.getReadGroupTable());

  parseQualityScoreTable(
      report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE),
      recalibrationTables.getQualityScoreTable());

  parseAllCovariatesTable(
      report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
}