/**
   * Folds the observations from a single read into the updatable recalibration tables.
   *
   * <p>For every read offset that {@code recalInfo} does not ask to skip, and for every {@link
   * EventType}, the quality score table is incremented under the first two covariate keys plus
   * the event index. Every further covariate table (index 2 and up) is incremented as well, unless
   * its key at this offset is negative.
   *
   * @param recalInfo data structure holding information about the recalibration values for a single
   *     read
   */
  @Requires("recalInfo != null")
  public void updateDataForRead(final ReadRecalibrationInfo recalInfo) {
    final GATKSAMRecord samRecord = recalInfo.getRead();
    final ReadCovariates covariateValues = recalInfo.getCovariatesValues();
    final RecalibrationTables updatableTables = getUpdatableRecalibrationTables();
    final NestedIntegerArray<RecalDatum> qualTable = updatableTables.getQualityScoreTable();

    for (int pos = 0; pos < samRecord.getReadBases().length; pos++) {
      // Positions flagged by recalInfo contribute nothing to any table.
      if (recalInfo.skip(pos)) {
        continue;
      }

      for (final EventType event : EventType.values()) {
        final int[] keySet = covariateValues.getKeySet(pos, event);
        final int eventOrdinal = event.ordinal();
        final byte quality = recalInfo.getQual(event, pos);
        final double errorFraction = recalInfo.getErrorFraction(event, pos);

        // keySet[0] and keySet[1] are presumably the read group and quality score keys -- TODO
        // confirm against ReadCovariates.
        RecalUtils.incrementDatumOrPutIfNecessary(
            qualTable, quality, errorFraction, keySet[0], keySet[1], eventOrdinal);

        // Remaining covariates start at index 2; a negative key means there is nothing to record
        // for that covariate at this position.
        for (int cov = 2; cov < covariates.length; cov++) {
          final int covKey = keySet[cov];
          if (covKey >= 0) {
            RecalUtils.incrementDatumOrPutIfNecessary(
                updatableTables.getTable(cov),
                quality,
                errorFraction,
                keySet[0],
                keySet[1],
                covKey,
                eventOrdinal);
          }
        }
      }
    }
  }
 /**
  * Builds the recalibration report from this object's argument table, quantization info,
  * recalibration tables and requested covariates. The returned report can then be written to a
  * stream via GATKReport.print(PrintStream).
  *
  * @return newly created recalibration report
  */
 public GATKReport createGATKReport() {
   // Sorting behaviour is taken from the argument collection's SORT_BY_ALL_COLUMNS setting.
   final GATKReport report =
       RecalUtils.createRecalibrationGATKReport(
           argumentTable,
           quantizationInfo,
           recalibrationTables,
           requestedCovariates,
           RAC.SORT_BY_ALL_COLUMNS);
   return report;
 }
 /**
  * Merges another recalibration report into this one, table by table, by handing each pair of
  * same-index tables to {@code RecalUtils.combineTables}.
  *
  * <p>Note: this method DOES NOT recalculate the empirical qualities and quantized qualities; the
  * caller has to recalculate them after combining. It is meant for folding a whole series of
  * recalibration reports together, where recalculating only makes sense once every report has
  * been merged, and skipping that work keeps each merge fast.
  *
  * <p>Note2: The empirical quality reported, however, is recalculated given its simplicity.
  *
  * <p>NOTE(review): the two notes above appear to disagree about empirical qualities -- confirm
  * against RecalUtils.combineTables.
  *
  * <p>Only the first {@code recalibrationTables.numTables()} tables of {@code other} are visited;
  * no size comparison is made here.
  *
  * @param other the recalibration report to combine with this one
  */
 public void combine(final RecalibrationReport other) {
   int idx = 0;
   while (idx < recalibrationTables.numTables()) {
     RecalUtils.combineTables(
         recalibrationTables.getTable(idx), other.recalibrationTables.getTable(idx));
     idx++;
   }
 }
  // Example no. 4
  // 0
  /**
   * Merges every table from {@code toMerge} into the table at the same index in this set.
   *
   * @param toMerge the tables to merge in; must hold the same number of tables as this set
   * @return this instance, after merging
   * @throws IllegalArgumentException if the two sets hold a different number of tables
   */
  public RecalibrationTables combine(final RecalibrationTables toMerge) {
    final boolean sameSize = numTables() == toMerge.numTables();
    if (!sameSize) {
      throw new IllegalArgumentException(
          "Attempting to merge RecalibrationTables with different sizes");
    }

    // Tables are paired purely by position in allTables.
    for (int tableIdx = 0; tableIdx < numTables(); tableIdx++) {
      RecalUtils.combineTables(this.allTables.get(tableIdx), toMerge.allTables.get(tableIdx));
    }

    return this;
  }
  /**
   * Loads a recalibration report from a file and rebuilds the in-memory tables from it.
   *
   * <p>In order: reads the argument table and derives the argument collection from it; reads the
   * quantization table; initializes the required and optional covariates; allocates the
   * recalibration tables; then parses the read group, quality score and all-covariates report
   * tables into them.
   *
   * @param recalFile the recalibration report file to read
   * @param allReadGroups the read groups; the size of this set is passed to the recalibration
   *     tables and the set itself is used to initialize the read group covariates
   */
  public RecalibrationReport(final File recalFile, final SortedSet<String> allReadGroups) {
    final GATKReport report = new GATKReport(recalFile);

    argumentTable = report.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
    RAC = initializeArgumentCollectionTable(argumentTable);

    quantizationInfo =
        initializeQuantizationTable(report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE));

    // Initialize the required and optional covariates from the argument collection.
    final Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariateGroups =
        RecalUtils.initializeCovariates(RAC);
    final ArrayList<Covariate> required = covariateGroups.getFirst();
    final ArrayList<Covariate> optional = covariateGroups.getSecond();

    requestedCovariates = new Covariate[required.size() + optional.size()];
    optionalCovariateIndexes = new HashMap<String, Integer>(optional.size());

    // Required covariates fill the leading slots, optional ones follow.
    int slot = 0;
    for (final Covariate requiredCovariate : required) {
      requestedCovariates[slot] = requiredCovariate;
      slot++;
    }
    for (final Covariate optionalCovariate : optional) {
      requestedCovariates[slot] = optionalCovariate;

      // Strip the "Covariate" part of the class name so the name can be matched with the
      // GATKReport.
      final String simpleName = optionalCovariate.getClass().getSimpleName();
      final String reportName = simpleName.split("Covariate")[0];

      // NOTE(review): the offset of 2 presumably corresponds to the number of required
      // covariates -- confirm.
      optionalCovariateIndexes.put(reportName, slot - 2);
      slot++;
    }

    // Initialize any covariate member variables using the shared argument collection.
    for (int i = 0; i < requestedCovariates.length; i++) {
      requestedCovariates[i].initialize(RAC);
    }

    recalibrationTables = new RecalibrationTables(requestedCovariates, allReadGroups.size());

    initializeReadGroupCovariates(allReadGroups);

    final GATKReportTable readGroupReport =
        report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE);
    parseReadGroupTable(readGroupReport, recalibrationTables.getReadGroupTable());

    final GATKReportTable qualityScoreReport =
        report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE);
    parseQualityScoreTable(qualityScoreReport, recalibrationTables.getQualityScoreTable());

    final GATKReportTable allCovariatesReport =
        report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE);
    parseAllCovariatesTable(allCovariatesReport, recalibrationTables);
  }