Java PoN Examples

Programming Language: Java

Namespace/Package Name: org.broadinstitute.hellbender.utils.hdf5

Class/Type: PoN

Examples at hotexamples.com: 3

Java PoN - 3 examples found. These are the top rated real world Java examples of org.broadinstitute.hellbender.utils.hdf5.PoN extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

betaHats(1)

factorNormalization(1)

getNormalizedCounts(1)

getPanelTargetNames(1)

getReducedPanelCounts(1)

getReducedPanelPInverseCounts(1)

getSampleNames(1)

getTargetNames(1)

getTargets(1)

tangentNormalization(1)

Example #1

Show file

File: TangentNormalizer.java Project: apete/gatk-protected

  private static ReadCountCollection createCoverageProfile(
      final PoN pon, final ReadCountCollection inputReadCounts) {
    Utils.nonNull(pon, "PoN cannot be null.");
    Utils.nonNull(
        inputReadCounts, "input read counts cannot be null when creating a coverage profile.");
    ParamUtils.isPositive(
        inputReadCounts.columnNames().size(),
        "inputReadCounts column names cannot be an empty list.");
    final Case2PoNTargetMapper targetMapper =
        new Case2PoNTargetMapper(inputReadCounts.targets(), pon.getTargetNames());
    final RealMatrix inputCounts = targetMapper.fromCaseToPoNCounts(inputReadCounts.counts());
    final RealMatrix targetNormalizedCounts = pon.factorNormalization(inputCounts);

    return targetMapper.fromPoNtoCaseCountCollection(
        targetNormalizedCounts, inputReadCounts.columnNames());
  }

Example #2

Show file

File: TangentNormalizer.java Project: apete/gatk-protected

  /**
   * Project all of the normals used to create the PoN into the reduced panel.
   *
   * @param pon Not {@code null}.
   * @param ctx Spark context, {@code null} indicates no spark context available, which is
   *     supported.
   * @return never {@code null}. Result will contain multiple columns in each of the
   *     ReadCountCollection attributes.
   */
  public static TangentNormalizationResult tangentNormalizeNormalsInPoN(
      final PoN pon, final JavaSparkContext ctx) {
    // Get the list of sample names in a modifiable List
    final List<String> sampleNamesCopy = new ArrayList<>(pon.getSampleNames());

    logger.info("Loading normalized counts...");
    final ReadCountCollection coverageProfile =
        new ReadCountCollection(
            SetUniqueList.setUniqueList(pon.getTargets()),
            SetUniqueList.setUniqueList(sampleNamesCopy),
            pon.getNormalizedCounts());

    logger.info("Tangent normalizing the normals (normalized by target factors) ...");

    // For each sample in the PoN, tangent normalize against the qc reduced PoN.
    return TangentNormalizer.tangentNormalize(pon, coverageProfile, ctx);
  }

Example #3

Show file

File: TangentNormalizer.java Project: apete/gatk-protected

  /**
   * Tangent normalize a coverage profile.
   *
   * <p>Notes about the Spark tangent normalization can be found in docs/PoN/
   *
   * @param pon Not {@code null}
   * @param targetFactorNormalizedCounts ReadCountCollection of counts that have already been
   *     normalized fully (typically, including the target factor normalization). I.e. a coverage
   *     profile The column names should be intact. Not {@code null} See {@link
   *     TangentNormalizer::createCoverageProfile}
   * @return never {@code null}
   */
  private static TangentNormalizationResult tangentNormalize(
      final PoN pon, final ReadCountCollection targetFactorNormalizedCounts, JavaSparkContext ctx) {

    Utils.nonNull(pon, "PoN cannot be null.");
    Utils.nonNull(targetFactorNormalizedCounts, "targetFactorNormalizedCounts cannot be null.");
    Utils.nonNull(
        targetFactorNormalizedCounts.columnNames(),
        "targetFactorNormalizedCounts column names cannot be null.");
    ParamUtils.isPositive(
        targetFactorNormalizedCounts.columnNames().size(),
        "targetFactorNormalizedCounts column names cannot be an empty list.");

    final Case2PoNTargetMapper targetMapper =
        new Case2PoNTargetMapper(targetFactorNormalizedCounts.targets(), pon.getPanelTargetNames());

    // The input counts with rows (targets) sorted so that they match the PoN's order.
    final RealMatrix tangentNormalizationRawInputCounts =
        targetMapper.fromCaseToPoNCounts(targetFactorNormalizedCounts.counts());

    // We prepare the counts for tangent normalization.
    final RealMatrix tangentNormalizationInputCounts =
        composeTangentNormalizationInputMatrix(tangentNormalizationRawInputCounts);

    if (ctx == null) {

      // Calculate the beta-hats for the input read count columns (samples).
      logger.info("Calculating beta hats...");
      final RealMatrix tangentBetaHats =
          pon.betaHats(tangentNormalizationInputCounts, true, EPSILON);

      // Actual tangent normalization step.
      logger.info(
          "Performing actual tangent normalization ("
              + tangentNormalizationInputCounts.getColumnDimension()
              + " columns)...");
      final RealMatrix tangentNormalizedCounts =
          pon.tangentNormalization(tangentNormalizationInputCounts, tangentBetaHats, true);

      // Output the tangent normalized counts.
      logger.info("Post-processing tangent normalization results...");
      final ReadCountCollection tangentNormalized =
          targetMapper.fromPoNtoCaseCountCollection(
              tangentNormalizedCounts, targetFactorNormalizedCounts.columnNames());
      final ReadCountCollection preTangentNormalized =
          targetMapper.fromPoNtoCaseCountCollection(
              tangentNormalizationInputCounts, targetFactorNormalizedCounts.columnNames());

      return new TangentNormalizationResult(
          tangentNormalized, preTangentNormalized, tangentBetaHats, targetFactorNormalizedCounts);

    } else {

      /*
      Using Spark:  the code here is a little more complex for optimization purposes.

      Please see notes in docs/PoN ...

      Ahat^T = (C^T P^T) A^T
      Therefore, C^T is the RowMatrix

      pinv: P
      panel: A
      projection: Ahat
      cases: C
      betahat: C^T P^T
      tangentNormalizedCounts: C - Ahat
       */
      final RealMatrix pinv = pon.getReducedPanelPInverseCounts();
      final RealMatrix panel = pon.getReducedPanelCounts();

      // Make the C^T a distributed matrix (RowMatrix)
      final RowMatrix caseTDistMat =
          SparkConverter.convertRealMatrixToSparkRowMatrix(
              ctx, tangentNormalizationInputCounts.transpose(), TN_NUM_SLICES_SPARK);

      // Spark local matrices (transposed)
      final Matrix pinvTLocalMat =
          new DenseMatrix(
                  pinv.getRowDimension(),
                  pinv.getColumnDimension(),
                  Doubles.concat(pinv.getData()),
                  true)
              .transpose();
      final Matrix panelTLocalMat =
          new DenseMatrix(
                  panel.getRowDimension(),
                  panel.getColumnDimension(),
                  Doubles.concat(panel.getData()),
                  true)
              .transpose();

      // Calculate the projection transpose in a distributed matrix, then convert to Apache Commons
      // matrix (not transposed)
      final RowMatrix betahatDistMat = caseTDistMat.multiply(pinvTLocalMat);
      final RowMatrix projectionTDistMat = betahatDistMat.multiply(panelTLocalMat);
      final RealMatrix projection =
          SparkConverter.convertSparkRowMatrixToRealMatrix(
                  projectionTDistMat, tangentNormalizationInputCounts.transpose().getRowDimension())
              .transpose();

      // Subtract the cases from the projection
      final RealMatrix tangentNormalizedCounts =
          tangentNormalizationInputCounts.subtract(projection);

      // Construct the result object and return it with the correct targets.
      final ReadCountCollection tangentNormalized =
          targetMapper.fromPoNtoCaseCountCollection(
              tangentNormalizedCounts, targetFactorNormalizedCounts.columnNames());
      final ReadCountCollection preTangentNormalized =
          targetMapper.fromPoNtoCaseCountCollection(
              tangentNormalizationInputCounts, targetFactorNormalizedCounts.columnNames());
      final RealMatrix tangentBetaHats =
          SparkConverter.convertSparkRowMatrixToRealMatrix(
              betahatDistMat, tangentNormalizedCounts.getColumnDimension());
      return new TangentNormalizationResult(
          tangentNormalized,
          preTangentNormalized,
          tangentBetaHats.transpose(),
          targetFactorNormalizedCounts);
    }
  }