/**
 * Demonstrates the three Spark MLlib distributed matrix types — {@code RowMatrix},
 * {@code IndexedRowMatrix} and {@code CoordinateMatrix} — plus converting a
 * {@code RowMatrix} into an {@code IndexedRowMatrix}.
 *
 * @param args unused
 * @throws IOException declared for API compatibility; nothing in this body visibly throws it
 */
public static void main(String[] args) throws IOException {
  SparkConf config =
      new SparkConf().setAppName("003-distributed-matrices").setMaster("local[*]");
  try (JavaSparkContext sc = new JavaSparkContext(config)) {
    /* Create a RowMatrix: 10 dense row vectors spread over 4 partitions. */
    List<Vector> vectors = new ArrayList<>(10);
    for (int i = 0; i < 10; i++) {
      vectors.add(Vectors.dense(getVectorElements()));
    }
    JavaRDD<Vector> rowsRDD = sc.parallelize(vectors, 4);
    RowMatrix rowMatrix = new RowMatrix(rowsRDD.rdd());
    // println(Object) already invokes toString(); no explicit call needed.
    System.out.println(rowMatrix);

    /* Create an IndexedRowMatrix from explicitly indexed rows. */
    JavaRDD<IndexedRow> indexedRows =
        sc.parallelize(
            Arrays.asList(
                new IndexedRow(0, vectors.get(0)), new IndexedRow(1, vectors.get(1))));
    IndexedRowMatrix indexedRowMatrix = new IndexedRowMatrix(indexedRows.rdd());
    System.out.println(indexedRowMatrix);

    /* Convert RowMatrix -> IndexedRowMatrix by zipping each row with its RDD index. */
    JavaRDD<IndexedRow> indexedRowsFromRowMatrix =
        rowMatrix
            .rows()
            .toJavaRDD()
            .zipWithIndex()
            .map((Tuple2<Vector, Long> t) -> new IndexedRow(t._2(), t._1()));
    IndexedRowMatrix indexedRowMatrixFromRowMatrix =
        new IndexedRowMatrix(indexedRowsFromRowMatrix.rdd());
    System.out.println(indexedRowMatrixFromRowMatrix);

    /* Create a CoordinateMatrix from (row, col, value) entries.
     * NOTE: the entries below build the 3x2 matrix
     *   M = [ 5 0
     *         0 3
     *         1 4 ]
     * (the original comment described its transpose [5 0 1; 0 3 4],
     * which does not match the row/column indices used). */
    JavaRDD<MatrixEntry> matrixEntries =
        sc.parallelize(
            Arrays.asList(
                new MatrixEntry(0, 0, 5.),
                new MatrixEntry(1, 1, 3.),
                new MatrixEntry(2, 0, 1.),
                new MatrixEntry(2, 1, 4.)));
    CoordinateMatrix coordMatrix = new CoordinateMatrix(matrixEntries.rdd());
    System.out.println(coordMatrix);

    printSeparator();
  }
}
/**
 * Tangent normalize a coverage profile.
 *
 * <p>Notes about the Spark tangent normalization can be found in docs/PoN/
 *
 * @param pon Not {@code null}
 * @param targetFactorNormalizedCounts ReadCountCollection of counts that have already been
 *     normalized fully (typically, including the target factor normalization). I.e. a coverage
 *     profile. The column names should be intact. Not {@code null}. See {@link
 *     TangentNormalizer#createCoverageProfile}
 * @param ctx Spark context; when {@code null} the normalization is performed locally with
 *     Apache Commons Math instead of Spark
 * @return never {@code null}
 */
private static TangentNormalizationResult tangentNormalize(
    final PoN pon, final ReadCountCollection targetFactorNormalizedCounts, JavaSparkContext ctx) {
  Utils.nonNull(pon, "PoN cannot be null.");
  Utils.nonNull(targetFactorNormalizedCounts, "targetFactorNormalizedCounts cannot be null.");
  Utils.nonNull(
      targetFactorNormalizedCounts.columnNames(),
      "targetFactorNormalizedCounts column names cannot be null.");
  ParamUtils.isPositive(
      targetFactorNormalizedCounts.columnNames().size(),
      "targetFactorNormalizedCounts column names cannot be an empty list.");

  // Maps case-sample targets onto the PoN's target space (and back again below).
  final Case2PoNTargetMapper targetMapper =
      new Case2PoNTargetMapper(targetFactorNormalizedCounts.targets(), pon.getPanelTargetNames());

  // The input counts with rows (targets) sorted so that they match the PoN's order.
  final RealMatrix tangentNormalizationRawInputCounts =
      targetMapper.fromCaseToPoNCounts(targetFactorNormalizedCounts.counts());

  // We prepare the counts for tangent normalization.
  final RealMatrix tangentNormalizationInputCounts =
      composeTangentNormalizationInputMatrix(tangentNormalizationRawInputCounts);

  if (ctx == null) {
    // Local (non-Spark) path: everything is done with Apache Commons Math matrices.

    // Calculate the beta-hats for the input read count columns (samples).
    logger.info("Calculating beta hats...");
    final RealMatrix tangentBetaHats =
        pon.betaHats(tangentNormalizationInputCounts, true, EPSILON);

    // Actual tangent normalization step.
    logger.info(
        "Performing actual tangent normalization ("
            + tangentNormalizationInputCounts.getColumnDimension()
            + " columns)...");
    final RealMatrix tangentNormalizedCounts =
        pon.tangentNormalization(tangentNormalizationInputCounts, tangentBetaHats, true);

    // Output the tangent normalized counts, mapped back to the case sample's target space.
    logger.info("Post-processing tangent normalization results...");
    final ReadCountCollection tangentNormalized =
        targetMapper.fromPoNtoCaseCountCollection(
            tangentNormalizedCounts, targetFactorNormalizedCounts.columnNames());
    final ReadCountCollection preTangentNormalized =
        targetMapper.fromPoNtoCaseCountCollection(
            tangentNormalizationInputCounts, targetFactorNormalizedCounts.columnNames());

    return new TangentNormalizationResult(
        tangentNormalized, preTangentNormalized, tangentBetaHats, targetFactorNormalizedCounts);
  } else {
    /* Using Spark: the code here is a little more complex for optimization purposes.

       Please see notes in docs/PoN ...

       Ahat^T = (C^T P^T) A^T

       Therefore, C^T is the RowMatrix

       pinv: P
       panel: A
       projection: Ahat
       cases: C
       betahat: C^T P^T
       tangentNormalizedCounts: C - Ahat
    */
    final RealMatrix pinv = pon.getReducedPanelPInverseCounts();
    final RealMatrix panel = pon.getReducedPanelCounts();

    // Make the C^T a distributed matrix (RowMatrix).
    final RowMatrix caseTDistMat =
        SparkConverter.convertRealMatrixToSparkRowMatrix(
            ctx, tangentNormalizationInputCounts.transpose(), TN_NUM_SLICES_SPARK);

    // Spark local matrices (transposed).
    // Doubles.concat flattens the 2-D data row-major; the 'true' flag and the trailing
    // transpose() yield the transposed local matrix that RowMatrix.multiply expects.
    final Matrix pinvTLocalMat =
        new DenseMatrix(
                pinv.getRowDimension(),
                pinv.getColumnDimension(),
                Doubles.concat(pinv.getData()),
                true)
            .transpose();
    final Matrix panelTLocalMat =
        new DenseMatrix(
                panel.getRowDimension(),
                panel.getColumnDimension(),
                Doubles.concat(panel.getData()),
                true)
            .transpose();

    // Calculate the projection transpose in a distributed matrix, then convert to Apache
    // Commons matrix (not transposed).
    final RowMatrix betahatDistMat = caseTDistMat.multiply(pinvTLocalMat);
    final RowMatrix projectionTDistMat = betahatDistMat.multiply(panelTLocalMat);
    final RealMatrix projection =
        SparkConverter.convertSparkRowMatrixToRealMatrix(
                projectionTDistMat, tangentNormalizationInputCounts.transpose().getRowDimension())
            .transpose();

    // Subtract the cases from the projection: tangentNormalizedCounts = C - Ahat.
    final RealMatrix tangentNormalizedCounts =
        tangentNormalizationInputCounts.subtract(projection);

    // Construct the result object and return it with the correct targets.
    final ReadCountCollection tangentNormalized =
        targetMapper.fromPoNtoCaseCountCollection(
            tangentNormalizedCounts, targetFactorNormalizedCounts.columnNames());
    final ReadCountCollection preTangentNormalized =
        targetMapper.fromPoNtoCaseCountCollection(
            tangentNormalizationInputCounts, targetFactorNormalizedCounts.columnNames());
    // betahatDistMat holds C^T P^T; convert back and transpose to get the beta-hats in the
    // same orientation as the local (non-Spark) path returns.
    final RealMatrix tangentBetaHats =
        SparkConverter.convertSparkRowMatrixToRealMatrix(
            betahatDistMat, tangentNormalizedCounts.getColumnDimension());

    return new TangentNormalizationResult(
        tangentNormalized,
        preTangentNormalized,
        tangentBetaHats.transpose(),
        targetFactorNormalizedCounts);
  }
}