Java MRJobConfiguration.addBinaryBlockSerializationFramework Beispiele

Programmiersprache: Java

Namespace / Paketname: com.ibm.bi.dml.runtime.matrix.mapred

Klasse / Typ: MRJobConfiguration

Methode / Funktion: addBinaryBlockSerializationFramework

Beispiele auf hotexamples.com: 2

Java MRJobConfiguration.addBinaryBlockSerializationFramework - 2 Beispiele gefunden. Dies sind die am besten bewerteten Java Beispiele für die com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.addBinaryBlockSerializationFramework, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

addBinaryBlockSerializationFramework(2)

setInstructionsInReducer(1)

setUpMultipleOutputs(1)

setUpMultipleInputs(1)

setUniqueWorkingDir(1)

setNumReducers(1)

setMatrixValueClass(1)

setMatricesDimensions(1)

setInstructionsInMapper(1)

computeMatrixCharacteristics(1)

setCombineInstructions(1)

setBlocksSizes(1)

setAggregateInstructions(1)

getMatrixCharacteristicsForOutput(1)

getCombineInstruction(1)

deriveRepresentation(1)

setUpOutputIndexesForMapper(1)

Beispiel #1

Datei anzeigen

Datei: ReaderBinaryBlock.java Projekt: ripplebud/systemml

  /**
   * @param path
   * @param job
   * @param fs
   * @param dest
   * @param rlen
   * @param clen
   * @param brlen
   * @param bclen
   * @throws IOException
   * @throws IllegalAccessException
   * @throws InstantiationException
   */
  @SuppressWarnings("deprecation")
  private void readBinaryBlockMatrixBlocksFromHDFS(
      Path path,
      JobConf job,
      FileSystem fs,
      Collection<IndexedMatrixValue> dest,
      long rlen,
      long clen,
      int brlen,
      int bclen)
      throws IOException {
    MatrixIndexes key = new MatrixIndexes();
    MatrixBlock value = new MatrixBlock();

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
      MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    for (Path lpath : getSequenceFilePaths(fs, path)) // 1..N files
    {
      // directly read from sequence files (individual partfiles)
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

      try {
        while (reader.next(key, value)) {
          int row_offset = (int) (key.getRowIndex() - 1) * brlen;
          int col_offset = (int) (key.getColumnIndex() - 1) * bclen;
          int rows = value.getNumRows();
          int cols = value.getNumColumns();

          // bound check per block
          if (row_offset + rows < 0
              || row_offset + rows > rlen
              || col_offset + cols < 0
              || col_offset + cols > clen) {
            throw new IOException(
                "Matrix block ["
                    + (row_offset + 1)
                    + ":"
                    + (row_offset + rows)
                    + ","
                    + (col_offset + 1)
                    + ":"
                    + (col_offset + cols)
                    + "] "
                    + "out of overall matrix range [1:"
                    + rlen
                    + ",1:"
                    + clen
                    + "].");
          }

          // copy block to result
          dest.add(new IndexedMatrixValue(new MatrixIndexes(key), new MatrixBlock(value)));
        }
      } finally {
        IOUtilFunctions.closeSilently(reader);
      }
    }
  }

Beispiel #2

Datei anzeigen

Datei: ReaderBinaryBlock.java Projekt: ripplebud/systemml

  /**
   * Note: For efficiency, we directly use SequenceFile.Reader instead of SequenceFileInputFormat-
   * InputSplits-RecordReader (SequenceFileRecordReader). First, this has no drawbacks since the
   * SequenceFileRecordReader internally uses SequenceFile.Reader as well. Second, it is
   * advantageous if the actual sequence files are larger than the file splits created by
   * informat.getSplits (which is usually aligned to the HDFS block size) because then there is
   * overhead for finding the actual split between our 1k-1k blocks. This case happens if the read
   * matrix was create by CP or when jobs directly write to large output files (e.g., parfor matrix
   * partitioning).
   *
   * @param path
   * @param job
   * @param fs
   * @param dest
   * @param rlen
   * @param clen
   * @param brlen
   * @param bclen
   * @throws IOException
   * @throws IllegalAccessException
   * @throws InstantiationException
   * @throws DMLRuntimeException
   */
  @SuppressWarnings("deprecation")
  private static void readBinaryBlockMatrixFromHDFS(
      Path path,
      JobConf job,
      FileSystem fs,
      MatrixBlock dest,
      long rlen,
      long clen,
      int brlen,
      int bclen)
      throws IOException, DMLRuntimeException {
    boolean sparse = dest.isInSparseFormat();
    MatrixIndexes key = new MatrixIndexes();
    MatrixBlock value = new MatrixBlock();

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
      MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    for (Path lpath : getSequenceFilePaths(fs, path)) // 1..N files
    {
      // directly read from sequence files (individual partfiles)
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

      try {
        // note: next(key, value) does not yet exploit the given serialization classes, record
        // reader does but is generally slower.
        while (reader.next(key, value)) {
          // empty block filter (skip entire block)
          if (value.isEmptyBlock(false)) continue;

          int row_offset = (int) (key.getRowIndex() - 1) * brlen;
          int col_offset = (int) (key.getColumnIndex() - 1) * bclen;

          int rows = value.getNumRows();
          int cols = value.getNumColumns();

          // bound check per block
          if (row_offset + rows < 0
              || row_offset + rows > rlen
              || col_offset + cols < 0
              || col_offset + cols > clen) {
            throw new IOException(
                "Matrix block ["
                    + (row_offset + 1)
                    + ":"
                    + (row_offset + rows)
                    + ","
                    + (col_offset + 1)
                    + ":"
                    + (col_offset + cols)
                    + "] "
                    + "out of overall matrix range [1:"
                    + rlen
                    + ",1:"
                    + clen
                    + "].");
          }

          // copy block to result
          if (sparse) {
            dest.appendToSparse(value, row_offset, col_offset);
            // note: append requires final sort
          } else {
            dest.copy(
                row_offset, row_offset + rows - 1, col_offset, col_offset + cols - 1, value, false);
          }
        }
      } finally {
        IOUtilFunctions.closeSilently(reader);
      }
    }

    if (sparse && clen > bclen) {
      // no need to sort if 1 column block since always sorted
      dest.sortSparseRows();
    }
  }