Ejemplos de MRJobConfiguration en Java

Lenguaje de programación: Java

Namespace/Package Name: com.ibm.bi.dml.runtime.matrix.mapred

Clase / Tipo: MRJobConfiguration

Ejemplos en hotexamples.com: 4

Java MRJobConfiguration - 4 ejemplos encontrados. Estos son los ejemplos en Java del mundo real mejor valorados de com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar / Ocultar

addBinaryBlockSerializationFramework(2)

setInstructionsInReducer(1)

setUpMultipleOutputs(1)

setUpMultipleInputs(1)

setUniqueWorkingDir(1)

setNumReducers(1)

setMatrixValueClass(1)

setMatricesDimensions(1)

setInstructionsInMapper(1)

computeMatrixCharacteristics(1)

setCombineInstructions(1)

setBlocksSizes(1)

setAggregateInstructions(1)

getMatrixCharacteristicsForOutput(1)

getCombineInstruction(1)

deriveRepresentation(1)

setUpOutputIndexesForMapper(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: ReaderBinaryBlock.java Proyecto: ripplebud/systemml

  /**
   * Reads every block of a binary-block matrix from the sequence file(s) found under {@code path}
   * and adds a copy of each (indexes, block) pair to {@code dest}.
   *
   * <p>Each block is checked against the overall matrix dimensions before it is added. Offsets and
   * extents are computed in 64-bit arithmetic so that a large block index cannot wrap around in
   * {@code int} and slip past the bounds check.
   *
   * @param path location of the matrix; resolved to its part files via {@code
   *     getSequenceFilePaths}
   * @param job job configuration handed to the sequence file reader
   * @param fs file system used to open the part files
   * @param dest collection that receives a copy of every block read
   * @param rlen overall number of rows of the matrix
   * @param clen overall number of columns of the matrix
   * @param brlen number of rows per block, used to turn a block row index into a row offset
   * @param bclen number of columns per block, used to turn a block column index into a column
   *     offset
   * @throws IOException if reading fails or a block lies outside the overall matrix range
   */
  @SuppressWarnings("deprecation")
  private void readBinaryBlockMatrixBlocksFromHDFS(
      Path path,
      JobConf job,
      FileSystem fs,
      Collection<IndexedMatrixValue> dest,
      long rlen,
      long clen,
      int brlen,
      int bclen)
      throws IOException {
    // key and value are reused for every record, hence the copies when adding to dest
    MatrixIndexes key = new MatrixIndexes();
    MatrixBlock value = new MatrixBlock();

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION) {
      MRJobConfiguration.addBinaryBlockSerializationFramework(job);
    }

    for (Path lpath : getSequenceFilePaths(fs, path)) { // 1..N files
      // directly read from sequence files (individual partfiles)
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

      try {
        while (reader.next(key, value)) {
          // block indexes are 1-based; multiply in long to avoid 32-bit overflow
          long rowOffset = (key.getRowIndex() - 1) * (long) brlen;
          long colOffset = (key.getColumnIndex() - 1) * (long) bclen;
          long rowEnd = rowOffset + value.getNumRows();
          long colEnd = colOffset + value.getNumColumns();

          // bound check per block
          if (rowEnd < 0 || rowEnd > rlen || colEnd < 0 || colEnd > clen) {
            throw new IOException(
                "Matrix block ["
                    + (rowOffset + 1)
                    + ":"
                    + rowEnd
                    + ","
                    + (colOffset + 1)
                    + ":"
                    + colEnd
                    + "] "
                    + "out of overall matrix range [1:"
                    + rlen
                    + ",1:"
                    + clen
                    + "].");
          }

          // copy block to result
          dest.add(new IndexedMatrixValue(new MatrixIndexes(key), new MatrixBlock(value)));
        }
      } finally {
        IOUtilFunctions.closeSilently(reader);
      }
    }
  }

Ejemplo n.º 2

Mostrar archivo

Archivo: CombineMR.java Proyecto: ripplebud/systemml

    /**
     * Configures this instance from the given job: delegates to the superclass, loads the combine
     * instructions, records the block size of every result matrix and records the output indexes
     * of every combine instruction.
     *
     * @param job job configuration to read the settings from
     * @throws RuntimeException wrapping any exception raised while loading the combine
     *     instructions
     */
    public void configure(JobConf job) {
      super.configure(job);

      // load the combine instructions; failures are rethrown unchecked with their cause
      try {
        comb_instructions = MRJobConfiguration.getCombineInstruction(job);
      } catch (Exception ex) {
        throw new RuntimeException(ex);
      }

      // remember the (rows, cols) block size of each result matrix
      int pos = 0;
      while (pos < resultIndexes.length) {
        MatrixCharacteristics dims =
            MRJobConfiguration.getMatrixCharacteristicsForOutput(job, resultIndexes[pos]);
        Pair<Integer, Integer> blockSize =
            new Pair<Integer, Integer>(dims.getRowsPerBlock(), dims.getColsPerBlock());
        outputBlockSizes.put(resultIndexes[pos], blockSize);
        pos++;
      }

      // map the output of each combine instruction to its output indexes
      for (MRInstruction instruction : comb_instructions) {
        outputIndexesMapping.put(instruction.output, getOutputIndexes(instruction.output));
      }
    }

Ejemplo n.º 3

Mostrar archivo

Archivo: CombineMR.java Proyecto: ripplebud/systemml

  /**
   * Configures and synchronously runs the combine MapReduce job.
   *
   * <p>Input {@code i} is assigned the byte index {@code i}. The mapper, aggregate and reducer
   * instruction strings are set to the empty string; only {@code combineInstructions} is
   * configured.
   *
   * @param inst job instruction; only used to print the complete instruction when trace logging is
   *     enabled
   * @param inputs input file names; at most {@code Byte.MAX_VALUE} entries because each input is
   *     addressed by a non-negative byte index
   * @param inputInfos format information of the inputs
   * @param rlens row counts of the input matrices
   * @param clens column counts of the input matrices
   * @param brlens rows per block of the input matrices
   * @param bclens columns per block of the input matrices
   * @param combineInstructions combine instructions to configure on the job
   * @param numReducers requested number of reducers, passed to {@code
   *     MRJobConfiguration.setNumReducers}
   * @param replication value written to the {@code dfs.replication} job property
   * @param resultIndexes byte indexes of the result matrices
   * @param outputs output file names
   * @param outputInfos format information of the outputs
   * @return the computed matrix characteristics together with the job's success flag
   * @throws IllegalArgumentException if there are more inputs than byte indexes can address
   * @throws Exception if configuring or running the job fails
   */
  public static JobReturn runJob(
      MRJobInstruction inst,
      String[] inputs,
      InputInfo[] inputInfos,
      long[] rlens,
      long[] clens,
      int[] brlens,
      int[] bclens,
      String combineInstructions,
      int numReducers,
      int replication,
      byte[] resultIndexes,
      String[] outputs,
      OutputInfo[] outputInfos)
      throws Exception {
    JobConf job;
    job = new JobConf(CombineMR.class);
    job.setJobName("Standalone-MR");

    boolean inBlockRepresentation = MRJobConfiguration.deriveRepresentation(inputInfos);

    // whether use block representation or cell representation
    MRJobConfiguration.setMatrixValueClass(job, inBlockRepresentation);

    // fail with a clear message instead of letting a byte loop counter wrap around to -128
    if (inputs.length > Byte.MAX_VALUE) {
      throw new IllegalArgumentException(
          "Too many inputs: " + inputs.length + " (at most " + Byte.MAX_VALUE + " supported).");
    }
    byte[] inputIndexes = new byte[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
      inputIndexes[i] = (byte) i;
    }

    // set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(
        job,
        inputIndexes,
        inputs,
        inputInfos,
        brlens,
        bclens,
        true,
        inBlockRepresentation ? ConvertTarget.BLOCK : ConvertTarget.CELL);

    // set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, inputIndexes, rlens, clens);

    // set up the block size
    MRJobConfiguration.setBlocksSizes(job, inputIndexes, brlens, bclens);

    // set up unary instructions that will perform in the mapper
    MRJobConfiguration.setInstructionsInMapper(job, "");

    // set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setAggregateInstructions(job, "");

    // set up the instructions that will happen in the reducer, after the aggregation instructions
    MRJobConfiguration.setInstructionsInReducer(job, "");

    MRJobConfiguration.setCombineInstructions(job, combineInstructions);

    // set up the replication factor for the results
    job.setInt("dfs.replication", replication);

    // set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes =
        MRJobConfiguration.setUpOutputIndexesForMapper(
            job, inputIndexes, null, null, combineInstructions, resultIndexes);

    // set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(
        job, resultIndexes, null, outputs, outputInfos, inBlockRepresentation);

    // configure mapper and the mapper output key value pairs
    job.setMapperClass(GMRMapper.class);

    job.setMapOutputKeyClass(MatrixIndexes.class);
    if (inBlockRepresentation) {
      job.setMapOutputValueClass(TaggedMatrixBlock.class);
    } else {
      job.setMapOutputValueClass(TaggedMatrixCell.class);
    }

    // configure reducer
    job.setReducerClass(InnerReducer.class);

    MatrixChar_N_ReducerGroups ret =
        MRJobConfiguration.computeMatrixCharacteristics(
            job,
            inputIndexes,
            null,
            null,
            null,
            combineInstructions,
            resultIndexes,
            mapoutputIndexes,
            false);
    MatrixCharacteristics[] stats = ret.stats;

    // set up the number of reducers
    MRJobConfiguration.setNumReducers(job, ret.numReducerGroups, numReducers);

    // Print the complete instruction
    if (LOG.isTraceEnabled()) {
      inst.printCompleteMRJobInstruction(stats);
    }

    // set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    // blocks until the job has finished
    RunningJob runjob = JobClient.runJob(job);

    return new JobReturn(stats, runjob.isSuccessful());
  }

Ejemplo n.º 4

Mostrar archivo

Archivo: ReaderBinaryBlock.java Proyecto: ripplebud/systemml

  /**
   * Reads a binary-block matrix from the sequence file(s) found under {@code path} and places every
   * non-empty block at its position inside {@code dest}.
   *
   * <p>Note: For efficiency, we directly use SequenceFile.Reader instead of
   * SequenceFileInputFormat-InputSplits-RecordReader (SequenceFileRecordReader). First, this has no
   * drawbacks since the SequenceFileRecordReader internally uses SequenceFile.Reader as well.
   * Second, it is advantageous if the actual sequence files are larger than the file splits created
   * by informat.getSplits (which is usually aligned to the HDFS block size) because then there is
   * overhead for finding the actual split between our 1k-1k blocks. This case happens if the read
   * matrix was created by CP or when jobs directly write to large output files (e.g., parfor
   * matrix partitioning).
   *
   * <p>Block offsets are computed in 64-bit arithmetic and validated against the overall matrix
   * dimensions before being narrowed to {@code int}, so a large block index cannot wrap around and
   * slip past the bounds check.
   *
   * @param path location of the matrix; resolved to its part files via {@code
   *     getSequenceFilePaths}
   * @param job job configuration handed to the sequence file reader
   * @param fs file system used to open the part files
   * @param dest target matrix block; its current sparse/dense format decides how blocks are
   *     inserted
   * @param rlen overall number of rows of the matrix
   * @param clen overall number of columns of the matrix
   * @param brlen number of rows per block, used to turn a block row index into a row offset
   * @param bclen number of columns per block, used to turn a block column index into a column
   *     offset
   * @throws IOException if reading fails or a block lies outside the overall matrix range
   * @throws DMLRuntimeException declared by the signature; presumably raised by the matrix block
   *     operations used to fill {@code dest}
   */
  @SuppressWarnings("deprecation")
  private static void readBinaryBlockMatrixFromHDFS(
      Path path,
      JobConf job,
      FileSystem fs,
      MatrixBlock dest,
      long rlen,
      long clen,
      int brlen,
      int bclen)
      throws IOException, DMLRuntimeException {
    boolean sparse = dest.isInSparseFormat();
    MatrixIndexes key = new MatrixIndexes();
    MatrixBlock value = new MatrixBlock();

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION) {
      MRJobConfiguration.addBinaryBlockSerializationFramework(job);
    }

    for (Path lpath : getSequenceFilePaths(fs, path)) { // 1..N files
      // directly read from sequence files (individual partfiles)
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

      try {
        // note: next(key, value) does not yet exploit the given serialization classes, record
        // reader does but is generally slower.
        while (reader.next(key, value)) {
          // empty block filter (skip entire block)
          if (value.isEmptyBlock(false)) {
            continue;
          }

          int rows = value.getNumRows();
          int cols = value.getNumColumns();

          // block indexes are 1-based; multiply in long to avoid 32-bit overflow
          long rowStart = (key.getRowIndex() - 1) * (long) brlen;
          long colStart = (key.getColumnIndex() - 1) * (long) bclen;
          long rowEnd = rowStart + rows;
          long colEnd = colStart + cols;

          // bound check per block
          if (rowEnd < 0 || rowEnd > rlen || colEnd < 0 || colEnd > clen) {
            throw new IOException(
                "Matrix block ["
                    + (rowStart + 1)
                    + ":"
                    + rowEnd
                    + ","
                    + (colStart + 1)
                    + ":"
                    + colEnd
                    + "] "
                    + "out of overall matrix range [1:"
                    + rlen
                    + ",1:"
                    + clen
                    + "].");
          }

          // narrowing after validation; assumes rlen/clen fit in int for an in-memory dest block
          // - TODO confirm
          int row_offset = (int) rowStart;
          int col_offset = (int) colStart;

          // copy block to result
          if (sparse) {
            dest.appendToSparse(value, row_offset, col_offset);
            // note: append requires final sort
          } else {
            dest.copy(
                row_offset, row_offset + rows - 1, col_offset, col_offset + cols - 1, value, false);
          }
        }
      } finally {
        IOUtilFunctions.closeSilently(reader);
      }
    }

    if (sparse && clen > bclen) {
      // no need to sort if 1 column block since always sorted
      dest.sortSparseRows();
    }
  }