예제 #1
0
  /**
   * @param path
   * @param job
   * @param fs
   * @param dest
   * @param rlen
   * @param clen
   * @param brlen
   * @param bclen
   * @throws IOException
   * @throws IllegalAccessException
   * @throws InstantiationException
   */
  @SuppressWarnings("deprecation")
  private void readBinaryBlockMatrixBlocksFromHDFS(
      Path path,
      JobConf job,
      FileSystem fs,
      Collection<IndexedMatrixValue> dest,
      long rlen,
      long clen,
      int brlen,
      int bclen)
      throws IOException {
    MatrixIndexes key = new MatrixIndexes();
    MatrixBlock value = new MatrixBlock();

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
      MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    for (Path lpath : getSequenceFilePaths(fs, path)) // 1..N files
    {
      // directly read from sequence files (individual partfiles)
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

      try {
        while (reader.next(key, value)) {
          int row_offset = (int) (key.getRowIndex() - 1) * brlen;
          int col_offset = (int) (key.getColumnIndex() - 1) * bclen;
          int rows = value.getNumRows();
          int cols = value.getNumColumns();

          // bound check per block
          if (row_offset + rows < 0
              || row_offset + rows > rlen
              || col_offset + cols < 0
              || col_offset + cols > clen) {
            throw new IOException(
                "Matrix block ["
                    + (row_offset + 1)
                    + ":"
                    + (row_offset + rows)
                    + ","
                    + (col_offset + 1)
                    + ":"
                    + (col_offset + cols)
                    + "] "
                    + "out of overall matrix range [1:"
                    + rlen
                    + ",1:"
                    + clen
                    + "].");
          }

          // copy block to result
          dest.add(new IndexedMatrixValue(new MatrixIndexes(key), new MatrixBlock(value)));
        }
      } finally {
        IOUtilFunctions.closeSilently(reader);
      }
    }
  }
예제 #2
0
    private void processBinaryCombineInstruction(CombineBinaryInstruction ins, Reporter reporter)
        throws IOException {

      IndexedMatrixValue in1 = cachedValues.getFirst(ins.input1);
      IndexedMatrixValue in2 = cachedValues.getFirst(ins.input2);
      if (in1 == null && in2 == null) return;

      MatrixIndexes indexes;
      if (in1 != null) indexes = in1.getIndexes();
      else indexes = in2.getIndexes();

      // if one of the inputs is null, then it is a all zero block
      if (in1 == null) {
        in1 = zeroInput;
        in1.getValue().reset(in2.getValue().getNumRows(), in2.getValue().getNumColumns());
      }

      if (in2 == null) {
        in2 = zeroInput;
        in2.getValue().reset(in1.getValue().getNumRows(), in1.getValue().getNumColumns());
      }

      // System.out.println("in1:"+in1);
      // System.out.println("in2:"+in2);

      // process instruction
      try {
        /*in1.getValue().combineOperations(in2.getValue(), collectFinalMultipleOutputs,
        reporter, keyBuff, valueBuff, getOutputIndexes(ins.output));*/

        ArrayList<Integer> outputIndexes = outputIndexesMapping.get(ins.output);
        for (int r = 0; r < in1.getValue().getNumRows(); r++)
          for (int c = 0; c < in1.getValue().getNumColumns(); c++) {
            Pair<Integer, Integer> blockSize = outputBlockSizes.get(ins.output);
            keyBuff.setIndexes(
                UtilFunctions.cellIndexCalculation(indexes.getRowIndex(), blockSize.getKey(), r),
                UtilFunctions.cellIndexCalculation(
                    indexes.getColumnIndex(), blockSize.getValue(), c));
            valueBuff.setValue(in1.getValue().getValue(r, c));
            double temp = in2.getValue().getValue(r, c);
            if (ins.isSecondInputWeight()) {
              valueBuff.setWeight(temp);
              valueBuff.setOtherValue(0);
            } else {
              valueBuff.setWeight(1);
              valueBuff.setOtherValue(temp);
            }

            for (int i : outputIndexes) {
              collectFinalMultipleOutputs.collectOutput(keyBuff, valueBuff, i, reporter);
              // System.out.println("output: "+keyBuff+" -- "+valueBuff);
            }
          }

      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
    @Override
    protected Tuple2<MatrixIndexes, MatrixBlock> computeNext(Tuple2<MatrixIndexes, MatrixBlock> arg)
        throws Exception {
      // unpack partition key-value pairs
      MatrixIndexes ix = arg._1();
      MatrixBlock in1 = arg._2();

      // get the rhs block
      int rix = (int) ((_vtype == VectorType.COL_VECTOR) ? ix.getRowIndex() : 1);
      int cix = (int) ((_vtype == VectorType.COL_VECTOR) ? 1 : ix.getColumnIndex());
      MatrixBlock in2 = _pmV.getMatrixBlock(rix, cix);

      // execute the binary operation
      MatrixBlock ret = (MatrixBlock) (in1.binaryOperations(_op, in2, new MatrixBlock()));
      return new Tuple2<MatrixIndexes, MatrixBlock>(ix, ret);
    }
예제 #4
0
  /**
   * Note: For efficiency, we directly use SequenceFile.Reader instead of SequenceFileInputFormat-
   * InputSplits-RecordReader (SequenceFileRecordReader). First, this has no drawbacks since the
   * SequenceFileRecordReader internally uses SequenceFile.Reader as well. Second, it is
   * advantageous if the actual sequence files are larger than the file splits created by
   * informat.getSplits (which is usually aligned to the HDFS block size) because then there is
   * overhead for finding the actual split between our 1k-1k blocks. This case happens if the read
   * matrix was create by CP or when jobs directly write to large output files (e.g., parfor matrix
   * partitioning).
   *
   * @param path
   * @param job
   * @param fs
   * @param dest
   * @param rlen
   * @param clen
   * @param brlen
   * @param bclen
   * @throws IOException
   * @throws IllegalAccessException
   * @throws InstantiationException
   * @throws DMLRuntimeException
   */
  @SuppressWarnings("deprecation")
  private static void readBinaryBlockMatrixFromHDFS(
      Path path,
      JobConf job,
      FileSystem fs,
      MatrixBlock dest,
      long rlen,
      long clen,
      int brlen,
      int bclen)
      throws IOException, DMLRuntimeException {
    boolean sparse = dest.isInSparseFormat();
    MatrixIndexes key = new MatrixIndexes();
    MatrixBlock value = new MatrixBlock();

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
      MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    for (Path lpath : getSequenceFilePaths(fs, path)) // 1..N files
    {
      // directly read from sequence files (individual partfiles)
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

      try {
        // note: next(key, value) does not yet exploit the given serialization classes, record
        // reader does but is generally slower.
        while (reader.next(key, value)) {
          // empty block filter (skip entire block)
          if (value.isEmptyBlock(false)) continue;

          int row_offset = (int) (key.getRowIndex() - 1) * brlen;
          int col_offset = (int) (key.getColumnIndex() - 1) * bclen;

          int rows = value.getNumRows();
          int cols = value.getNumColumns();

          // bound check per block
          if (row_offset + rows < 0
              || row_offset + rows > rlen
              || col_offset + cols < 0
              || col_offset + cols > clen) {
            throw new IOException(
                "Matrix block ["
                    + (row_offset + 1)
                    + ":"
                    + (row_offset + rows)
                    + ","
                    + (col_offset + 1)
                    + ":"
                    + (col_offset + cols)
                    + "] "
                    + "out of overall matrix range [1:"
                    + rlen
                    + ",1:"
                    + clen
                    + "].");
          }

          // copy block to result
          if (sparse) {
            dest.appendToSparse(value, row_offset, col_offset);
            // note: append requires final sort
          } else {
            dest.copy(
                row_offset, row_offset + rows - 1, col_offset, col_offset + cols - 1, value, false);
          }
        }
      } finally {
        IOUtilFunctions.closeSilently(reader);
      }
    }

    if (sparse && clen > bclen) {
      // no need to sort if 1 column block since always sorted
      dest.sortSparseRows();
    }
  }