예제 #1
0
  /**
   * Reads every block of a binary-block matrix from the sequence file(s) found under {@code path}
   * and adds a copy of each block, together with a copy of its block indexes, to {@code dest}.
   *
   * <p>The reader reuses a single key/value pair across records, which is why each record is
   * copied before it is stored. Each block is validated against the overall matrix dimensions;
   * the cell offsets are computed in 64-bit arithmetic so that large matrices ({@code rlen} and
   * {@code clen} are {@code long}) cannot overflow the check.
   *
   * @param path matrix file or directory, resolved to 1..N sequence files
   * @param job job configuration used to open the sequence file readers
   * @param fs file system the sequence files are read from
   * @param dest collection that receives the copied blocks
   * @param rlen overall number of rows of the matrix
   * @param clen overall number of columns of the matrix
   * @param brlen number of rows per block
   * @param bclen number of columns per block
   * @throws IOException if a file cannot be read or a block lies outside the range
   *     {@code [1:rlen, 1:clen]}
   */
  @SuppressWarnings("deprecation")
  private void readBinaryBlockMatrixBlocksFromHDFS(
      Path path,
      JobConf job,
      FileSystem fs,
      Collection<IndexedMatrixValue> dest,
      long rlen,
      long clen,
      int brlen,
      int bclen)
      throws IOException {
    MatrixIndexes key = new MatrixIndexes();
    MatrixBlock value = new MatrixBlock();

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION) {
      MRJobConfiguration.addBinaryBlockSerializationFramework(job);
    }

    for (Path lpath : getSequenceFilePaths(fs, path)) // 1..N files
    {
      // directly read from sequence files (individual partfiles)
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

      try {
        while (reader.next(key, value)) {
          // block indexes are 1-based; multiply as long so the product cannot wrap around
          long rowOffset = (key.getRowIndex() - 1) * (long) brlen;
          long colOffset = (key.getColumnIndex() - 1) * (long) bclen;
          int rows = value.getNumRows();
          int cols = value.getNumColumns();

          // bound check per block (a negative offset means a block index below 1)
          if (rowOffset < 0
              || rowOffset + rows > rlen
              || colOffset < 0
              || colOffset + cols > clen) {
            throw new IOException(
                "Matrix block ["
                    + (rowOffset + 1)
                    + ":"
                    + (rowOffset + rows)
                    + ","
                    + (colOffset + 1)
                    + ":"
                    + (colOffset + cols)
                    + "] "
                    + "out of overall matrix range [1:"
                    + rlen
                    + ",1:"
                    + clen
                    + "].");
          }

          // copy block to result (key and value are reused by the next call to reader.next)
          dest.add(new IndexedMatrixValue(new MatrixIndexes(key), new MatrixBlock(value)));
        }
      } finally {
        IOUtilFunctions.closeSilently(reader);
      }
    }
  }
예제 #2
0
  /**
   * Writes all records held in {@code outCache} to the collector, optionally emits one empty
   * block per block position of the output matrix, records the elapsed time on the cached
   * reporter (if any) and finally delegates to {@code super.close()}.
   *
   * @throws IOException if writing a record or closing the parent fails
   */
  public void close() throws IOException {
    final long begin = System.currentTimeMillis();

    // flush every cached record
    for (Entry<MatrixIndexes, MatrixValue> cached : outCache.entrySet()) {
      realWriteToCollector(cached.getKey(), cached.getValue());
    }

    // handle empty block output (on first reduce task only);
    // required for rejecting empty blocks in mappers
    if (outputDummyRecords) {
      final long numRows = dim1.getRows();
      final long numCols = dim2.getCols();
      final int rowsPerBlock = dim1.getRowsPerBlock();
      final int colsPerBlock = dim2.getColsPerBlock();
      final MatrixIndexes dummyIx = new MatrixIndexes();
      final MatrixBlock dummyBlock = new MatrixBlock();

      long blockRow = 1;
      for (long rowStart = 0; rowStart < numRows; rowStart += rowsPerBlock) {
        long blockCol = 1;
        for (long colStart = 0; colStart < numCols; colStart += colsPerBlock) {
          // boundary blocks may be smaller than the nominal block size
          final int rowsInBlock = (int) Math.min((long) rowsPerBlock, numRows - rowStart);
          final int colsInBlock = (int) Math.min((long) colsPerBlock, numCols - colStart);
          dummyIx.setIndexes(blockRow, blockCol);
          dummyBlock.reset(rowsInBlock, colsInBlock);
          collectFinalMultipleOutputs.collectOutput(dummyIx, dummyBlock, 0, cachedReporter);
          blockCol++;
        }
        blockRow++;
      }
    }

    if (cachedReporter != null) {
      final long elapsed = System.currentTimeMillis() - begin;
      cachedReporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, elapsed);
    }
    super.close();
  }
예제 #3
0
    /**
     * Combines the cached blocks of the two inputs of {@code ins} cell by cell and emits one
     * record per cell to every output index registered for {@code ins.output}.
     *
     * <p>If neither input is cached nothing is emitted. If exactly one input is missing it is
     * replaced by the shared {@code zeroInput}, reset to the shape of the present input. The
     * second input is written either as the weight or as the "other value" of the emitted record,
     * depending on {@code ins.isSecondInputWeight()}.
     *
     * @param ins the binary combine instruction to process
     * @param reporter reporter passed through to the output collector
     * @throws IOException declared by the signature; failures inside the emit loop are rethrown
     *     wrapped in a {@link RuntimeException}
     */
    private void processBinaryCombineInstruction(CombineBinaryInstruction ins, Reporter reporter)
        throws IOException {

      IndexedMatrixValue first = cachedValues.getFirst(ins.input1);
      IndexedMatrixValue second = cachedValues.getFirst(ins.input2);
      if (first == null && second == null) {
        return;
      }

      // take the block indexes from whichever input is actually present,
      // before a missing input is replaced by the zero placeholder
      final MatrixIndexes blockIx = (first != null) ? first.getIndexes() : second.getIndexes();

      // a missing input is an all-zero block of the same shape as the present one
      if (first == null) {
        first = zeroInput;
        first.getValue().reset(second.getValue().getNumRows(), second.getValue().getNumColumns());
      } else if (second == null) {
        second = zeroInput;
        second.getValue().reset(first.getValue().getNumRows(), first.getValue().getNumColumns());
      }

      // process instruction
      try {
        ArrayList<Integer> targets = outputIndexesMapping.get(ins.output);
        for (int row = 0; row < first.getValue().getNumRows(); row++) {
          for (int col = 0; col < first.getValue().getNumColumns(); col++) {
            Pair<Integer, Integer> blockSize = outputBlockSizes.get(ins.output);
            keyBuff.setIndexes(
                UtilFunctions.cellIndexCalculation(blockIx.getRowIndex(), blockSize.getKey(), row),
                UtilFunctions.cellIndexCalculation(
                    blockIx.getColumnIndex(), blockSize.getValue(), col));
            valueBuff.setValue(first.getValue().getValue(row, col));

            double secondCell = second.getValue().getValue(row, col);
            if (!ins.isSecondInputWeight()) {
              valueBuff.setWeight(1);
              valueBuff.setOtherValue(secondCell);
            } else {
              valueBuff.setWeight(secondCell);
              valueBuff.setOtherValue(0);
            }

            for (int target : targets) {
              collectFinalMultipleOutputs.collectOutput(keyBuff, valueBuff, target, reporter);
            }
          }
        }
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
    /**
     * Applies the binary operator {@code _op} to the block of the given pair and the matching
     * block fetched from {@code _pmV}.
     *
     * <p>For a column vector the right-hand block is addressed by the pair's row index and column
     * 1; otherwise by row 1 and the pair's column index.
     *
     * @param arg block indexes and block of the left-hand input
     * @return the unchanged block indexes paired with the result block
     * @throws Exception if fetching the right-hand block or the binary operation fails
     */
    @Override
    protected Tuple2<MatrixIndexes, MatrixBlock> computeNext(Tuple2<MatrixIndexes, MatrixBlock> arg)
        throws Exception {
      // unpack the key-value pair
      final MatrixIndexes blockIx = arg._1();
      final MatrixBlock lhs = arg._2();

      // locate the right-hand block
      final int rhsRow;
      final int rhsCol;
      if (_vtype == VectorType.COL_VECTOR) {
        rhsRow = (int) blockIx.getRowIndex();
        rhsCol = 1;
      } else {
        rhsRow = 1;
        rhsCol = (int) blockIx.getColumnIndex();
      }
      final MatrixBlock rhs = _pmV.getMatrixBlock(rhsRow, rhsCol);

      // execute the binary operation into a fresh output block
      final MatrixBlock result = (MatrixBlock) lhs.binaryOperations(_op, rhs, new MatrixBlock());
      return new Tuple2<MatrixIndexes, MatrixBlock>(blockIx, result);
    }
예제 #5
0
    /**
     * Combines the cached blocks of the three inputs of {@code ins} cell by cell and emits one
     * record per cell to every output index registered for {@code ins.output}. The first input
     * supplies the value, the second the "other value" and the third the weight of each record.
     *
     * <p>If no input is cached nothing is emitted. Missing inputs are replaced by the shared
     * {@code zeroInput}, reset to the shape of the first present input. The block indexes used
     * for the output cell indexes are taken from that same present input <em>before</em> the
     * substitution, so they never come from the zero placeholder (this mirrors the binary combine
     * path).
     *
     * @param ins the ternary combine instruction to process
     * @param reporter reporter passed through to the output collector
     * @throws IOException declared by the signature; failures inside the emit loop are rethrown
     *     wrapped in a {@link RuntimeException}
     */
    private void processTernaryCombineInstruction(CombineTernaryInstruction ins, Reporter reporter)
        throws IOException {
      IndexedMatrixValue in1 = cachedValues.getFirst(ins.input1);
      IndexedMatrixValue in2 = cachedValues.getFirst(ins.input2);
      IndexedMatrixValue in3 = cachedValues.getFirst(ins.input3);
      if (in1 == null && in2 == null && in3 == null) {
        return;
      }

      // the first present input defines both the block shape and the block indexes
      final IndexedMatrixValue present = (in1 != null) ? in1 : ((in2 != null) ? in2 : in3);
      final int nr = present.getValue().getNumRows();
      final int nc = present.getValue().getNumColumns();
      final MatrixIndexes indexes = present.getIndexes();

      // if one of the inputs is null, then it is an all zero block; several missing inputs
      // share the same zeroInput instance, which is fine because all of them read as zero
      if (in1 == null) {
        in1 = zeroInput;
        in1.getValue().reset(nr, nc);
      }

      if (in2 == null) {
        in2 = zeroInput;
        in2.getValue().reset(nr, nc);
      }

      if (in3 == null) {
        in3 = zeroInput;
        in3.getValue().reset(nr, nc);
      }

      // process instruction
      try {
        ArrayList<Integer> outputIndexes = outputIndexesMapping.get(ins.output);
        // the block size depends only on the instruction output, so look it up once
        Pair<Integer, Integer> blockSize = outputBlockSizes.get(ins.output);
        for (int r = 0; r < nr; r++) {
          for (int c = 0; c < nc; c++) {
            keyBuff.setIndexes(
                UtilFunctions.cellIndexCalculation(indexes.getRowIndex(), blockSize.getKey(), r),
                UtilFunctions.cellIndexCalculation(
                    indexes.getColumnIndex(), blockSize.getValue(), c));
            valueBuff.setValue(in1.getValue().getValue(r, c));
            valueBuff.setOtherValue(in2.getValue().getValue(r, c));
            valueBuff.setWeight(in3.getValue().getValue(r, c));
            for (int i : outputIndexes) {
              collectFinalMultipleOutputs.collectOutput(keyBuff, valueBuff, i, reporter);
            }
          }
        }
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
예제 #6
0
  /**
   * Processes one incoming value of the join.
   *
   * <p>Values with tag 0 belong to the cached matrix and are only added to the cache. Any other
   * value is the probing side: it is combined with each of the first {@code cacheSize} cached
   * entries via an aggregate binary operation, and every result is emitted. Whether the cached
   * entry is the left or the right operand is decided by {@code tagForLeft}.
   *
   * @param tag 0 for the cached matrix, anything else for the probing matrix
   * @param rValue the incoming value with its remaining index
   * @throws Exception if caching, the aggregate binary operation or emitting the output fails; a
   *     {@code DMLUnsupportedOperationException} is rethrown wrapped in an {@code IOException}
   */
  private void processJoin(int tag, RemainIndexValue rValue) throws Exception {
    // cached matrix: just buffer the value
    if (tag == 0) {
      addToCache(rValue, tag);
      return;
    }

    // probing matrix: pair the incoming value with every cached value
    for (int pos = 0; pos < cacheSize; pos++) {
      final RemainIndexValue cached = cache.get(pos);
      final boolean cachedIsLeft = (tagForLeft == 0);
      final RemainIndexValue lhs = cachedIsLeft ? cached : rValue;
      final RemainIndexValue rhs = cachedIsLeft ? rValue : cached;

      indexesbuffer.setIndexes(lhs.remainIndex, rhs.remainIndex);
      try {
        OperationsOnMatrixValues.performAggregateBinaryIgnoreIndexes(
            lhs.value,
            rhs.value,
            valueBuffer,
            (AggregateBinaryOperator) aggBinInstruction.getOperator());
      } catch (DMLUnsupportedOperationException e) {
        throw new IOException(e);
      }

      // emitted unconditionally, i.e. without a non-zero filter on the result
      collectOutput(indexesbuffer, valueBuffer);
    }
  }
예제 #7
0
 /**
  * Sets both the row and the column index of {@code out} to the row index of {@code in}.
  *
  * @param in index whose row index is read
  * @param out index that is overwritten
  */
 @Override
 public void execute(MatrixIndexes in, MatrixIndexes out) {
   // only used for V2M (presumably vector-to-matrix; confirm against the enclosing class)
   final long rowIx = in.getRowIndex();
   out.setIndexes(rowIx, rowIx);
 }
예제 #8
0
  /**
   * Reads a binary-block matrix from the sequence file(s) found under {@code path} into the
   * single in-memory block {@code dest}. Empty blocks are skipped. Non-empty blocks are validated
   * against the overall matrix dimensions and then either appended (sparse {@code dest}) or
   * copied into place (dense {@code dest}). After a sparse read spanning more than one column
   * block, the sparse rows are sorted once at the end.
   *
   * <p>Note: For efficiency, we directly use SequenceFile.Reader instead of
   * SequenceFileInputFormat-InputSplits-RecordReader (SequenceFileRecordReader). First, this has
   * no drawbacks since the SequenceFileRecordReader internally uses SequenceFile.Reader as well.
   * Second, it is advantageous if the actual sequence files are larger than the file splits
   * created by informat.getSplits (which is usually aligned to the HDFS block size) because then
   * there is overhead for finding the actual split between our 1k-1k blocks. This case happens if
   * the read matrix was created by CP or when jobs directly write to large output files (e.g.,
   * parfor matrix partitioning).
   *
   * <p>Cell offsets are computed in 64-bit arithmetic so that the bound check cannot be defeated
   * by integer overflow; they are narrowed to {@code int} only after the check has passed.
   *
   * @param path matrix file or directory, resolved to 1..N sequence files
   * @param job job configuration used to open the sequence file readers
   * @param fs file system the sequence files are read from
   * @param dest target block that receives all cells
   * @param rlen overall number of rows of the matrix
   * @param clen overall number of columns of the matrix
   * @param brlen number of rows per block
   * @param bclen number of columns per block
   * @throws IOException if a file cannot be read or a block lies outside the range
   *     {@code [1:rlen, 1:clen]}
   * @throws DMLRuntimeException declared by the signature; presumably raised by the block
   *     append/copy/sort operations on {@code dest}
   */
  @SuppressWarnings("deprecation")
  private static void readBinaryBlockMatrixFromHDFS(
      Path path,
      JobConf job,
      FileSystem fs,
      MatrixBlock dest,
      long rlen,
      long clen,
      int brlen,
      int bclen)
      throws IOException, DMLRuntimeException {
    boolean sparse = dest.isInSparseFormat();
    MatrixIndexes key = new MatrixIndexes();
    MatrixBlock value = new MatrixBlock();

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION) {
      MRJobConfiguration.addBinaryBlockSerializationFramework(job);
    }

    for (Path lpath : getSequenceFilePaths(fs, path)) // 1..N files
    {
      // directly read from sequence files (individual partfiles)
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

      try {
        // note: next(key, value) does not yet exploit the given serialization classes, record
        // reader does but is generally slower.
        while (reader.next(key, value)) {
          // empty block filter (skip entire block)
          if (value.isEmptyBlock(false)) {
            continue;
          }

          // block indexes are 1-based; multiply as long so the product cannot wrap around
          long rowOffset = (key.getRowIndex() - 1) * (long) brlen;
          long colOffset = (key.getColumnIndex() - 1) * (long) bclen;

          int rows = value.getNumRows();
          int cols = value.getNumColumns();

          // bound check per block (a negative offset means a block index below 1)
          if (rowOffset < 0
              || rowOffset + rows > rlen
              || colOffset < 0
              || colOffset + cols > clen) {
            throw new IOException(
                "Matrix block ["
                    + (rowOffset + 1)
                    + ":"
                    + (rowOffset + rows)
                    + ","
                    + (colOffset + 1)
                    + ":"
                    + (colOffset + cols)
                    + "] "
                    + "out of overall matrix range [1:"
                    + rlen
                    + ",1:"
                    + clen
                    + "].");
          }

          // The offsets were verified to lie within [0, rlen) / [0, clen). Narrowing assumes the
          // dimensions of the in-memory target block fit into int -- TODO confirm with callers.
          int rowStart = (int) rowOffset;
          int colStart = (int) colOffset;

          // copy block to result
          if (sparse) {
            dest.appendToSparse(value, rowStart, colStart);
            // note: append requires final sort
          } else {
            dest.copy(rowStart, rowStart + rows - 1, colStart, colStart + cols - 1, value, false);
          }
        }
      } finally {
        IOUtilFunctions.closeSilently(reader);
      }
    }

    if (sparse && clen > bclen) {
      // no need to sort if 1 column block since always sorted
      dest.sortSparseRows();
    }
  }
예제 #9
0
 /**
  * Sets {@code out} to the fixed index (1, 1); {@code in} is not read.
  *
  * @param in ignored
  * @param out index that is overwritten
  */
 @Override
 public void execute(MatrixIndexes in, MatrixIndexes out) {
   final int first = 1;
   out.setIndexes(first, first);
 }