@Override public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call( Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception { ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>(); MatrixIndexes ixIn = arg0._1(); MatrixBlock mb2 = arg0._2(); // get the right hand side matrix MatrixBlock mb1 = _pmV.getMatrixBlock((int) ixIn.getRowIndex(), 1); // compute target block indexes long minPos = UtilFunctions.toLong(mb1.minNonZero()); long maxPos = UtilFunctions.toLong(mb1.max()); long rowIX1 = (minPos - 1) / _brlen + 1; long rowIX2 = (maxPos - 1) / _brlen + 1; boolean multipleOuts = (rowIX1 != rowIX2); if (minPos >= 1) // at least one row selected { // output sparsity estimate double spmb1 = OptimizerUtils.getSparsity(mb1.getNumRows(), 1, mb1.getNonZeros()); long estnnz = (long) (spmb1 * mb2.getNonZeros()); boolean sparse = MatrixBlock.evalSparseFormatInMemory(_brlen, mb2.getNumColumns(), estnnz); // compute and allocate output blocks MatrixBlock out1 = new MatrixBlock(); MatrixBlock out2 = multipleOuts ? new MatrixBlock() : null; out1.reset(_brlen, mb2.getNumColumns(), sparse); if (out2 != null) out2.reset( UtilFunctions.computeBlockSize(_rlen, rowIX2, _brlen), mb2.getNumColumns(), sparse); // compute core matrix permutation (assumes that out1 has default blocksize, // hence we do a meta data correction afterwards) mb1.permutationMatrixMultOperations(mb2, out1, out2); out1.setNumRows(UtilFunctions.computeBlockSize(_rlen, rowIX1, _brlen)); ret.add( new Tuple2<MatrixIndexes, MatrixBlock>( new MatrixIndexes(rowIX1, ixIn.getColumnIndex()), out1)); if (out2 != null) ret.add( new Tuple2<MatrixIndexes, MatrixBlock>( new MatrixIndexes(rowIX2, ixIn.getColumnIndex()), out2)); } return ret; }
@Override public void processInstruction( Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) throws DMLRuntimeException { ArrayList<IndexedMatrixValue> blkList = cachedValues.get(input); if (blkList == null) return; for (IndexedMatrixValue in1 : blkList) { if (in1 == null) continue; MatrixIndexes inix = in1.getIndexes(); MatrixBlock blk = (MatrixBlock) in1.getValue(); long rixOffset = (inix.getRowIndex() - 1) * blockRowFactor; boolean firstBlk = (inix.getRowIndex() == 1); boolean lastBlk = (inix.getRowIndex() == _lastRowBlockIndex); // introduce offsets w/ init value for first row if (firstBlk) { IndexedMatrixValue out = cachedValues.holdPlace(output, valueClass); ((MatrixBlock) out.getValue()).reset(1, blk.getNumColumns()); if (_initValue != 0) { for (int j = 0; j < blk.getNumColumns(); j++) ((MatrixBlock) out.getValue()).appendValue(0, j, _initValue); } out.getIndexes().setIndexes(1, inix.getColumnIndex()); } // output splitting (shift by one), preaggregated offset used by subsequent block for (int i = 0; i < blk.getNumRows(); i++) if (!(lastBlk && i == (blk.getNumRows() - 1))) // ignore last row { IndexedMatrixValue out = cachedValues.holdPlace(output, valueClass); MatrixBlock tmpBlk = (MatrixBlock) out.getValue(); tmpBlk.reset(1, blk.getNumColumns()); blk.sliceOperations(i, i, 0, blk.getNumColumns() - 1, tmpBlk); out.getIndexes().setIndexes(rixOffset + i + 2, inix.getColumnIndex()); } } }
@Override public void processInstruction( Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) throws DMLRuntimeException { IndexedMatrixValue in1 = cachedValues.getFirst(input1); // original data IndexedMatrixValue in2 = cachedValues.getFirst(input2); // offset row vector if (in1 == null || in2 == null) throw new DMLRuntimeException( "Unexpected empty input (left=" + ((in1 == null) ? "null" : in1.getIndexes()) + ", right=" + ((in2 == null) ? "null" : in2.getIndexes()) + ")."); // prepare inputs and outputs IndexedMatrixValue out = cachedValues.holdPlace(output, valueClass); MatrixBlock data = (MatrixBlock) in1.getValue(); MatrixBlock offset = (MatrixBlock) in2.getValue(); MatrixBlock blk = (MatrixBlock) out.getValue(); blk.reset(data.getNumRows(), data.getNumColumns()); // blockwise offset aggregation and prefix sum computation MatrixBlock data2 = new MatrixBlock(data); // cp data MatrixBlock fdata2 = data2.sliceOperations(0, 0, 0, data2.getNumColumns() - 1, new MatrixBlock()); // 1-based fdata2.binaryOperationsInPlace(_bop, offset); // sum offset to first row data2.copy(0, 0, 0, data2.getNumColumns() - 1, fdata2, true); // 0-based data2.unaryOperations(_uop, blk); // compute columnwise prefix sums/prod/min/max // set output indexes out.getIndexes().setIndexes(in1.getIndexes()); }
@SuppressWarnings("deprecation") public void flushBuffer(Reporter reporter) throws RuntimeException { try { if (_mapBuffer != null) { MatrixIndexes key = null; // new MatrixIndexes(); MatrixCell value = new MatrixCell(); for (Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet()) { ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes); CTableMap resultMap = ctable.getValue(); // maintain result dims and nonzeros for (Integer i : resultIDs) { _resultNonZeros[i] += resultMap.size(); if (_resultDimsUnknown[i] == (byte) 1) { _resultMaxRowDims[i] = Math.max(resultMap.getMaxRow(), _resultMaxRowDims[i]); _resultMaxColDims[i] = Math.max(resultMap.getMaxColumn(), _resultMaxColDims[i]); } } // output result data for (LLDoubleEntry e : resultMap.entrySet()) { key = new MatrixIndexes(e.key1, e.key2); value.setValue(e.value); for (Integer i : resultIDs) { _collector.collectOutput(key, value, i, reporter); } } } } else if (_blockBuffer != null) { MatrixIndexes key = new MatrixIndexes(1, 1); // DataConverter.writeBinaryBlockMatrixToHDFS(path, job, mat, mc.get_rows(), mc.get_cols(), // mc.get_rows_per_block(), mc.get_cols_per_block(), replication); for (Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet()) { ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes); MatrixBlock outBlock = ctable.getValue(); outBlock.recomputeNonZeros(); // TODO: change hard coding of 1000 int brlen = 1000, bclen = 1000; int rlen = outBlock.getNumRows(); int clen = outBlock.getNumColumns(); // final output matrix is smaller than a single block if (rlen <= brlen && clen <= brlen) { key = new MatrixIndexes(1, 1); for (Integer i : resultIDs) { _collector.collectOutput(key, outBlock, i, reporter); _resultNonZeros[i] += outBlock.getNonZeros(); } } else { // Following code is similar to that in // DataConverter.DataConverter.writeBinaryBlockMatrixToHDFS // initialize blocks for reuse (at most 4 different blocks required) MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse( rlen, clen, brlen, bclen, true, outBlock.getNonZeros()); // create and write subblocks of matrix for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++) { for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) { int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen; int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen; int row_offset = blockRow * brlen; int col_offset = blockCol * bclen; // get reuse matrix block MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen); // copy submatrix to block outBlock.sliceOperations( row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block); // TODO: skip empty "block" // append block to sequence file key.setIndexes(blockRow + 1, blockCol + 1); for (Integer i : resultIDs) { _collector.collectOutput(key, block, i, reporter); _resultNonZeros[i] += block.getNonZeros(); } // reset block for later reuse block.reset(); } } } } } else { throw new DMLRuntimeException("Unexpected.. both ctable buffers are empty."); } } catch (Exception ex) { throw new RuntimeException("Failed to flush ctable buffer.", ex); } // remove existing partial ctables if (_mapBuffer != null) _mapBuffer.clear(); else _blockBuffer.clear(); }