@Override
public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0)
    throws Exception
{
    ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();
    MatrixIndexes ixIn = arg0._1();
    MatrixBlock mb2 = arg0._2();

    // get the corresponding block of the broadcast permutation vector
    MatrixBlock mb1 = _pmV.getMatrixBlock((int) ixIn.getRowIndex(), 1);

    // compute target block indexes
    long minPos = UtilFunctions.toLong(mb1.minNonZero());
    long maxPos = UtilFunctions.toLong(mb1.max());
    long rowIX1 = (minPos - 1) / _brlen + 1;
    long rowIX2 = (maxPos - 1) / _brlen + 1;
    boolean multipleOuts = (rowIX1 != rowIX2);

    if (minPos >= 1) // at least one row selected
    {
        // output sparsity estimate
        double spmb1 = OptimizerUtils.getSparsity(mb1.getNumRows(), 1, mb1.getNonZeros());
        long estnnz = (long) (spmb1 * mb2.getNonZeros());
        boolean sparse = MatrixBlock.evalSparseFormatInMemory(_brlen, mb2.getNumColumns(), estnnz);

        // compute and allocate output blocks
        MatrixBlock out1 = new MatrixBlock();
        MatrixBlock out2 = multipleOuts ? new MatrixBlock() : null;
        out1.reset(_brlen, mb2.getNumColumns(), sparse);
        if (out2 != null)
            out2.reset(UtilFunctions.computeBlockSize(_rlen, rowIX2, _brlen),
                mb2.getNumColumns(), sparse);

        // compute core matrix permutation (assumes that out1 has the default block size,
        // hence we do a metadata correction afterwards)
        mb1.permutationMatrixMultOperations(mb2, out1, out2);
        out1.setNumRows(UtilFunctions.computeBlockSize(_rlen, rowIX1, _brlen));
        ret.add(new Tuple2<MatrixIndexes, MatrixBlock>(
            new MatrixIndexes(rowIX1, ixIn.getColumnIndex()), out1));

        if (out2 != null)
            ret.add(new Tuple2<MatrixIndexes, MatrixBlock>(
                new MatrixIndexes(rowIX2, ixIn.getColumnIndex()), out2));
    }

    return ret;
}
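/*
 * A minimal standalone sketch (illustrative names, not SystemML API) of the
 * target block index arithmetic used above: a 1-based target row position p
 * maps to block row (p - 1) / brlen + 1, so the min and max selected positions
 * of the permutation vector determine at most two distinct output block rows,
 * which is why the function emits out1 and, only if needed, out2.
 */
public class BlockIndexSketch {
    public static void main(String[] args) {
        long brlen = 1000;                          // hypothetical block size
        long minPos = 950, maxPos = 1020;           // hypothetical 1-based target positions
        long rowIX1 = (minPos - 1) / brlen + 1;     // -> 1
        long rowIX2 = (maxPos - 1) / brlen + 1;     // -> 2
        boolean multipleOuts = (rowIX1 != rowIX2);  // true: spans two block rows
        System.out.println(rowIX1 + " " + rowIX2 + " " + multipleOuts);
    }
}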
/**
 * @param schema      value types of the frame columns
 * @param containsID  whether the data frame contains an ID column
 * @param dense       whether to generate dense input data
 * @param unknownDims whether to pass unknown matrix characteristics to the converter
 */
private void testDataFrameConversion(ValueType[] schema, boolean containsID, boolean dense, boolean unknownDims) {
    boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    RUNTIME_PLATFORM oldPlatform = DMLScript.rtplatform;
    SparkExecutionContext sec = null;

    try {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
        DMLScript.rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;

        // generate input data and setup metadata
        int cols = schema.length + colsVector - 1;
        double sparsity = dense ? sparsity1 : sparsity2;
        double[][] A = TestUtils.round(getRandomMatrix(rows1, cols, -10, 1000, sparsity, 2373));
        MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
        int blksz = ConfigurationManager.getBlocksize();
        MatrixCharacteristics mc1 = new MatrixCharacteristics(rows1, cols, blksz, blksz, mbA.getNonZeros());
        MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);

        // setup spark context
        sec = (SparkExecutionContext) ExecutionContextFactory.createContext();
        JavaSparkContext sc = sec.getSparkContext();
        SQLContext sqlctx = new SQLContext(sc);

        // create input data frame
        DataFrame df = createDataFrame(sqlctx, mbA, containsID, schema);

        // dataframe - frame conversion
        JavaPairRDD<Long, FrameBlock> out = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, containsID);

        // get output frame block
        FrameBlock fbB = SparkExecutionContext.toFrameBlock(out,
            UtilFunctions.nCopies(cols, ValueType.DOUBLE), rows1, cols);

        // compare frame blocks
        MatrixBlock mbB = DataConverter.convertToMatrixBlock(fbB);
        double[][] B = DataConverter.convertToDoubleMatrix(mbB);
        TestUtils.compareMatrices(A, B, rows1, cols, eps);
    }
    catch (Exception ex) {
        throw new RuntimeException(ex);
    }
    finally {
        if (sec != null) // guard against failures before context creation
            sec.close();
        DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
        DMLScript.rtplatform = oldPlatform;
    }
}
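/*
 * Hypothetical invocations of the test helper above; the schema size and flag
 * combinations are illustrative only, assuming the surrounding JUnit test class
 * with its rows1/colsVector/sparsity fields. Each call covers one combination
 * of ID column, density, and dimension knowledge.
 */
ValueType[] schema = UtilFunctions.nCopies(5, ValueType.DOUBLE);
testDataFrameConversion(schema, true,  true,  false); // with ID column, dense, known dims
testDataFrameConversion(schema, false, false, true);  // without ID, sparse, unknown dims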
@Override
public MatrixBlock call(MatrixBlock arg0) throws Exception {
    // aggregate the block's non-zero count via the accumulator, pass the block through unchanged
    _aNnz.add((double) arg0.getNonZeros());
    return arg0;
}
@SuppressWarnings("deprecation")
public void flushBuffer(Reporter reporter) throws RuntimeException {
    try {
        if (_mapBuffer != null) {
            MatrixIndexes key = null;
            MatrixCell value = new MatrixCell();
            for (Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet()) {
                ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
                CTableMap resultMap = ctable.getValue();

                // maintain result dims and nonzeros
                for (Integer i : resultIDs) {
                    _resultNonZeros[i] += resultMap.size();
                    if (_resultDimsUnknown[i] == (byte) 1) {
                        _resultMaxRowDims[i] = Math.max(resultMap.getMaxRow(), _resultMaxRowDims[i]);
                        _resultMaxColDims[i] = Math.max(resultMap.getMaxColumn(), _resultMaxColDims[i]);
                    }
                }

                // output result data
                for (LLDoubleEntry e : resultMap.entrySet()) {
                    key = new MatrixIndexes(e.key1, e.key2);
                    value.setValue(e.value);
                    for (Integer i : resultIDs)
                        _collector.collectOutput(key, value, i, reporter);
                }
            }
        }
        else if (_blockBuffer != null) {
            MatrixIndexes key = new MatrixIndexes(1, 1);
            for (Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet()) {
                ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
                MatrixBlock outBlock = ctable.getValue();
                outBlock.recomputeNonZeros();

                // TODO: change hard coding of 1000
                int brlen = 1000, bclen = 1000;
                int rlen = outBlock.getNumRows();
                int clen = outBlock.getNumColumns();

                if (rlen <= brlen && clen <= bclen) {
                    // final output matrix is smaller than a single block
                    key = new MatrixIndexes(1, 1);
                    for (Integer i : resultIDs) {
                        _collector.collectOutput(key, outBlock, i, reporter);
                        _resultNonZeros[i] += outBlock.getNonZeros();
                    }
                }
                else {
                    // Following code is similar to DataConverter.writeBinaryBlockMatrixToHDFS:
                    // initialize blocks for reuse (at most 4 different blocks required)
                    MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(
                        rlen, clen, brlen, bclen, true, outBlock.getNonZeros());

                    // create and write subblocks of matrix
                    for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++) {
                        for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) {
                            int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
                            int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
                            int row_offset = blockRow * brlen;
                            int col_offset = blockCol * bclen;

                            // get reuse matrix block
                            MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);

                            // copy submatrix to block
                            outBlock.sliceOperations(row_offset, row_offset + maxRow - 1,
                                col_offset, col_offset + maxCol - 1, block);

                            // TODO: skip empty "block"
                            // append block to sequence file
                            key.setIndexes(blockRow + 1, blockCol + 1);
                            for (Integer i : resultIDs) {
                                _collector.collectOutput(key, block, i, reporter);
                                _resultNonZeros[i] += block.getNonZeros();
                            }

                            // reset block for later reuse
                            block.reset();
                        }
                    }
                }
            }
        }
        else {
            throw new DMLRuntimeException("Unexpected: both ctable buffers are empty.");
        }
    }
    catch (Exception ex) {
        throw new RuntimeException("Failed to flush ctable buffer.", ex);
    }

    // remove existing partial ctables
    if (_mapBuffer != null)
        _mapBuffer.clear();
    else
        _blockBuffer.clear();
}
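/*
 * A standalone sketch (illustrative names and sizes, not SystemML API) of the
 * block tiling arithmetic used above: an rlen x clen matrix is cut into
 * brlen x bclen tiles, edge tiles are truncated to the remaining rows/columns,
 * and sequence-file keys use 1-based block indexes.
 */
public class TilingSketch {
    public static void main(String[] args) {
        int rlen = 2500, clen = 1200, brlen = 1000, bclen = 1000; // hypothetical dims
        for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++)
            for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) {
                // interior tiles are full-sized; edge tiles keep only the remainder
                int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
                int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
                // 1-based key (blockRow+1, blockCol+1) identifies a maxRow x maxCol tile
                System.out.printf("key=(%d,%d) size=%dx%d%n", blockRow + 1, blockCol + 1, maxRow, maxCol);
            }
    }
}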