/**
 * Reads a matrix stored in binary cell format from HDFS into the given
 * destination block. Cell indexes on disk are 1-based and are converted
 * to 0-based block coordinates on read.
 *
 * @param path  HDFS path of the matrix (single file or directory of part files)
 * @param job   job configuration used to open the sequence files
 * @param fs    file system handle
 * @param dest  destination matrix block, pre-allocated to rlen x clen
 * @param rlen  number of rows of the overall matrix
 * @param clen  number of columns of the overall matrix
 * @param brlen number of rows per block (not used by the cell reader)
 * @param bclen number of columns per block (not used by the cell reader)
 * @throws IOException if a cell is out of range or the input cannot be read
 */
@SuppressWarnings("deprecation")
private void readBinaryCellMatrixFromHDFS(Path path, JobConf job, FileSystem fs,
	MatrixBlock dest, long rlen, long clen, int brlen, int bclen)
	throws IOException
{
	boolean sparse = dest.isInSparseFormat();
	MatrixIndexes key = new MatrixIndexes();
	MatrixCell value = new MatrixCell();
	int row = -1;
	int col = -1;

	try
	{
		for( Path lpath : getSequenceFilePaths(fs, path) ) //1..N files
		{
			//directly read from sequence files (individual part files)
			SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
			try
			{
				//note: appendValue handles both sparse and dense targets, so a
				//single loop covers both representations (the original sparse
				//and dense branches were identical and have been merged)
				while( reader.next(key, value) ) {
					row = (int) key.getRowIndex() - 1;
					col = (int) key.getColumnIndex() - 1;
					dest.appendValue(row, col, value.getValue());
				}
			}
			finally {
				IOUtilFunctions.closeSilently(reader);
			}
		}

		if( sparse )
			dest.sortSparseRows();
	}
	catch(Exception ex)
	{
		//post-mortem error handling and bounds checking
		if( row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen )
			throw new IOException("Matrix cell [" + (row + 1) + "," + (col + 1) + "] " +
				"out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
		else
			throw new IOException("Unable to read matrix in binary cell format.", ex);
	}
}
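// The helper getSequenceFilePaths used above is not shown in this excerpt.
// The following is a minimal sketch of what such a helper typically looks
// like, not the actual implementation: the signature, filtering rules, and
// return type are assumptions. It expands a directory of part files into the
// individual sequence files, skipping hidden entries and job metadata such
// as _SUCCESS. Requires java.util.List, java.util.ArrayList, and
// org.apache.hadoop.fs.FileStatus in addition to the imports already in use.
private static List<Path> getSequenceFilePaths(FileSystem fs, Path file)
	throws IOException
{
	List<Path> ret = new ArrayList<>();
	if( fs.getFileStatus(file).isDirectory() ) {
		//directory of part files: collect all visible children
		for( FileStatus stat : fs.listStatus(file) ) {
			String name = stat.getPath().getName();
			if( !name.startsWith("_") && !name.startsWith(".") )
				ret.add(stat.getPath());
		}
	}
	else {
		//single sequence file
		ret.add(file);
	}
	return ret;
}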
@SuppressWarnings("deprecation")
public void flushBuffer(Reporter reporter) throws RuntimeException
{
	try
	{
		if( _mapBuffer != null )
		{
			//flush the cell-based representation
			MatrixIndexes key = null;
			MatrixCell value = new MatrixCell();
			for( Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet() )
			{
				ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
				CTableMap resultMap = ctable.getValue();

				//maintain result dims and nonzeros
				for( Integer i : resultIDs ) {
					_resultNonZeros[i] += resultMap.size();
					if( _resultDimsUnknown[i] == (byte) 1 ) {
						_resultMaxRowDims[i] = Math.max(resultMap.getMaxRow(), _resultMaxRowDims[i]);
						_resultMaxColDims[i] = Math.max(resultMap.getMaxColumn(), _resultMaxColDims[i]);
					}
				}

				//output result data
				for( LLDoubleEntry e : resultMap.entrySet() ) {
					key = new MatrixIndexes(e.key1, e.key2);
					value.setValue(e.value);
					for( Integer i : resultIDs )
						_collector.collectOutput(key, value, i, reporter);
				}
			}
		}
		else if( _blockBuffer != null )
		{
			//flush the block-based representation
			MatrixIndexes key = new MatrixIndexes(1, 1);
			for( Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet() )
			{
				ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
				MatrixBlock outBlock = ctable.getValue();
				outBlock.recomputeNonZeros();

				//TODO: change hard coding of 1000
				int brlen = 1000, bclen = 1000;
				int rlen = outBlock.getNumRows();
				int clen = outBlock.getNumColumns();

				//final output matrix is smaller than a single block
				if( rlen <= brlen && clen <= bclen ) {
					key = new MatrixIndexes(1, 1);
					for( Integer i : resultIDs ) {
						_collector.collectOutput(key, outBlock, i, reporter);
						_resultNonZeros[i] += outBlock.getNonZeros();
					}
				}
				else {
					//the following code is similar to DataConverter.writeBinaryBlockMatrixToHDFS

					//initialize blocks for reuse (at most 4 different blocks required)
					MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(
						rlen, clen, brlen, bclen, true, outBlock.getNonZeros());

					//create and write subblocks of matrix
					for( int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++ )
						for( int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++ )
						{
							int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
							int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
							int row_offset = blockRow * brlen;
							int col_offset = blockCol * bclen;

							//get reuse matrix block
							MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);

							//copy submatrix to block
							outBlock.sliceOperations(row_offset, row_offset + maxRow - 1,
								col_offset, col_offset + maxCol - 1, block);

							//TODO: skip empty "block"

							//append block to sequence file
							key.setIndexes(blockRow + 1, blockCol + 1);
							for( Integer i : resultIDs ) {
								_collector.collectOutput(key, block, i, reporter);
								_resultNonZeros[i] += block.getNonZeros();
							}

							//reset block for later reuse
							block.reset();
						}
				}
			}
		}
		else {
			throw new DMLRuntimeException("Unexpected: both ctable buffers are empty.");
		}
	}
	catch(Exception ex) {
		throw new RuntimeException("Failed to flush ctable buffer.", ex);
	}

	//remove existing partial ctables
	if( _mapBuffer != null )
		_mapBuffer.clear();
	else
		_blockBuffer.clear();
}
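// The block decomposition in the else-branch above partitions an rlen x clen
// matrix into a grid of at most brlen x bclen tiles, where boundary tiles are
// truncated to the remaining rows/columns. The following self-contained sketch
// (plain Java, no SystemML dependencies; class and variable names are
// illustrative only) reproduces the same index arithmetic and can be run to
// inspect the generated tile layout.
public class BlockLayoutDemo {
	public static void main(String[] args) {
		long rlen = 2500, clen = 1700;  //example matrix dimensions
		int brlen = 1000, bclen = 1000; //block dimensions (hard-coded as 1000 above as well)

		int numBlockRows = (int) Math.ceil(rlen / (double) brlen);
		int numBlockCols = (int) Math.ceil(clen / (double) bclen);

		for (int blockRow = 0; blockRow < numBlockRows; blockRow++)
			for (int blockCol = 0; blockCol < numBlockCols; blockCol++) {
				//effective tile size: full block size unless it is a boundary tile
				int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : (int) (rlen - blockRow * brlen);
				int maxCol = (blockCol * bclen + bclen < clen) ? bclen : (int) (clen - blockCol * bclen);
				int rowOffset = blockRow * brlen;
				int colOffset = blockCol * bclen;

				//block indexes are 1-based; cell ranges are 0-based and inclusive,
				//matching the sliceOperations bounds used in flushBuffer
				System.out.printf("block (%d,%d): rows [%d,%d], cols [%d,%d]%n",
					blockRow + 1, blockCol + 1,
					rowOffset, rowOffset + maxRow - 1,
					colOffset, colOffset + maxCol - 1);
			}
	}
}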