/** * @param path * @param job * @param fs * @param dest * @param rlen * @param clen * @param brlen * @param bclen * @throws IOException * @throws IllegalAccessException * @throws InstantiationException */ @SuppressWarnings("deprecation") private void readBinaryBlockMatrixBlocksFromHDFS( Path path, JobConf job, FileSystem fs, Collection<IndexedMatrixValue> dest, long rlen, long clen, int brlen, int bclen) throws IOException { MatrixIndexes key = new MatrixIndexes(); MatrixBlock value = new MatrixBlock(); // set up preferred custom serialization framework for binary block format if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION) MRJobConfiguration.addBinaryBlockSerializationFramework(job); for (Path lpath : getSequenceFilePaths(fs, path)) // 1..N files { // directly read from sequence files (individual partfiles) SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job); try { while (reader.next(key, value)) { int row_offset = (int) (key.getRowIndex() - 1) * brlen; int col_offset = (int) (key.getColumnIndex() - 1) * bclen; int rows = value.getNumRows(); int cols = value.getNumColumns(); // bound check per block if (row_offset + rows < 0 || row_offset + rows > rlen || col_offset + cols < 0 || col_offset + cols > clen) { throw new IOException( "Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "]."); } // copy block to result dest.add(new IndexedMatrixValue(new MatrixIndexes(key), new MatrixBlock(value))); } } finally { IOUtilFunctions.closeSilently(reader); } } }
private void processBinaryCombineInstruction(CombineBinaryInstruction ins, Reporter reporter) throws IOException { IndexedMatrixValue in1 = cachedValues.getFirst(ins.input1); IndexedMatrixValue in2 = cachedValues.getFirst(ins.input2); if (in1 == null && in2 == null) return; MatrixIndexes indexes; if (in1 != null) indexes = in1.getIndexes(); else indexes = in2.getIndexes(); // if one of the inputs is null, then it is a all zero block if (in1 == null) { in1 = zeroInput; in1.getValue().reset(in2.getValue().getNumRows(), in2.getValue().getNumColumns()); } if (in2 == null) { in2 = zeroInput; in2.getValue().reset(in1.getValue().getNumRows(), in1.getValue().getNumColumns()); } // System.out.println("in1:"+in1); // System.out.println("in2:"+in2); // process instruction try { /*in1.getValue().combineOperations(in2.getValue(), collectFinalMultipleOutputs, reporter, keyBuff, valueBuff, getOutputIndexes(ins.output));*/ ArrayList<Integer> outputIndexes = outputIndexesMapping.get(ins.output); for (int r = 0; r < in1.getValue().getNumRows(); r++) for (int c = 0; c < in1.getValue().getNumColumns(); c++) { Pair<Integer, Integer> blockSize = outputBlockSizes.get(ins.output); keyBuff.setIndexes( UtilFunctions.cellIndexCalculation(indexes.getRowIndex(), blockSize.getKey(), r), UtilFunctions.cellIndexCalculation( indexes.getColumnIndex(), blockSize.getValue(), c)); valueBuff.setValue(in1.getValue().getValue(r, c)); double temp = in2.getValue().getValue(r, c); if (ins.isSecondInputWeight()) { valueBuff.setWeight(temp); valueBuff.setOtherValue(0); } else { valueBuff.setWeight(1); valueBuff.setOtherValue(temp); } for (int i : outputIndexes) { collectFinalMultipleOutputs.collectOutput(keyBuff, valueBuff, i, reporter); // System.out.println("output: "+keyBuff+" -- "+valueBuff); } } } catch (Exception e) { throw new RuntimeException(e); } }
@Override protected Tuple2<MatrixIndexes, MatrixBlock> computeNext(Tuple2<MatrixIndexes, MatrixBlock> arg) throws Exception { // unpack partition key-value pairs MatrixIndexes ix = arg._1(); MatrixBlock in1 = arg._2(); // get the rhs block int rix = (int) ((_vtype == VectorType.COL_VECTOR) ? ix.getRowIndex() : 1); int cix = (int) ((_vtype == VectorType.COL_VECTOR) ? 1 : ix.getColumnIndex()); MatrixBlock in2 = _pmV.getMatrixBlock(rix, cix); // execute the binary operation MatrixBlock ret = (MatrixBlock) (in1.binaryOperations(_op, in2, new MatrixBlock())); return new Tuple2<MatrixIndexes, MatrixBlock>(ix, ret); }
/**
 * Reads an entire binary-block matrix from HDFS sequence files into the given
 * destination MatrixBlock, handling both sparse and dense targets.
 *
 * Note: For efficiency, we directly use SequenceFile.Reader instead of SequenceFileInputFormat-
 * InputSplits-RecordReader (SequenceFileRecordReader). First, this has no drawbacks since the
 * SequenceFileRecordReader internally uses SequenceFile.Reader as well. Second, it is
 * advantageous if the actual sequence files are larger than the file splits created by
 * informat.getSplits (which is usually aligned to the HDFS block size) because then there is
 * overhead for finding the actual split between our 1k-1k blocks. This case happens if the read
 * matrix was created by CP or when jobs directly write to large output files (e.g., parfor matrix
 * partitioning).
 *
 * @param path matrix file or directory of sequence part files
 * @param job job configuration used to set up the readers
 * @param fs file system handle
 * @param dest preallocated destination block for the whole matrix
 * @param rlen number of rows of the overall matrix
 * @param clen number of columns of the overall matrix
 * @param brlen number of rows per block
 * @param bclen number of columns per block
 * @throws IOException if reading fails or a block lies outside the matrix range
 * @throws IllegalAccessException declared for reflective instantiation paths
 * @throws InstantiationException declared for reflective instantiation paths
 * @throws DMLRuntimeException on runtime errors during the block copy
 */
@SuppressWarnings("deprecation")
private static void readBinaryBlockMatrixFromHDFS(
    Path path,
    JobConf job,
    FileSystem fs,
    MatrixBlock dest,
    long rlen,
    long clen,
    int brlen,
    int bclen)
    throws IOException, DMLRuntimeException {
  // target layout decides between sparse append and dense range copy below
  boolean sparse = dest.isInSparseFormat();
  MatrixIndexes key = new MatrixIndexes();
  MatrixBlock value = new MatrixBlock();

  // set up preferred custom serialization framework for binary block format
  if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
    MRJobConfiguration.addBinaryBlockSerializationFramework(job);

  for (Path lpath : getSequenceFilePaths(fs, path)) // 1..N files
  {
    // directly read from sequence files (individual partfiles)
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

    try {
      // note: next(key, value) does not yet exploit the given serialization classes, record
      // reader does but is generally slower.
      while (reader.next(key, value)) {
        // empty block filter (skip entire block)
        if (value.isEmptyBlock(false)) continue;

        // block index is 1-based; convert to 0-based cell offsets
        int row_offset = (int) (key.getRowIndex() - 1) * brlen;
        int col_offset = (int) (key.getColumnIndex() - 1) * bclen;

        int rows = value.getNumRows();
        int cols = value.getNumColumns();

        // bound check per block
        if (row_offset + rows < 0
            || row_offset + rows > rlen
            || col_offset + cols < 0
            || col_offset + cols > clen) {
          throw new IOException(
              "Matrix block ["
                  + (row_offset + 1)
                  + ":"
                  + (row_offset + rows)
                  + ","
                  + (col_offset + 1)
                  + ":"
                  + (col_offset + cols)
                  + "] "
                  + "out of overall matrix range [1:"
                  + rlen
                  + ",1:"
                  + clen
                  + "].");
        }

        // copy block to result
        if (sparse) {
          dest.appendToSparse(value, row_offset, col_offset);
          // note: append requires final sort
        } else {
          dest.copy(
              row_offset, row_offset + rows - 1, col_offset, col_offset + cols - 1, value, false);
        }
      }
    } finally {
      IOUtilFunctions.closeSilently(reader);
    }
  }

  if (sparse && clen > bclen) {
    // no need to sort if 1 column block since always sorted
    dest.sortSparseRows();
  }
}