@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues,
    IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int brlen, int bclen)
  throws DMLUnsupportedOperationException, DMLRuntimeException
{
  // get both inputs
  IndexedMatrixValue left = cachedValues.getFirst(input1);
  IndexedMatrixValue right = cachedValues.getFirst(input2);

  // check non-existing block
  if( left == null || right == null )
    throw new DMLRuntimeException("Missing append input: isNull(left): " + (left == null)
      + ", isNull(right): " + (right == null));

  // core append operation
  MatrixBlock mbLeft = (MatrixBlock) left.getValue();
  MatrixBlock mbRight = (MatrixBlock) right.getValue();
  MatrixBlock ret = mbLeft.appendOperations(mbRight, new MatrixBlock(), _cbind);

  // put result into cache
  cachedValues.add(output, new IndexedMatrixValue(left.getIndexes(), ret));
}
@Override
public final void writeMatrixToHDFS(MatrixBlock src, String fname, long rlen, long clen,
    int brlen, int bclen, long nnz)
  throws IOException, DMLRuntimeException
{
  // validity check matrix dimensions
  if( src.getNumRows() != rlen || src.getNumColumns() != clen ) {
    throw new IOException("Matrix dimensions mismatch with metadata: "
      + src.getNumRows() + "x" + src.getNumColumns() + " vs " + rlen + "x" + clen + ".");
  }

  // prepare file access
  JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
  FileSystem fs = FileSystem.get(job);
  Path path = new Path(fname);

  // if the file already exists on HDFS, remove it
  MapReduceTool.deleteFileIfExistOnHDFS(fname);

  // core write (sequential/parallel)
  writeCSVMatrixToHDFS(path, job, fs, src, _props);

  IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
}
/**
 * Computes the required unary aggregate (sum, sumsq, min, max) over the given
 * in-memory matrix block for literal replacement.
 *
 * @param auop the unary aggregate hop to replace
 * @param mb the in-memory matrix block to aggregate
 * @return the scalar aggregate value
 * @throws DMLRuntimeException if the aggregate operation is unsupported
 */
private static double replaceUnaryAggregate(AggUnaryOp auop, MatrixBlock mb)
  throws DMLRuntimeException
{
  // setup stats reporting if necessary
  boolean REPORT_STATS = (DMLScript.STATISTICS && REPORT_LITERAL_REPLACE_OPS_STATS);
  long t0 = REPORT_STATS ? System.nanoTime() : 0;

  // compute required unary aggregate
  double val = Double.MAX_VALUE;
  switch( auop.getOp() ) {
    case SUM:    val = mb.sum(); break;
    case SUM_SQ: val = mb.sumSq(); break;
    case MIN:    val = mb.min(); break;
    case MAX:    val = mb.max(); break;
    default:
      throw new DMLRuntimeException("Unsupported unary aggregate replacement: " + auop.getOp());
  }

  // report statistics if necessary
  if( REPORT_STATS ) {
    long t1 = System.nanoTime();
    Statistics.maintainCPHeavyHitters("rlit", t1 - t0);
  }

  return val;
}
@Override
public Tuple2<MatrixIndexes, MatrixBlock> call(Tuple2<MatrixIndexes, MatrixBlock> arg0)
  throws Exception
{
  MatrixIndexes ixIn = arg0._1();
  MatrixBlock blkIn = arg0._2();

  MatrixIndexes ixOut = new MatrixIndexes();
  MatrixBlock blkOut = new MatrixBlock();

  // process instruction
  OperationsOnMatrixValues.performAggregateUnary(ixIn, blkIn, ixOut, blkOut,
    ((AggregateUnaryOperator) _op), _brlen, _bclen);
  if( ((AggregateUnaryOperator) _op).aggOp.correctionExists )
    blkOut.dropLastRowsOrColums(((AggregateUnaryOperator) _op).aggOp.correctionLocation);

  // cumsum expand partial aggregates
  long rlenOut = (long) Math.ceil((double) _rlen / _brlen);
  long rixOut = (long) Math.ceil((double) ixIn.getRowIndex() / _brlen);
  int rlenBlk = (int) Math.min(rlenOut - (rixOut - 1) * _brlen, _brlen);
  int clenBlk = blkOut.getNumColumns();
  int posBlk = (int) ((ixIn.getRowIndex() - 1) % _brlen);
  MatrixBlock blkOut2 = new MatrixBlock(rlenBlk, clenBlk, false);
  blkOut2.copy(posBlk, posBlk, 0, clenBlk - 1, blkOut, true);
  ixOut.setIndexes(rixOut, ixOut.getColumnIndex());

  // output new tuple
  return new Tuple2<MatrixIndexes, MatrixBlock>(ixOut, blkOut2);
}
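/*
 * A minimal, self-contained sketch of the index arithmetic above, assuming a
 * 1-based source row-block index and block size _brlen as in the method. The
 * names (totalRows, brlen, inRowIndex) are illustrative, not from the source.
 */
public static void cumsumExpandIndexExample() {
  long totalRows = 2500;  // _rlen: total rows of the input matrix
  int brlen = 1000;       // _brlen: rows per block
  long inRowIndex = 3;    // ixIn.getRowIndex(): 1-based row-block index

  // each input block contributes one aggregate row; rlenOut is the number
  // of such rows overall, i.e., the number of row blocks
  long rlenOut = (long) Math.ceil((double) totalRows / brlen);          // 3
  // target row-block index of the aggregate row in the compacted output
  long rixOut = (long) Math.ceil((double) inRowIndex / brlen);          // 1
  // rows of the target block (the last block may be smaller than brlen)
  int rlenBlk = (int) Math.min(rlenOut - (rixOut - 1) * brlen, brlen);  // 3
  // position of the aggregate row within the target block (0-based)
  int posBlk = (int) ((inRowIndex - 1) % brlen);                        // 2

  System.out.println(rlenOut + " " + rixOut + " " + rlenBlk + " " + posBlk);
}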
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen,
    int brlen, int bclen, long estnnz)
  throws IOException, DMLRuntimeException
{
  // allocate output matrix block
  MatrixBlock ret = createOutputMatrixBlock(rlen, clen, (int) rlen, (int) clen, estnnz, true, false);

  // prepare file access
  JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
  FileSystem fs = FileSystem.get(job);
  Path path = new Path(fname);

  // check existence and non-empty file
  checkValidInputFile(fs, path);

  // core read
  readBinaryCellMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen);

  // finally check if change of sparse/dense block representation required
  // (nnz maintained via append during read for both dense/sparse)
  ret.examSparsity();

  return ret;
}
protected boolean checkGuardedRepresentationChange(MatrixBlock in1, MatrixBlock in2, MatrixBlock out)
{
  // a representation change of the output is safe if its worst-case dense size
  // fits into the memory already accounted for by the (released) inputs
  double memDense = OptimizerUtils.estimateSize(out.getNumRows(), out.getNumColumns());
  double memIn1 = (in1 != null) ? in1.getInMemorySize() : 0;
  double memIn2 = (in2 != null) ? in2.getInMemorySize() : 0;
  return (memDense < memIn1 + memIn2);
}
/**
 * Creates an in-memory data frame from the given matrix block, optionally with
 * an ID column and with vector columns according to the schema.
 *
 * @param sqlctx the SQL context used to create the data frame
 * @param mb the source matrix block
 * @param containsID true if an ID column should be prepended
 * @param schema the value types of the output columns
 * @return the created data frame
 * @throws DMLRuntimeException if the frame creation fails
 */
@SuppressWarnings("resource")
private DataFrame createDataFrame(SQLContext sqlctx, MatrixBlock mb, boolean containsID, ValueType[] schema)
  throws DMLRuntimeException
{
  // create in-memory list of rows
  List<Row> list = new ArrayList<Row>();
  int off = (containsID ? 1 : 0);
  int clen = mb.getNumColumns() + off - colsVector + 1;

  for( int i = 0; i < mb.getNumRows(); i++ ) {
    Object[] row = new Object[clen];
    if( containsID )
      row[0] = (double) i + 1; // ID column is declared DoubleType below
    for( int j = 0, j2 = 0; j < mb.getNumColumns(); j++, j2++ ) {
      if( schema[j2] != ValueType.OBJECT ) {
        row[j2 + off] = UtilFunctions.doubleToObject(schema[j2], mb.quickGetValue(i, j));
      }
      else {
        double[] tmp = DataConverter.convertToDoubleVector(
          mb.sliceOperations(i, i, j, j + colsVector - 1, new MatrixBlock()));
        row[j2 + off] = new DenseVector(tmp);
        j += colsVector - 1;
      }
    }
    list.add(RowFactory.create(row));
  }

  // create data frame schema
  List<StructField> fields = new ArrayList<StructField>();
  if( containsID )
    fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
  for( int j = 0; j < schema.length; j++ ) {
    DataType dt = null;
    switch( schema[j] ) {
      case STRING: dt = DataTypes.StringType; break;
      case DOUBLE: dt = DataTypes.DoubleType; break;
      case INT:    dt = DataTypes.LongType; break;
      case OBJECT: dt = new VectorUDT(); break;
      default: throw new RuntimeException("Unsupported value type.");
    }
    fields.add(DataTypes.createStructField("C" + (j + 1), dt, true));
  }
  StructType dfSchema = DataTypes.createStructType(fields);

  // create rdd and data frame
  JavaSparkContext sc = new JavaSparkContext(sqlctx.sparkContext());
  JavaRDD<Row> rowRDD = sc.parallelize(list);
  return sqlctx.createDataFrame(rowRDD, dfSchema);
}
/**
 * @param schema the value types of the data frame columns
 * @param containsID true if the data frame contains an ID column
 * @param dense true for dense input data, false for sparse
 * @param unknownDims true to test with unknown matrix characteristics
 */
private void testDataFrameConversion(ValueType[] schema, boolean containsID, boolean dense, boolean unknownDims)
{
  boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
  RUNTIME_PLATFORM oldPlatform = DMLScript.rtplatform;
  SparkExecutionContext sec = null;

  try {
    DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    DMLScript.rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;

    // generate input data and setup metadata
    int cols = schema.length + colsVector - 1;
    double sparsity = dense ? sparsity1 : sparsity2;
    double[][] A = TestUtils.round(getRandomMatrix(rows1, cols, -10, 1000, sparsity, 2373));
    MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
    int blksz = ConfigurationManager.getBlocksize();
    MatrixCharacteristics mc1 = new MatrixCharacteristics(rows1, cols, blksz, blksz, mbA.getNonZeros());
    MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);

    // setup spark context
    sec = (SparkExecutionContext) ExecutionContextFactory.createContext();
    JavaSparkContext sc = sec.getSparkContext();
    SQLContext sqlctx = new SQLContext(sc);

    // create input data frame
    DataFrame df = createDataFrame(sqlctx, mbA, containsID, schema);

    // dataframe - frame conversion
    JavaPairRDD<Long, FrameBlock> out = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, containsID);

    // get output frame block
    FrameBlock fbB = SparkExecutionContext.toFrameBlock(out,
      UtilFunctions.nCopies(cols, ValueType.DOUBLE), rows1, cols);

    // compare frame blocks
    MatrixBlock mbB = DataConverter.convertToMatrixBlock(fbB);
    double[][] B = DataConverter.convertToDoubleMatrix(mbB);
    TestUtils.compareMatrices(A, B, rows1, cols, eps);
  }
  catch( Exception ex ) {
    throw new RuntimeException(ex);
  }
  finally {
    if( sec != null ) // guard against failures before context creation
      sec.close();
    DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
    DMLScript.rtplatform = oldPlatform;
  }
}
/**
 * Replaces a full unary aggregate over a right indexing operation on a small
 * matrix (less than 10^6 cells) with a literal.
 *
 * @param c the candidate hop
 * @param vars the local variable map holding the matrix object
 * @return the replacement literal, or null if not applicable
 * @throws DMLRuntimeException if the aggregate computation fails
 */
private static LiteralOp replaceLiteralFullUnaryAggregateRightIndexing(Hop c, LocalVariableMap vars)
  throws DMLRuntimeException
{
  LiteralOp ret = null;

  // full unary aggregate w/ indexed matrix less than 10^6 cells
  if( c instanceof AggUnaryOp && isReplaceableUnaryAggregate((AggUnaryOp) c)
    && c.getInput().get(0) instanceof IndexingOp
    && c.getInput().get(0).getInput().get(0) instanceof DataOp )
  {
    IndexingOp rix = (IndexingOp) c.getInput().get(0);
    Hop data = rix.getInput().get(0);
    Hop rl = rix.getInput().get(1);
    Hop ru = rix.getInput().get(2);
    Hop cl = rix.getInput().get(3);
    Hop cu = rix.getInput().get(4);

    if( data instanceof DataOp && vars.keySet().contains(data.getName())
      && isIntValueDataLiteral(rl, vars) && isIntValueDataLiteral(ru, vars)
      && isIntValueDataLiteral(cl, vars) && isIntValueDataLiteral(cu, vars) )
    {
      long rlval = getIntValueDataLiteral(rl, vars);
      long ruval = getIntValueDataLiteral(ru, vars);
      long clval = getIntValueDataLiteral(cl, vars);
      long cuval = getIntValueDataLiteral(cu, vars);

      MatrixObject mo = (MatrixObject) vars.get(data.getName());

      // get the dimension information from the matrix object because the hop
      // dimensions might not have been updated during recompile
      if( mo.getNumRows() * mo.getNumColumns() < REPLACE_LITERALS_MAX_MATRIX_SIZE ) {
        MatrixBlock mBlock = mo.acquireRead();
        MatrixBlock mBlock2 = mBlock.sliceOperations((int) (rlval - 1), (int) (ruval - 1),
          (int) (clval - 1), (int) (cuval - 1), new MatrixBlock());
        double value = replaceUnaryAggregate((AggUnaryOp) c, mBlock2);
        mo.release();

        // literal substitution (always double)
        ret = new LiteralOp(value);
      }
    }
  }

  return ret;
}
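/*
 * A small illustrative sketch (not from the source) of the index conversion
 * used above: DML right indexing X[rl:ru, cl:cu] is 1-based and inclusive,
 * while sliceOperations expects 0-based inclusive bounds, hence the (val-1)
 * shifts. The helper name toZeroBasedBounds is hypothetical.
 */
public static int[] toZeroBasedBounds(long rl, long ru, long cl, long cu) {
  // e.g., X[2:3, 1:4] -> rows 1..2, cols 0..3 in 0-based inclusive indexing
  return new int[] { (int) (rl - 1), (int) (ru - 1), (int) (cl - 1), (int) (cu - 1) };
}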
@Override
public MatrixBlock call(MatrixBlock arg0)
  throws Exception
{
  MatrixBlock pmV = _pmV.getMatrixBlock(1, 1);

  // execute mapmmchain operation
  MatrixBlock out = new MatrixBlock();
  return arg0.chainMatrixMultOperations(pmV, null, out, ChainType.XtXv);
}
@Override
public Tuple2<MatrixIndexes, MatrixBlock> call(Tuple2<MatrixIndexes, MatrixBlock> arg0)
  throws Exception
{
  MatrixBlock pmV = _pmV.getMatrixBlock(1, 1);
  MatrixIndexes ixIn = arg0._1();
  MatrixBlock blkIn = arg0._2();
  int rowIx = (int) ixIn.getRowIndex();

  MatrixIndexes ixOut = new MatrixIndexes(1, 1);
  MatrixBlock blkOut = new MatrixBlock();

  // execute mapmmchain operation
  blkIn.chainMatrixMultOperations(pmV, _pmW.getMatrixBlock(rowIx, 1), blkOut, ChainType.XtwXv);

  // output new tuple
  return new Tuple2<MatrixIndexes, MatrixBlock>(ixOut, blkOut);
}
/**
 * Replaces an as.scalar over right indexing of a 1x1 region of a small matrix
 * (less than 10^6 cells) with a literal.
 *
 * @param c the candidate hop
 * @param vars the local variable map holding the matrix object
 * @return the replacement literal, or null if not applicable
 * @throws DMLRuntimeException if the value lookup fails
 */
private static LiteralOp replaceLiteralValueTypeCastRightIndexing(Hop c, LocalVariableMap vars)
  throws DMLRuntimeException
{
  LiteralOp ret = null;

  // as.scalar/right indexing w/ literals/vars and matrix less than 10^6 cells
  if( c instanceof UnaryOp && ((UnaryOp) c).getOp() == OpOp1.CAST_AS_SCALAR
    && c.getInput().get(0) instanceof IndexingOp
    && c.getInput().get(0).getDataType() == DataType.MATRIX )
  {
    IndexingOp rix = (IndexingOp) c.getInput().get(0);
    Hop data = rix.getInput().get(0);
    Hop rl = rix.getInput().get(1);
    Hop ru = rix.getInput().get(2);
    Hop cl = rix.getInput().get(3);
    Hop cu = rix.getInput().get(4);

    if( rix.dimsKnown() && rix.getDim1() == 1 && rix.getDim2() == 1
      && data instanceof DataOp && vars.keySet().contains(data.getName())
      && isIntValueDataLiteral(rl, vars) && isIntValueDataLiteral(ru, vars)
      && isIntValueDataLiteral(cl, vars) && isIntValueDataLiteral(cu, vars) )
    {
      long rlval = getIntValueDataLiteral(rl, vars);
      long clval = getIntValueDataLiteral(cl, vars);

      MatrixObject mo = (MatrixObject) vars.get(data.getName());

      // get the dimension information from the matrix object because the hop
      // dimensions might not have been updated during recompile
      if( mo.getNumRows() * mo.getNumColumns() < REPLACE_LITERALS_MAX_MATRIX_SIZE ) {
        MatrixBlock mBlock = mo.acquireRead();
        double value = mBlock.getValue((int) rlval - 1, (int) clval - 1);
        mo.release();

        // literal substitution (always double)
        ret = new LiteralOp(value);
      }
    }
  }

  return ret;
}
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues,
    IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor)
  throws DMLRuntimeException
{
  ArrayList<IndexedMatrixValue> blkList = cachedValues.get(input);
  if( blkList == null )
    return;

  for( IndexedMatrixValue in1 : blkList )
  {
    if( in1 == null )
      continue;

    MatrixIndexes inix = in1.getIndexes();
    MatrixBlock blk = (MatrixBlock) in1.getValue();
    long rixOffset = (inix.getRowIndex() - 1) * blockRowFactor;
    boolean firstBlk = (inix.getRowIndex() == 1);
    boolean lastBlk = (inix.getRowIndex() == _lastRowBlockIndex);

    // introduce offsets w/ init value for first row
    if( firstBlk ) {
      IndexedMatrixValue out = cachedValues.holdPlace(output, valueClass);
      ((MatrixBlock) out.getValue()).reset(1, blk.getNumColumns());
      if( _initValue != 0 ) {
        for( int j = 0; j < blk.getNumColumns(); j++ )
          ((MatrixBlock) out.getValue()).appendValue(0, j, _initValue);
      }
      out.getIndexes().setIndexes(1, inix.getColumnIndex());
    }

    // output splitting (shift by one), preaggregated offset used by subsequent block
    for( int i = 0; i < blk.getNumRows(); i++ )
      if( !(lastBlk && i == (blk.getNumRows() - 1)) ) // ignore last row
      {
        IndexedMatrixValue out = cachedValues.holdPlace(output, valueClass);
        MatrixBlock tmpBlk = (MatrixBlock) out.getValue();
        tmpBlk.reset(1, blk.getNumColumns());
        blk.sliceOperations(i, i, 0, blk.getNumColumns() - 1, tmpBlk);
        out.getIndexes().setIndexes(rixOffset + i + 2, inix.getColumnIndex());
      }
  }
}
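/*
 * A tiny worked example (illustrative names, not from the source) of the
 * shift-by-one row mapping above: with a 1-based block row index b and block
 * size blockRowFactor, row i of the input block lands at output row index
 * (b-1)*blockRowFactor + i + 2, i.e., one row below its absolute position
 * (b-1)*blockRowFactor + i + 1, so each row serves as the offset of its successor.
 */
public static long splitTargetRowIndex(long blockRowIndex, int blockRowFactor, int i) {
  long rixOffset = (blockRowIndex - 1) * blockRowFactor;
  return rixOffset + i + 2; // e.g., b=1, i=0 -> 2; b=2, i=999 (blockRowFactor=1000) -> 2001
}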
/**
 * @param sptype the sparsity type of the generated input data
 * @param vtype the value type (constant vs rounded random)
 * @param compress true if the matrix block should be compressed
 */
private void runTransposeSelfMatrixMultTest(SparsityType sptype, ValueType vtype, boolean compress)
{
  try {
    // prepare sparsity for input data
    double sparsity = -1;
    switch( sptype ) {
      case DENSE:  sparsity = sparsity1; break;
      case SPARSE: sparsity = sparsity2; break;
      case EMPTY:  sparsity = sparsity3; break;
    }

    // generate input data
    double min = (vtype == ValueType.CONST) ? 10 : -10;
    double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
    if( vtype == ValueType.RAND_ROUND )
      input = TestUtils.round(input);
    MatrixBlock mb = DataConverter.convertToMatrixBlock(input);

    // compress given matrix block
    CompressedMatrixBlock cmb = new CompressedMatrixBlock(mb);
    if( compress )
      cmb.compress();

    // transpose-self matrix mult uncompressed
    MatrixBlock ret1 = mb.transposeSelfMatrixMultOperations(new MatrixBlock(), MMTSJType.LEFT);

    // transpose-self matrix mult compressed
    MatrixBlock ret2 = cmb.transposeSelfMatrixMultOperations(new MatrixBlock(), MMTSJType.LEFT);

    // compare result with input
    double[][] d1 = DataConverter.convertToDoubleMatrix(ret1);
    double[][] d2 = DataConverter.convertToDoubleMatrix(ret2);
    TestUtils.compareMatrices(d1, d2, cols, cols, 0.0000001);
  }
  catch( Exception ex ) {
    throw new RuntimeException(ex);
  }
}
/**
 * Replaces an as.scalar over a 1x1 matrix read with a literal.
 *
 * @param c the candidate hop
 * @param vars the local variable map holding the matrix object
 * @return the replacement literal, or null if not applicable
 * @throws DMLRuntimeException if the matrix is not of dimension 1x1
 */
private static LiteralOp replaceLiteralDataTypeCastMatrixRead(Hop c, LocalVariableMap vars)
  throws DMLRuntimeException
{
  LiteralOp ret = null;

  // as.scalar/matrix read - literal replacement
  if( c instanceof UnaryOp && ((UnaryOp) c).getOp() == OpOp1.CAST_AS_SCALAR
    && c.getInput().get(0) instanceof DataOp
    && c.getInput().get(0).getDataType() == DataType.MATRIX )
  {
    Data dat = vars.get(c.getInput().get(0).getName());
    if( dat != null ) // required for selective constant propagation
    {
      // cast as scalar (see VariableCPInstruction)
      MatrixObject mo = (MatrixObject) dat;
      MatrixBlock mBlock = mo.acquireRead();
      if( mBlock.getNumRows() != 1 || mBlock.getNumColumns() != 1 )
        throw new DMLRuntimeException("Dimension mismatch - unable to cast matrix of dimension ("
          + mBlock.getNumRows() + " x " + mBlock.getNumColumns() + ") to scalar.");
      double value = mBlock.getValue(0, 0);
      mo.release();

      // literal substitution (always double)
      ret = new LiteralOp(value);
    }
  }

  return ret;
}
@Override
protected Tuple2<MatrixIndexes, MatrixBlock> computeNext(Tuple2<MatrixIndexes, MatrixBlock> arg)
  throws Exception
{
  MatrixIndexes ixIn = arg._1();
  MatrixBlock blkIn = arg._2();
  MatrixBlock blkOut = new MatrixBlock();

  if( _type == CacheType.LEFT )
  {
    // get the left hand side matrix block from the broadcast
    MatrixBlock left = _pbc.getMatrixBlock(1, (int) ixIn.getRowIndex());

    // execute index preserving matrix multiplication
    left.aggregateBinaryOperations(left, blkIn, blkOut, _op);
  }
  else // if( _type == CacheType.RIGHT )
  {
    // get the right hand side matrix block from the broadcast
    MatrixBlock right = _pbc.getMatrixBlock((int) ixIn.getColumnIndex(), 1);

    // execute index preserving matrix multiplication
    blkIn.aggregateBinaryOperations(blkIn, right, blkOut, _op);
  }

  return new Tuple2<MatrixIndexes, MatrixBlock>(ixIn, blkOut);
}
@Override
public void processInstruction(ExecutionContext ec)
  throws DMLRuntimeException
{
  CPOperand mat = (input1.getDataType() == DataType.MATRIX) ? input1 : input2;
  CPOperand scalar = (input1.getDataType() == DataType.MATRIX) ? input2 : input1;
  MatrixBlock inBlock = ec.getMatrixInput(mat.getName());
  ScalarObject constant = (ScalarObject) ec.getScalarInput(scalar.getName(), scalar.getValueType(), scalar.isLiteral());

  ScalarOperator sc_op = (ScalarOperator) _optr;
  sc_op.setConstant(constant.getDoubleValue());

  MatrixBlock retBlock = (MatrixBlock) inBlock.scalarOperations(sc_op, new MatrixBlock());

  ec.releaseMatrixInput(mat.getName());

  // ensure right dense/sparse output representation
  // (guarded by released input memory; no second matrix input)
  if( checkGuardedRepresentationChange(inBlock, null, retBlock) ) {
    retBlock.examSparsity();
  }

  ec.setMatrixOutput(output.getName(), retBlock);
}
@Override
public MatrixBlock apply(FrameBlock in, MatrixBlock out)
{
  for( int j = 0; j < _colList.length; j++ ) {
    int col = _colList[j] - 1;
    ValueType vt = in.getSchema()[col];
    for( int i = 0; i < in.getNumRows(); i++ ) {
      Object val = in.get(i, col);
      out.quickSetValue(i, col,
        (val == null || (vt == ValueType.STRING && val.toString().isEmpty())) ?
        Double.NaN : UtilFunctions.objectToDouble(vt, val));
    }
  }
  return out;
}
public int getBufferSize()
{
  if( _mapBuffer != null ) {
    int ret = 0;
    for( Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet() )
      ret += ctable.getValue().size();
    return ret;
  }
  else if( _blockBuffer != null ) {
    int ret = 0;
    for( Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet() ) {
      MatrixBlock mb = ctable.getValue();
      mb.recomputeNonZeros();
      // estimate in-memory size via sparsity nnz/(rows*cols)
      ret += MatrixBlock.estimateSizeInMemory(mb.getNumRows(), mb.getNumColumns(),
        (double) mb.getNonZeros() / ((long) mb.getNumRows() * mb.getNumColumns()));
    }
    return ret;
  }
  else {
    return 0;
  }
}
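/*
 * A quick sanity check (illustrative helper, not from the source) of the
 * sparsity argument used above: sparsity is the fraction of non-zero cells,
 * nnz / (rows * cols), a value in [0, 1].
 */
public static double sparsity(long nnz, long rows, long cols) {
  // e.g., 5000 non-zeros in a 1000 x 100 matrix -> 5000/100000 = 0.05
  return (double) nnz / ((double) rows * cols);
}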
/**
 * Writes the row range [rl, ru) of the given matrix block to a CSV file.
 *
 * @param path the output file path
 * @param job the job configuration
 * @param fs the file system handle
 * @param src the source matrix block
 * @param rl the lower row index (inclusive)
 * @param ru the upper row index (exclusive)
 * @param props the CSV format properties (delimiter, header, sparse output)
 * @throws IOException if the write fails
 */
protected final void writeCSVMatrixToFile(Path path, JobConf job, FileSystem fs, MatrixBlock src,
    int rl, int ru, CSVFileFormatProperties props)
  throws IOException
{
  boolean sparse = src.isInSparseFormat();
  int clen = src.getNumColumns();

  // create buffered writer
  BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

  try {
    // for obj reuse and preventing repeated buffer re-allocations
    StringBuilder sb = new StringBuilder();

    props = (props == null) ? new CSVFileFormatProperties() : props;
    String delim = props.getDelim();
    boolean csvsparse = props.isSparse();

    // write header line, if needed
    if( props.hasHeader() && rl == 0 ) {
      // write row chunk-wise to prevent OOM on large number of columns
      for( int bj = 0; bj < clen; bj += BLOCKSIZE_J ) {
        for( int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++ ) {
          sb.append("C" + (j + 1));
          if( j < clen - 1 )
            sb.append(delim);
        }
        br.write(sb.toString());
        sb.setLength(0);
      }
      sb.append('\n');
      br.write(sb.toString());
      sb.setLength(0);
    }

    // write data lines
    if( sparse ) // SPARSE
    {
      SparseBlock sblock = src.getSparseBlock();
      for( int i = rl; i < ru; i++ ) {
        // write row chunk-wise to prevent OOM on large number of columns
        int prev_jix = -1;
        if( sblock != null && i < sblock.numRows() && !sblock.isEmpty(i) ) {
          int pos = sblock.pos(i);
          int alen = sblock.size(i);
          int[] aix = sblock.indexes(i);
          double[] avals = sblock.values(i);

          for( int j = pos; j < pos + alen; j++ ) {
            int jix = aix[j];

            // output empty fields, if needed
            for( int j2 = prev_jix; j2 < jix - 1; j2++ ) {
              if( !csvsparse )
                sb.append('0');
              sb.append(delim);

              // flush buffered string
              if( j2 % BLOCKSIZE_J == 0 ) {
                br.write(sb.toString());
                sb.setLength(0);
              }
            }

            // output the value (non-zero) and flush buffered string
            sb.append(avals[j]);
            if( jix < clen - 1 )
              sb.append(delim);
            br.write(sb.toString());
            sb.setLength(0);

            prev_jix = jix;
          }
        }

        // output empty fields at the end of the row;
        // in case of an empty row, output (clen-1) empty fields
        for( int bj = prev_jix + 1; bj < clen; bj += BLOCKSIZE_J ) {
          for( int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++ ) {
            if( !csvsparse )
              sb.append('0');
            if( j < clen - 1 )
              sb.append(delim);
          }
          br.write(sb.toString());
          sb.setLength(0);
        }
        sb.append('\n');
        br.write(sb.toString());
        sb.setLength(0);
      }
    }
    else // DENSE
    {
      for( int i = rl; i < ru; i++ ) {
        // write row chunk-wise to prevent OOM on large number of columns
        for( int bj = 0; bj < clen; bj += BLOCKSIZE_J ) {
          for( int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++ ) {
            double lvalue = src.getValueDenseUnsafe(i, j);
            if( lvalue != 0 ) // for nnz
              sb.append(lvalue);
            else if( !csvsparse )
              sb.append('0');
            if( j != clen - 1 )
              sb.append(delim);
          }
          br.write(sb.toString());
          sb.setLength(0);
        }
        sb.append('\n');
        br.write(sb.toString());
        sb.setLength(0);
      }
    }
  }
  finally {
    IOUtilFunctions.closeSilently(br);
  }
}
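/*
 * A minimal, standalone sketch (plain Java, illustrative names) of how a
 * sparse row is serialized above: zeros between stored non-zeros become empty
 * fields (or '0' when the csvsparse option is disabled), without the chunked
 * flushing of the production code.
 */
public static String sparseRowToCsv(int[] aix, double[] avals, int clen, String delim, boolean csvsparse) {
  StringBuilder sb = new StringBuilder();
  int prev = -1;
  for( int k = 0; k < aix.length; k++ ) {
    for( int j = prev + 1; j < aix[k]; j++ ) // empty fields before next non-zero
      sb.append(csvsparse ? "" : "0").append(delim);
    sb.append(avals[k]);                     // the non-zero value itself
    if( aix[k] < clen - 1 )
      sb.append(delim);
    prev = aix[k];
  }
  for( int j = prev + 1; j < clen; j++ ) {   // trailing empty fields
    if( !csvsparse )
      sb.append('0');
    if( j < clen - 1 )
      sb.append(delim);
  }
  return sb.toString();
  // e.g., aix={1,3}, avals={7.0,9.0}, clen=5, delim=",", csvsparse=true -> ",7.0,,9.0,"
}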
/**
 * Writes the given matrix block to a single CSV file on HDFS.
 *
 * @param path the output file path
 * @param job the job configuration
 * @param fs the file system handle
 * @param src the source matrix block
 * @param csvprops the CSV format properties
 * @throws IOException if the write fails
 */
protected void writeCSVMatrixToHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock src,
    CSVFileFormatProperties csvprops)
  throws IOException
{
  // sequential write csv file
  writeCSVMatrixToFile(path, job, fs, src, 0, (int) src.getNumRows(), csvprops);
}
@Override
public MatrixBlock call(MatrixBlock arg0)
  throws Exception
{
  // maintain number of non-zeros via accumulator, pass block through unchanged
  _aNnz.add((double) arg0.getNonZeros());
  return arg0;
}
@Override
public void processInstruction(ExecutionContext ec)
  throws DMLRuntimeException
{
  // get inputs
  MatrixBlock matBlock1 = ec.getMatrixInput(input1.getName());
  MatrixBlock matBlock2 = ec.getMatrixInput(input2.getName());

  // check input dimensions
  if( _type == AppendType.CBIND && matBlock1.getNumRows() != matBlock2.getNumRows() ) {
    throw new DMLRuntimeException("Append-cbind is not possible for input matrices "
      + input1.getName() + " and " + input2.getName() + " with different number of rows: "
      + matBlock1.getNumRows() + " vs " + matBlock2.getNumRows());
  }
  else if( _type == AppendType.RBIND && matBlock1.getNumColumns() != matBlock2.getNumColumns() ) {
    throw new DMLRuntimeException("Append-rbind is not possible for input matrices "
      + input1.getName() + " and " + input2.getName() + " with different number of columns: "
      + matBlock1.getNumColumns() + " vs " + matBlock2.getNumColumns());
  }

  // execute append operations (append both inputs to initially empty output)
  MatrixBlock ret = matBlock1.appendOperations(matBlock2, new MatrixBlock(), _type == AppendType.CBIND);

  // set output and release inputs
  ec.setMatrixOutput(output.getName(), ret);
  ec.releaseMatrixInput(input1.getName());
  ec.releaseMatrixInput(input2.getName());
}
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues,
    IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor)
  throws DMLRuntimeException
{
  QuaternaryOperator qop = (QuaternaryOperator) optr;

  ArrayList<IndexedMatrixValue> blkList = cachedValues.get(_input1);
  if( blkList != null )
    for( IndexedMatrixValue imv : blkList )
    {
      // Step 1: prepare inputs and output
      if( imv == null )
        continue;
      MatrixIndexes inIx = imv.getIndexes();
      MatrixValue inVal = imv.getValue();

      // allocate space for the output value
      IndexedMatrixValue iout = null;
      if( output == _input1 )
        iout = tempValue;
      else
        iout = cachedValues.holdPlace(output, valueClass);

      MatrixIndexes outIx = iout.getIndexes();
      MatrixValue outVal = iout.getValue();

      // Step 2: get remaining inputs: Wij, Ui, Vj
      MatrixValue Xij = inVal;

      // get Wij if existing (null if WeightsType.NONE or WSigmoid of any type)
      IndexedMatrixValue iWij = (_input4 != -1) ? cachedValues.getFirst(_input4) : null;
      MatrixValue Wij = (iWij != null) ? iWij.getValue() : null;
      if( null == Wij && qop.hasFourInputs() ) {
        MatrixBlock mb = new MatrixBlock(1, 1, false);
        String[] parts = InstructionUtils.getInstructionParts(instString);
        mb.quickSetValue(0, 0, Double.valueOf(parts[4]));
        Wij = mb;
      }

      // get Ui and Vj, potentially through distributed cache
      MatrixValue Ui = (!_cacheU) ? cachedValues.getFirst(_input2).getValue() // U
        : MRBaseForCommonInstructions.dcValues.get(_input2)
            .getDataBlock((int) inIx.getRowIndex(), 1).getValue();
      MatrixValue Vj = (!_cacheV) ? cachedValues.getFirst(_input3).getValue() // t(V)
        : MRBaseForCommonInstructions.dcValues.get(_input3)
            .getDataBlock((int) inIx.getColumnIndex(), 1).getValue();

      // handle special input case: V passed through shuffle instead of t(V)
      if( Ui.getNumColumns() != Vj.getNumColumns() ) {
        Vj = LibMatrixReorg.reorg((MatrixBlock) Vj,
          new MatrixBlock(Vj.getNumColumns(), Vj.getNumRows(), Vj.isInSparseFormat()),
          new ReorgOperator(SwapIndex.getSwapIndexFnObject()));
      }

      // Step 3: process instruction
      Xij.quaternaryOperations(qop, Ui, Vj, Wij, outVal);

      // set output indexes
      if( qop.wtype1 != null || qop.wtype4 != null )
        outIx.setIndexes(1, 1); // wsloss
      else if( qop.wtype2 != null || qop.wtype5 != null
        || (qop.wtype3 != null && qop.wtype3.isBasic()) )
        outIx.setIndexes(inIx); // wsigmoid/wdivmm-basic
      else { // wdivmm
        boolean left = qop.wtype3.isLeft();
        outIx.setIndexes(left ? inIx.getColumnIndex() : inIx.getRowIndex(), 1);
      }

      // put the output value in the cache
      if( iout == tempValue )
        cachedValues.add(output, iout);
    }
}
/**
 * Reads a matrix in binary cell format from HDFS into the given destination block.
 *
 * @param path the input file or directory path
 * @param job the job configuration
 * @param fs the file system handle
 * @param dest the destination matrix block
 * @param rlen the number of rows
 * @param clen the number of columns
 * @param brlen the number of rows per block
 * @param bclen the number of columns per block
 * @throws IOException if the read fails or a cell is out of range
 */
@SuppressWarnings("deprecation")
private void readBinaryCellMatrixFromHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock dest,
    long rlen, long clen, int brlen, int bclen)
  throws IOException
{
  boolean sparse = dest.isInSparseFormat();
  MatrixIndexes key = new MatrixIndexes();
  MatrixCell value = new MatrixCell();
  int row = -1;
  int col = -1;

  try {
    for( Path lpath : getSequenceFilePaths(fs, path) ) // 1..N files
    {
      // directly read from sequence files (individual partfiles)
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
      try {
        // append maintains nnz for both dense and sparse, hence a single code path
        while( reader.next(key, value) ) {
          row = (int) key.getRowIndex() - 1;
          col = (int) key.getColumnIndex() - 1;
          double lvalue = value.getValue();
          dest.appendValue(row, col, lvalue);
        }
      }
      finally {
        IOUtilFunctions.closeSilently(reader);
      }
    }

    if( sparse )
      dest.sortSparseRows();
  }
  catch( Exception ex ) {
    // post-mortem error handling and bounds checking
    if( row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen ) {
      throw new IOException("Matrix cell [" + (row + 1) + "," + (col + 1) + "] "
        + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
    }
    else {
      throw new IOException("Unable to read matrix in binary cell format.", ex);
    }
  }
}
@Override
public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0)
  throws Exception
{
  ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();
  MatrixIndexes ixIn = arg0._1();
  MatrixBlock mb2 = arg0._2();

  // get the broadcast permutation vector block
  MatrixBlock mb1 = _pmV.getMatrixBlock((int) ixIn.getRowIndex(), 1);

  // compute target block indexes
  long minPos = UtilFunctions.toLong(mb1.minNonZero());
  long maxPos = UtilFunctions.toLong(mb1.max());
  long rowIX1 = (minPos - 1) / _brlen + 1;
  long rowIX2 = (maxPos - 1) / _brlen + 1;
  boolean multipleOuts = (rowIX1 != rowIX2);

  if( minPos >= 1 ) // at least one row selected
  {
    // output sparsity estimate
    double spmb1 = OptimizerUtils.getSparsity(mb1.getNumRows(), 1, mb1.getNonZeros());
    long estnnz = (long) (spmb1 * mb2.getNonZeros());
    boolean sparse = MatrixBlock.evalSparseFormatInMemory(_brlen, mb2.getNumColumns(), estnnz);

    // compute and allocate output blocks
    MatrixBlock out1 = new MatrixBlock();
    MatrixBlock out2 = multipleOuts ? new MatrixBlock() : null;
    out1.reset(_brlen, mb2.getNumColumns(), sparse);
    if( out2 != null )
      out2.reset(UtilFunctions.computeBlockSize(_rlen, rowIX2, _brlen), mb2.getNumColumns(), sparse);

    // compute core matrix permutation (assumes that out1 has default blocksize,
    // hence we do a meta data correction afterwards)
    mb1.permutationMatrixMultOperations(mb2, out1, out2);
    out1.setNumRows(UtilFunctions.computeBlockSize(_rlen, rowIX1, _brlen));
    ret.add(new Tuple2<MatrixIndexes, MatrixBlock>(new MatrixIndexes(rowIX1, ixIn.getColumnIndex()), out1));

    if( out2 != null )
      ret.add(new Tuple2<MatrixIndexes, MatrixBlock>(new MatrixIndexes(rowIX2, ixIn.getColumnIndex()), out2));
  }

  return ret;
}
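/*
 * An illustrative sketch (hypothetical helper names) of the target-block index
 * arithmetic above: a 1-based absolute row position pos maps to row-block
 * index (pos-1)/brlen + 1, and the last block of an rlen-row matrix may be
 * smaller than brlen, mirroring UtilFunctions.computeBlockSize.
 */
public static long rowBlockIndex(long pos, int brlen) {
  return (pos - 1) / brlen + 1; // e.g., pos=1000, brlen=1000 -> 1; pos=1001 -> 2
}

public static int blockSize(long rlen, long rix, int brlen) {
  // rows of block rix; e.g., rlen=2500, rix=3, brlen=1000 -> 500
  return (int) Math.min(brlen, rlen - (rix - 1) * brlen);
}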
@SuppressWarnings("deprecation") public void flushBuffer(Reporter reporter) throws RuntimeException { try { if (_mapBuffer != null) { MatrixIndexes key = null; // new MatrixIndexes(); MatrixCell value = new MatrixCell(); for (Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet()) { ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes); CTableMap resultMap = ctable.getValue(); // maintain result dims and nonzeros for (Integer i : resultIDs) { _resultNonZeros[i] += resultMap.size(); if (_resultDimsUnknown[i] == (byte) 1) { _resultMaxRowDims[i] = Math.max(resultMap.getMaxRow(), _resultMaxRowDims[i]); _resultMaxColDims[i] = Math.max(resultMap.getMaxColumn(), _resultMaxColDims[i]); } } // output result data for (LLDoubleEntry e : resultMap.entrySet()) { key = new MatrixIndexes(e.key1, e.key2); value.setValue(e.value); for (Integer i : resultIDs) { _collector.collectOutput(key, value, i, reporter); } } } } else if (_blockBuffer != null) { MatrixIndexes key = new MatrixIndexes(1, 1); // DataConverter.writeBinaryBlockMatrixToHDFS(path, job, mat, mc.get_rows(), mc.get_cols(), // mc.get_rows_per_block(), mc.get_cols_per_block(), replication); for (Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet()) { ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes); MatrixBlock outBlock = ctable.getValue(); outBlock.recomputeNonZeros(); // TODO: change hard coding of 1000 int brlen = 1000, bclen = 1000; int rlen = outBlock.getNumRows(); int clen = outBlock.getNumColumns(); // final output matrix is smaller than a single block if (rlen <= brlen && clen <= brlen) { key = new MatrixIndexes(1, 1); for (Integer i : resultIDs) { _collector.collectOutput(key, outBlock, i, reporter); _resultNonZeros[i] += outBlock.getNonZeros(); } } else { // Following code is similar to that in // DataConverter.DataConverter.writeBinaryBlockMatrixToHDFS // initialize blocks for reuse (at most 4 different blocks required) MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse( rlen, clen, brlen, bclen, true, outBlock.getNonZeros()); // create and write subblocks of matrix for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++) { for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) { int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen; int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen; int row_offset = blockRow * brlen; int col_offset = blockCol * bclen; // get reuse matrix block MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen); // copy submatrix to block outBlock.sliceOperations( row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block); // TODO: skip empty "block" // append block to sequence file key.setIndexes(blockRow + 1, blockCol + 1); for (Integer i : resultIDs) { _collector.collectOutput(key, block, i, reporter); _resultNonZeros[i] += block.getNonZeros(); } // reset block for later reuse block.reset(); } } } } } else { throw new DMLRuntimeException("Unexpected.. both ctable buffers are empty."); } } catch (Exception ex) { throw new RuntimeException("Failed to flush ctable buffer.", ex); } // remove existing partial ctables if (_mapBuffer != null) _mapBuffer.clear(); else _blockBuffer.clear(); }
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues,
    IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor)
  throws DMLRuntimeException
{
  IndexedMatrixValue in1 = cachedValues.getFirst(input1); // original data
  IndexedMatrixValue in2 = cachedValues.getFirst(input2); // offset row vector

  if( in1 == null || in2 == null )
    throw new DMLRuntimeException("Unexpected empty input (left="
      + ((in1 == null) ? "null" : in1.getIndexes()) + ", right="
      + ((in2 == null) ? "null" : in2.getIndexes()) + ").");

  // prepare inputs and outputs
  IndexedMatrixValue out = cachedValues.holdPlace(output, valueClass);
  MatrixBlock data = (MatrixBlock) in1.getValue();
  MatrixBlock offset = (MatrixBlock) in2.getValue();
  MatrixBlock blk = (MatrixBlock) out.getValue();
  blk.reset(data.getNumRows(), data.getNumColumns());

  // blockwise offset aggregation and prefix sum computation
  MatrixBlock data2 = new MatrixBlock(data); // copy of the input data
  MatrixBlock fdata2 = data2.sliceOperations(0, 0, 0, data2.getNumColumns() - 1, new MatrixBlock()); // first row
  fdata2.binaryOperationsInPlace(_bop, offset); // add offset to first row
  data2.copy(0, 0, 0, data2.getNumColumns() - 1, fdata2, true); // write first row back
  data2.unaryOperations(_uop, blk); // compute columnwise prefix sums/prod/min/max

  // set output indexes
  out.getIndexes().setIndexes(in1.getIndexes());
}
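/*
 * A self-contained illustration (plain Java, hypothetical helper) of the
 * offset-then-prefix-sum step above for the cumsum case: the incoming offset
 * row is added to the first row of the block, then a columnwise running sum
 * is computed.
 */
public static double[][] cumsumWithOffset(double[][] data, double[] offset) {
  int rows = data.length, cols = data[0].length;
  double[][] out = new double[rows][cols];
  for( int j = 0; j < cols; j++ ) {
    out[0][j] = data[0][j] + offset[j];       // add offset to first row
    for( int i = 1; i < rows; i++ )
      out[i][j] = out[i - 1][j] + data[i][j]; // columnwise prefix sum
  }
  return out;
  // e.g., data={{1,1},{2,2}}, offset={10,20} -> {{11,21},{13,23}}
}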
@Override
public MatrixBlock call(MatrixBlock arg0)
  throws Exception
{
  return (MatrixBlock) arg0.unaryOperations(_op, new MatrixBlock());
}