コード例 #1
0
  /**
   * Appends the cached block of {@code input2} to the cached block of {@code input1} and
   * registers the result under {@code output}, keyed by the indexes of the left block.
   *
   * @throws DMLRuntimeException if either input block is absent from the cache
   */
  @Override
  public void processInstruction(
      Class<? extends MatrixValue> valueClass,
      CachedValueMap cachedValues,
      IndexedMatrixValue tempValue,
      IndexedMatrixValue zeroInput,
      int brlen,
      int bclen)
      throws DMLUnsupportedOperationException, DMLRuntimeException {
    // look up the two operands
    final IndexedMatrixValue lhs = cachedValues.getFirst(input1);
    final IndexedMatrixValue rhs = cachedValues.getFirst(input2);

    // both blocks are required for an append
    final boolean lhsMissing = (lhs == null);
    final boolean rhsMissing = (rhs == null);
    if (lhsMissing || rhsMissing) {
      throw new DMLRuntimeException(
          "Missing append input: isNull(left): " + lhsMissing + ", isNull(right): " + rhsMissing);
    }

    // append into a fresh block; the _cbind flag is handed through to appendOperations
    final MatrixBlock appended =
        ((MatrixBlock) lhs.getValue())
            .appendOperations((MatrixBlock) rhs.getValue(), new MatrixBlock(), _cbind);

    // publish the result under the output index
    cachedValues.add(output, new IndexedMatrixValue(lhs.getIndexes(), appended));
  }
コード例 #2
0
  /**
   * Writes the given matrix block as CSV to {@code fname}, replacing any existing file.
   *
   * <p>{@code brlen}, {@code bclen} and {@code nnz} are not used by this implementation.
   *
   * @param src matrix block to write
   * @param fname target file name
   * @param rlen expected number of rows of {@code src}
   * @param clen expected number of columns of {@code src}
   * @throws IOException if the block dimensions differ from {@code rlen} x {@code clen}, or if
   *     the underlying write fails
   */
  @Override
  public final void writeMatrixToHDFS(
      MatrixBlock src, String fname, long rlen, long clen, int brlen, int bclen, long nnz)
      throws IOException, DMLRuntimeException {
    // the block has to agree with the declared metadata
    final boolean rowsDiffer = (src.getNumRows() != rlen);
    if (rowsDiffer || src.getNumColumns() != clen) {
      final String actual = src.getNumRows() + "x" + src.getNumColumns();
      final String expected = rlen + "x" + clen;
      throw new IOException(
          "Matrix dimensions mismatch with metadata: " + actual + " vs " + expected + ".");
    }

    // set up file system handles
    final JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    final FileSystem fileSystem = FileSystem.get(conf);
    final Path target = new Path(fname);

    // drop a stale file of the same name before writing
    MapReduceTool.deleteFileIfExistOnHDFS(fname);

    // actual CSV write
    writeCSVMatrixToHDFS(target, conf, fileSystem, src, _props);

    // clean up local checksum files
    IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fileSystem, target);
  }
コード例 #3
0
  /**
   * Evaluates a full unary aggregate directly on an in-memory matrix block.
   *
   * @param auop unary aggregate hop; its operation type selects the aggregate to compute
   * @param mb matrix block to aggregate
   * @return the aggregate value (sum, sum of squares, min or max)
   * @throws DMLRuntimeException if the operation is none of SUM, SUM_SQ, MIN, MAX
   */
  private static double replaceUnaryAggregate(AggUnaryOp auop, MatrixBlock mb)
      throws DMLRuntimeException {
    // timing is only taken when statistics reporting is switched on
    final boolean reportStats = DMLScript.STATISTICS && REPORT_LITERAL_REPLACE_OPS_STATS;
    final long start = reportStats ? System.nanoTime() : 0;

    // dispatch on the aggregate type
    final double result;
    switch (auop.getOp()) {
      case SUM:
        result = mb.sum();
        break;
      case SUM_SQ:
        result = mb.sumSq();
        break;
      case MIN:
        result = mb.min();
        break;
      case MAX:
        result = mb.max();
        break;
      default:
        throw new DMLRuntimeException("Unsupported unary aggregate replacement: " + auop.getOp());
    }

    // record elapsed time under the "rlit" key
    if (reportStats) {
      final long end = System.nanoTime();
      Statistics.maintainCPHeavyHitters("rlit", end - start);
    }

    return result;
  }
コード例 #4
0
    /**
     * Aggregates one input block with the configured unary aggregate operator and places the
     * resulting row into a new block of partial aggregates.
     *
     * <p>The aggregate row is copied to row {@code (rowIndex - 1) % _brlen} of the output block,
     * and the output row index becomes {@code ceil(rowIndex / _brlen)}; the column index is the
     * one produced by the aggregation.
     */
    @Override
    public Tuple2<MatrixIndexes, MatrixBlock> call(Tuple2<MatrixIndexes, MatrixBlock> arg0)
        throws Exception {
      final MatrixIndexes inIx = arg0._1();
      final MatrixBlock inBlk = arg0._2();

      final MatrixIndexes outIx = new MatrixIndexes();
      final MatrixBlock aggBlk = new MatrixBlock();

      // run the unary aggregate and strip correction rows/columns if the operator keeps any
      final AggregateUnaryOperator uaggOp = (AggregateUnaryOperator) _op;
      OperationsOnMatrixValues.performAggregateUnary(
          inIx, inBlk, outIx, aggBlk, uaggOp, _brlen, _bclen);
      if (uaggOp.aggOp.correctionExists) {
        aggBlk.dropLastRowsOrColums(uaggOp.aggOp.correctionLocation);
      }

      // position of this partial aggregate in the expanded output
      final long totalAggRows = (long) Math.ceil((double) _rlen / _brlen);
      final long outRowIx = (long) Math.ceil((double) inIx.getRowIndex() / _brlen);
      final int outRows = (int) Math.min(totalAggRows - (outRowIx - 1) * _brlen, _brlen);
      final int outCols = aggBlk.getNumColumns();
      final int rowPos = (int) ((inIx.getRowIndex() - 1) % _brlen);

      // copy the aggregate row into its slot of a fresh output block
      final MatrixBlock expanded = new MatrixBlock(outRows, outCols, false);
      expanded.copy(rowPos, rowPos, 0, outCols - 1, aggBlk, true);
      outIx.setIndexes(outRowIx, outIx.getColumnIndex());

      return new Tuple2<MatrixIndexes, MatrixBlock>(outIx, expanded);
    }
コード例 #5
0
  /**
   * Reads a binary-cell matrix from {@code fname} into a newly allocated matrix block.
   *
   * @param fname input file name
   * @param rlen number of rows
   * @param clen number of columns
   * @param brlen block row length, passed through to the cell reader
   * @param bclen block column length, passed through to the cell reader
   * @param estnnz estimated number of non-zeros used when allocating the output
   * @return the populated matrix block
   */
  @Override
  public MatrixBlock readMatrixFromHDFS(
      String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
      throws IOException, DMLRuntimeException {
    // output block sized to the full matrix
    final MatrixBlock result =
        createOutputMatrixBlock(rlen, clen, (int) rlen, (int) clen, estnnz, true, false);

    // set up file system handles
    final JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    final FileSystem fileSystem = FileSystem.get(conf);
    final Path source = new Path(fname);

    // validate the input file before reading
    checkValidInputFile(fileSystem, source);

    // read all cells into the output block
    readBinaryCellMatrixFromHDFS(source, conf, fileSystem, result, rlen, clen, brlen, bclen);

    // re-evaluate the sparse/dense representation now that the data is loaded
    result.examSparsity();

    return result;
  }
コード例 #6
0
 /**
  * Checks whether the estimated size of {@code out} (from its row and column counts) is
  * strictly smaller than the combined in-memory size of the inputs.
  *
  * @param in1 first input, may be {@code null} (counts as size 0)
  * @param in2 second input, may be {@code null} (counts as size 0)
  * @param out output block whose dimensions drive the estimate
  * @return {@code true} if the estimate is below the summed input sizes
  */
 protected boolean checkGuardedRepresentationChange(
     MatrixBlock in1, MatrixBlock in2, MatrixBlock out) {
   final double outputEstimate =
       OptimizerUtils.estimateSize(out.getNumRows(), out.getNumColumns());

   double inputSize = 0;
   if (in1 != null) {
     inputSize += in1.getInMemorySize();
   }
   if (in2 != null) {
     inputSize += in2.getInMemorySize();
   }

   return outputEstimate < inputSize;
 }
  /**
   * Builds a Spark data frame from an in-memory matrix block according to the given schema.
   *
   * <p>Each schema entry yields one data frame column named {@code C<position>}. A non-OBJECT
   * entry consumes one matrix column; an OBJECT entry consumes {@code colsVector} consecutive
   * matrix columns, which are packed into a single {@code DenseVector} cell.
   *
   * @param sqlctx SQL context used to create the data frame
   * @param mb source matrix block
   * @param containsID if {@code true}, a leading ID column holding the 1-based row number is added
   * @param schema value type per output column (excluding the optional ID column)
   * @return data frame over the converted rows
   * @throws DMLRuntimeException declared for the matrix slicing and conversion calls
   */
  @SuppressWarnings("resource")
  private DataFrame createDataFrame(
      SQLContext sqlctx, MatrixBlock mb, boolean containsID, ValueType[] schema)
      throws DMLRuntimeException {
    // create in-memory list of rows
    List<Row> list = new ArrayList<Row>();
    int off = (containsID ? 1 : 0);
    // row width: presumably assumes exactly one vector column spanning colsVector matrix
    // columns (colsVector is defined outside this block) -- TODO confirm
    int clen = mb.getNumColumns() + off - colsVector + 1;

    for (int i = 0; i < mb.getNumRows(); i++) {
      Object[] row = new Object[clen];
      // NOTE(review): i + 1 is boxed as an Integer, while the ID field below is declared as
      // DoubleType -- confirm the consumer tolerates this
      if (containsID) row[0] = i + 1;
      // j walks matrix columns, j2 walks schema/output columns
      for (int j = 0, j2 = 0; j < mb.getNumColumns(); j++, j2++) {
        if (schema[j2] != ValueType.OBJECT) {
          row[j2 + off] = UtilFunctions.doubleToObject(schema[j2], mb.quickGetValue(i, j));
        } else {
          // pack colsVector consecutive matrix cells of row i into one vector cell
          double[] tmp =
              DataConverter.convertToDoubleVector(
                  mb.sliceOperations(i, i, j, j + colsVector - 1, new MatrixBlock()));
          row[j2 + off] = new DenseVector(tmp);
          // skip the matrix columns just consumed (the loop header adds the final +1)
          j += colsVector - 1;
        }
      }
      list.add(RowFactory.create(row));
    }

    // create data frame schema
    List<StructField> fields = new ArrayList<StructField>();
    if (containsID)
      fields.add(
          DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
    for (int j = 0; j < schema.length; j++) {
      DataType dt = null;
      // map the value type to the corresponding Spark SQL type
      switch (schema[j]) {
        case STRING:
          dt = DataTypes.StringType;
          break;
        case DOUBLE:
          dt = DataTypes.DoubleType;
          break;
        case INT:
          dt = DataTypes.LongType;
          break;
        case OBJECT:
          dt = new VectorUDT();
          break;
        default:
          throw new RuntimeException("Unsupported value type.");
      }
      fields.add(DataTypes.createStructField("C" + (j + 1), dt, true));
    }
    StructType dfSchema = DataTypes.createStructType(fields);

    // create rdd and data frame
    // the wrapper around the existing spark context is intentionally not closed here
    JavaSparkContext sc = new JavaSparkContext(sqlctx.sparkContext());
    JavaRDD<Row> rowRDD = sc.parallelize(list);
    return sqlctx.createDataFrame(rowRDD, dfSchema);
  }
  /**
   * Converts a random matrix to a data frame, converts that data frame to binary-block frames,
   * and checks that the values read back equal the original matrix.
   *
   * <p>The global {@code DMLScript} settings for local Spark config and runtime platform are
   * switched for the duration of the test and restored afterwards.
   *
   * @param schema value types of the data frame columns
   * @param containsID whether the data frame carries a leading ID column
   * @param dense if {@code true} use {@code sparsity1}, otherwise {@code sparsity2}
   * @param unknownDims if {@code true} the conversion is given empty matrix characteristics
   */
  private void testDataFrameConversion(
      ValueType[] schema, boolean containsID, boolean dense, boolean unknownDims) {
    boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    RUNTIME_PLATFORM oldPlatform = DMLScript.rtplatform;

    SparkExecutionContext sec = null;

    try {
      DMLScript.USE_LOCAL_SPARK_CONFIG = true;
      DMLScript.rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;

      // generate input data and setup metadata
      int cols = schema.length + colsVector - 1;
      double sparsity = dense ? sparsity1 : sparsity2;
      double[][] A = TestUtils.round(getRandomMatrix(rows1, cols, -10, 1000, sparsity, 2373));
      MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
      int blksz = ConfigurationManager.getBlocksize();
      MatrixCharacteristics mc1 =
          new MatrixCharacteristics(rows1, cols, blksz, blksz, mbA.getNonZeros());
      MatrixCharacteristics mc2 =
          unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);

      // setup spark context
      sec = (SparkExecutionContext) ExecutionContextFactory.createContext();
      JavaSparkContext sc = sec.getSparkContext();
      SQLContext sqlctx = new SQLContext(sc);

      // create input data frame
      DataFrame df = createDataFrame(sqlctx, mbA, containsID, schema);

      // dataframe - frame conversion
      JavaPairRDD<Long, FrameBlock> out =
          FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, containsID);

      // get output frame block
      FrameBlock fbB =
          SparkExecutionContext.toFrameBlock(
              out, UtilFunctions.nCopies(cols, ValueType.DOUBLE), rows1, cols);

      // compare frame blocks
      MatrixBlock mbB = DataConverter.convertToMatrixBlock(fbB);
      double[][] B = DataConverter.convertToDoubleMatrix(mbB);
      TestUtils.compareMatrices(A, B, rows1, cols, eps);
    } catch (Exception ex) {
      throw new RuntimeException(ex);
    } finally {
      try {
        // sec is still null when setup failed before the context was created; closing it
        // unconditionally would raise a NullPointerException that hides the real failure
        if (sec != null) {
          sec.close();
        }
      } finally {
        // always restore the global settings, even if closing the context fails
        DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
        DMLScript.rtplatform = oldPlatform;
      }
    }
  }
コード例 #9
0
  /**
   * Replaces a supported full unary aggregate over a right-indexed matrix variable with a
   * literal, by slicing the in-memory matrix and computing the aggregate directly.
   *
   * <p>The replacement only applies when the indexed data is a {@code DataOp} bound in
   * {@code vars}, all four index bounds resolve to integer literals/variables, and the matrix
   * has fewer than {@code REPLACE_LITERALS_MAX_MATRIX_SIZE} cells.
   *
   * @param c hop to inspect
   * @param vars symbol table used to resolve the matrix and the index bounds
   * @return the replacement literal, or {@code null} if the pattern does not apply
   * @throws DMLRuntimeException if acquiring, slicing, aggregating or releasing the matrix fails
   */
  private static LiteralOp replaceLiteralFullUnaryAggregateRightIndexing(
      Hop c, LocalVariableMap vars) throws DMLRuntimeException {
    LiteralOp ret = null;

    // full unary aggregate w/ indexed matrix less than 10^6 cells
    if (c instanceof AggUnaryOp
        && isReplaceableUnaryAggregate((AggUnaryOp) c)
        && c.getInput().get(0) instanceof IndexingOp
        && c.getInput().get(0).getInput().get(0) instanceof DataOp) {
      IndexingOp rix = (IndexingOp) c.getInput().get(0);
      Hop data = rix.getInput().get(0);
      Hop rl = rix.getInput().get(1);
      Hop ru = rix.getInput().get(2);
      Hop cl = rix.getInput().get(3);
      Hop cu = rix.getInput().get(4);

      if (data instanceof DataOp
          && vars.keySet().contains(data.getName())
          && isIntValueDataLiteral(rl, vars)
          && isIntValueDataLiteral(ru, vars)
          && isIntValueDataLiteral(cl, vars)
          && isIntValueDataLiteral(cu, vars)) {
        long rlval = getIntValueDataLiteral(rl, vars);
        long ruval = getIntValueDataLiteral(ru, vars);
        long clval = getIntValueDataLiteral(cl, vars);
        long cuval = getIntValueDataLiteral(cu, vars);

        MatrixObject mo = (MatrixObject) vars.get(data.getName());

        // get the dimension information from the matrix object because the hop
        // dimensions might not have been updated during recompile
        if (mo.getNumRows() * mo.getNumColumns() < REPLACE_LITERALS_MAX_MATRIX_SIZE) {
          MatrixBlock mBlock = mo.acquireRead();
          double value;
          try {
            // index bounds are 1-based in the script, 0-based in the block
            MatrixBlock mBlock2 =
                mBlock.sliceOperations(
                    (int) (rlval - 1),
                    (int) (ruval - 1),
                    (int) (clval - 1),
                    (int) (cuval - 1),
                    new MatrixBlock());
            value = replaceUnaryAggregate((AggUnaryOp) c, mBlock2);
          } finally {
            // release the read acquisition even if slicing or aggregation fails
            mo.release();
          }

          // literal substitution (always double)
          ret = new LiteralOp(value);
        }
      }
    }

    return ret;
  }
コード例 #10
0
    /**
     * Runs the {@code XtXv} matrix-multiplication chain of the given block against block
     * (1,1) of {@code _pmV} and returns the result of that call.
     */
    @Override
    public MatrixBlock call(MatrixBlock arg0) throws Exception {
      final MatrixBlock vector = _pmV.getMatrixBlock(1, 1);

      // no weights for XtXv; the result goes into a fresh block
      return arg0.chainMatrixMultOperations(vector, null, new MatrixBlock(), ChainType.XtXv);
    }
コード例 #11
0
    /**
     * Runs the {@code XtwXv} matrix-multiplication chain of the input block against block
     * (1,1) of {@code _pmV}, weighted by the {@code _pmW} block at the input's row index, and
     * emits the result under the fixed index (1,1).
     */
    @Override
    public Tuple2<MatrixIndexes, MatrixBlock> call(Tuple2<MatrixIndexes, MatrixBlock> arg0)
        throws Exception {
      final MatrixBlock vector = _pmV.getMatrixBlock(1, 1);

      final MatrixBlock inputBlock = arg0._2();
      final int weightRow = (int) arg0._1().getRowIndex();

      // weights are looked up by the row index of the input block
      final MatrixBlock result = new MatrixBlock();
      inputBlock.chainMatrixMultOperations(
          vector, _pmW.getMatrixBlock(weightRow, 1), result, ChainType.XtwXv);

      // every partial result is keyed by (1,1)
      return new Tuple2<MatrixIndexes, MatrixBlock>(new MatrixIndexes(1, 1), result);
    }
コード例 #12
0
  /**
   * Replaces {@code as.scalar} over a 1x1 right-indexing of a matrix variable with a literal
   * holding the indexed cell value.
   *
   * <p>The replacement only applies when the indexing output is known to be 1x1, the indexed
   * data is a {@code DataOp} bound in {@code vars}, all four index bounds resolve to integer
   * literals/variables, and the matrix has fewer than {@code REPLACE_LITERALS_MAX_MATRIX_SIZE}
   * cells.
   *
   * @param c hop to inspect
   * @param vars symbol table used to resolve the matrix and the index bounds
   * @return the replacement literal, or {@code null} if the pattern does not apply
   * @throws DMLRuntimeException if acquiring or releasing the matrix fails
   */
  private static LiteralOp replaceLiteralValueTypeCastRightIndexing(Hop c, LocalVariableMap vars)
      throws DMLRuntimeException {
    LiteralOp ret = null;

    // as.scalar/right indexing w/ literals/vars and matrix less than 10^6 cells
    if (c instanceof UnaryOp
        && ((UnaryOp) c).getOp() == OpOp1.CAST_AS_SCALAR
        && c.getInput().get(0) instanceof IndexingOp
        && c.getInput().get(0).getDataType() == DataType.MATRIX) {
      IndexingOp rix = (IndexingOp) c.getInput().get(0);
      Hop data = rix.getInput().get(0);
      Hop rl = rix.getInput().get(1);
      Hop ru = rix.getInput().get(2);
      Hop cl = rix.getInput().get(3);
      Hop cu = rix.getInput().get(4);
      if (rix.dimsKnown()
          && rix.getDim1() == 1
          && rix.getDim2() == 1
          && data instanceof DataOp
          && vars.keySet().contains(data.getName())
          && isIntValueDataLiteral(rl, vars)
          && isIntValueDataLiteral(ru, vars)
          && isIntValueDataLiteral(cl, vars)
          && isIntValueDataLiteral(cu, vars)) {
        // only the lower bounds are needed because the indexed range is a single cell
        long rlval = getIntValueDataLiteral(rl, vars);
        long clval = getIntValueDataLiteral(cl, vars);

        MatrixObject mo = (MatrixObject) vars.get(data.getName());

        // get the dimension information from the matrix object because the hop
        // dimensions might not have been updated during recompile
        if (mo.getNumRows() * mo.getNumColumns() < REPLACE_LITERALS_MAX_MATRIX_SIZE) {
          MatrixBlock mBlock = mo.acquireRead();
          double value;
          try {
            // index bounds are 1-based in the script, 0-based in the block
            value = mBlock.getValue((int) rlval - 1, (int) clval - 1);
          } finally {
            // release the read acquisition even if the cell lookup fails
            mo.release();
          }

          // literal substitution (always double)
          ret = new LiteralOp(value);
        }
      }
    }

    return ret;
  }
コード例 #13
0
  /**
   * Splits each cached input block into single-row output blocks whose row index is shifted
   * down by one relative to the source row.
   *
   * <p>For the first row block an additional 1-row block is emitted at row index 1; it is
   * filled with {@code _initValue} when that value is non-zero. The last row of the block whose
   * row index equals {@code _lastRowBlockIndex} is not emitted.
   */
  @Override
  public void processInstruction(
      Class<? extends MatrixValue> valueClass,
      CachedValueMap cachedValues,
      IndexedMatrixValue tempValue,
      IndexedMatrixValue zeroInput,
      int blockRowFactor,
      int blockColFactor)
      throws DMLRuntimeException {
    final ArrayList<IndexedMatrixValue> inputs = cachedValues.get(input);
    if (inputs == null) {
      return;
    }

    for (IndexedMatrixValue entry : inputs) {
      if (entry == null) {
        continue;
      }

      final MatrixIndexes srcIx = entry.getIndexes();
      final MatrixBlock srcBlk = (MatrixBlock) entry.getValue();
      final long rowOffset = (srcIx.getRowIndex() - 1) * blockRowFactor;
      final boolean isFirst = (srcIx.getRowIndex() == 1);
      final boolean isLast = (srcIx.getRowIndex() == _lastRowBlockIndex);

      // the very first output row carries the initial value
      if (isFirst) {
        final IndexedMatrixValue head = cachedValues.holdPlace(output, valueClass);
        final MatrixBlock headBlk = (MatrixBlock) head.getValue();
        headBlk.reset(1, srcBlk.getNumColumns());
        if (_initValue != 0) {
          for (int c = 0; c < srcBlk.getNumColumns(); c++) {
            headBlk.appendValue(0, c, _initValue);
          }
        }
        head.getIndexes().setIndexes(1, srcIx.getColumnIndex());
      }

      // emit one block per source row, shifted by one row
      for (int r = 0; r < srcBlk.getNumRows(); r++) {
        // the final row of the last block has no successor and is dropped
        if (isLast && r == srcBlk.getNumRows() - 1) {
          continue;
        }

        final IndexedMatrixValue slot = cachedValues.holdPlace(output, valueClass);
        final MatrixBlock rowBlk = (MatrixBlock) slot.getValue();
        rowBlk.reset(1, srcBlk.getNumColumns());
        srcBlk.sliceOperations(r, r, 0, srcBlk.getNumColumns() - 1, rowBlk);
        slot.getIndexes().setIndexes(rowOffset + r + 2, srcIx.getColumnIndex());
      }
    }
  }
  /**
   * Compares the LEFT transpose-self matrix multiplication of a generated matrix computed on
   * the plain matrix block against the same operation on a {@code CompressedMatrixBlock}.
   *
   * @param sptype selects which of the sparsity constants is used for data generation
   * @param vtype value distribution of the generated data (CONST raises the minimum to 10,
   *     RAND_ROUND rounds the values)
   * @param compress whether {@code compress()} is invoked on the compressed block
   */
  private void runTransposeSelfMatrixMultTest(
      SparsityType sptype, ValueType vtype, boolean compress) {
    try {
      // pick the sparsity for the requested scenario
      double sparsity = -1;
      switch (sptype) {
        case DENSE:
          sparsity = sparsity1;
          break;
        case SPARSE:
          sparsity = sparsity2;
          break;
        case EMPTY:
          sparsity = sparsity3;
          break;
      }

      // generate the input matrix
      final double lowerBound = (vtype == ValueType.CONST) ? 10 : -10;
      double[][] data = TestUtils.generateTestMatrix(rows, cols, lowerBound, 10, sparsity, 7);
      if (vtype == ValueType.RAND_ROUND) {
        data = TestUtils.round(data);
      }
      final MatrixBlock plain = DataConverter.convertToMatrixBlock(data);

      // wrap (and optionally compress) the same data
      final CompressedMatrixBlock compressed = new CompressedMatrixBlock(plain);
      if (compress) {
        compressed.compress();
      }

      // transpose-self multiplication on the uncompressed block
      final MatrixBlock expected =
          plain.transposeSelfMatrixMultOperations(new MatrixBlock(), MMTSJType.LEFT);

      // transpose-self multiplication on the compressed block
      final MatrixBlock actual =
          compressed.transposeSelfMatrixMultOperations(new MatrixBlock(), MMTSJType.LEFT);

      // both results must agree on a cols x cols matrix
      final double[][] expectedValues = DataConverter.convertToDoubleMatrix(expected);
      final double[][] actualValues = DataConverter.convertToDoubleMatrix(actual);
      TestUtils.compareMatrices(expectedValues, actualValues, cols, cols, 1e-7);
    } catch (Exception ex) {
      throw new RuntimeException(ex);
    }
  }
コード例 #15
0
  /**
   * Replaces {@code as.scalar} over a matrix read with a literal holding the single cell of the
   * matrix bound to that variable.
   *
   * @param c hop to inspect
   * @param vars symbol table used to resolve the matrix variable
   * @return the replacement literal, or {@code null} if the pattern does not apply or the
   *     variable is not bound
   * @throws DMLRuntimeException if the bound matrix is not 1x1, or if acquiring or releasing it
   *     fails
   */
  private static LiteralOp replaceLiteralDataTypeCastMatrixRead(Hop c, LocalVariableMap vars)
      throws DMLRuntimeException {
    LiteralOp ret = null;

    // as.scalar/matrix read - literal replacement
    if (c instanceof UnaryOp
        && ((UnaryOp) c).getOp() == OpOp1.CAST_AS_SCALAR
        && c.getInput().get(0) instanceof DataOp
        && c.getInput().get(0).getDataType() == DataType.MATRIX) {
      Data dat = vars.get(c.getInput().get(0).getName());
      if (dat != null) // required for selective constant propagation
      {
        // cast as scalar (see VariableCPInstruction)
        MatrixObject mo = (MatrixObject) dat;
        MatrixBlock mBlock = mo.acquireRead();
        double value;
        try {
          if (mBlock.getNumRows() != 1 || mBlock.getNumColumns() != 1)
            throw new DMLRuntimeException(
                "Dimension mismatch - unable to cast matrix of dimension ("
                    + mBlock.getNumRows()
                    + " x "
                    + mBlock.getNumColumns()
                    + ") to scalar.");
          value = mBlock.getValue(0, 0);
        } finally {
          // release the read acquisition on every path, including the dimension-mismatch
          // error above, which previously left the matrix acquired
          mo.release();
        }

        // literal substitution (always double)
        ret = new LiteralOp(value);
      }
    }

    return ret;
  }
コード例 #16
0
      /**
       * Multiplies the input block with the matching block of the partitioned broadcast
       * {@code _pbc} and returns the product under the unchanged input indexes.
       *
       * <p>With {@code CacheType.LEFT} the broadcast block (1, rowIndex) is the left operand;
       * otherwise the broadcast block (columnIndex, 1) is the right operand.
       */
      @Override
      protected Tuple2<MatrixIndexes, MatrixBlock> computeNext(
          Tuple2<MatrixIndexes, MatrixBlock> arg) throws Exception {
        final MatrixIndexes ix = arg._1();
        final MatrixBlock block = arg._2();
        final MatrixBlock product = new MatrixBlock();

        if (_type != CacheType.LEFT) {
          // broadcast side is the right-hand operand, selected by the input column index
          final MatrixBlock rhs = _pbc.getMatrixBlock((int) ix.getColumnIndex(), 1);
          block.aggregateBinaryOperations(block, rhs, product, _op);
        } else {
          // broadcast side is the left-hand operand, selected by the input row index
          final MatrixBlock lhs = _pbc.getMatrixBlock(1, (int) ix.getRowIndex());
          lhs.aggregateBinaryOperations(lhs, block, product, _op);
        }

        // indexes are preserved
        return new Tuple2<MatrixIndexes, MatrixBlock>(ix, product);
      }
  /**
   * Applies the configured scalar operator to the matrix operand, using the scalar operand as
   * the operator constant, and binds the result to the output variable.
   */
  @Override
  public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    // either operand position may hold the matrix; the other one is the scalar
    final CPOperand matrixOperand;
    final CPOperand scalarOperand;
    if (input1.getDataType() == DataType.MATRIX) {
      matrixOperand = input1;
      scalarOperand = input2;
    } else {
      matrixOperand = input2;
      scalarOperand = input1;
    }

    final MatrixBlock source = ec.getMatrixInput(matrixOperand.getName());
    final ScalarObject scalarValue =
        (ScalarObject)
            ec.getScalarInput(
                scalarOperand.getName(), scalarOperand.getValueType(), scalarOperand.isLiteral());

    // bind the scalar to the operator before executing it
    final ScalarOperator scalarOp = (ScalarOperator) _optr;
    scalarOp.setConstant(scalarValue.getDoubleValue());

    final MatrixBlock result = (MatrixBlock) source.scalarOperations(scalarOp, new MatrixBlock());

    ec.releaseMatrixInput(matrixOperand.getName());

    // re-evaluate the dense/sparse representation only when the guard allows it
    if (checkGuardedRepresentationChange(source, result)) {
      result.examSparsity();
    }

    ec.setMatrixOutput(output.getName(), result);
  }
コード例 #18
0
  /**
   * Copies the columns listed in {@code _colList} (1-based) from the frame into the same
   * positions of the output matrix as doubles.
   *
   * <p>{@code null} cells, and empty strings in STRING columns, are written as {@code NaN}.
   *
   * @param in source frame
   * @param out target matrix, updated in place
   * @return {@code out}
   */
  @Override
  public MatrixBlock apply(FrameBlock in, MatrixBlock out) {
    for (int colId : _colList) {
      final int col = colId - 1;
      final ValueType vt = in.getSchema()[col];
      final boolean stringColumn = (vt == ValueType.STRING);

      for (int row = 0; row < in.getNumRows(); row++) {
        final Object cell = in.get(row, col);
        final boolean missing = cell == null || (stringColumn && cell.toString().isEmpty());

        final double converted;
        if (missing) {
          converted = Double.NaN;
        } else {
          converted = UtilFunctions.objectToDouble(vt, cell);
        }
        out.quickSetValue(row, col, converted);
      }
    }

    return out;
  }
コード例 #19
0
 /**
  * Returns the current size of whichever buffer is in use.
  *
  * <p>With a map buffer this is the sum of the entry counts of all maps; with a block buffer
  * it is the sum of the estimated in-memory sizes of all blocks (non-zero counts are
  * recomputed first). If neither buffer exists the result is 0.
  *
  * @return accumulated buffer size, truncated to {@code int}
  */
 public int getBufferSize() {
   int total = 0;

   if (_mapBuffer != null) {
     for (Entry<Byte, CTableMap> entry : _mapBuffer.entrySet()) {
       total += entry.getValue().size();
     }
   } else if (_blockBuffer != null) {
     for (Entry<Byte, MatrixBlock> entry : _blockBuffer.entrySet()) {
       final MatrixBlock block = entry.getValue();
       block.recomputeNonZeros();
       // NOTE(review): the third argument is (nnz / rows) * cols; if estimateSizeInMemory
       // expects a sparsity in [0,1] this looks like it should divide by cols -- confirm
       total +=
           MatrixBlock.estimateSizeInMemory(
               block.getNumRows(),
               block.getNumColumns(),
               ((double) block.getNonZeros() / block.getNumRows()) * block.getNumColumns());
     }
   }

   return total;
 }
コード例 #20
0
  /**
   * Writes rows {@code rl} (inclusive) to {@code ru} (exclusive) of a matrix block as CSV text
   * to a single file.
   *
   * <p>Output is assembled in a reusable {@code StringBuilder} and handed to the writer in
   * chunks of at most {@code BLOCKSIZE_J} columns. A header line {@code C1,C2,...} is emitted
   * only if the properties request a header and {@code rl == 0}. Zero cells are written as
   * {@code 0}, or left empty when the properties are flagged sparse.
   *
   * @param path target file
   * @param job job configuration (not referenced in this method)
   * @param fs file system used to create the file
   * @param src matrix block to write
   * @param rl first row to write (inclusive, 0-based)
   * @param ru end row (exclusive)
   * @param props CSV format properties; if {@code null}, a default-constructed instance is used
   * @throws IOException if creating the file or writing to it fails
   */
  protected final void writeCSVMatrixToFile(
      Path path,
      JobConf job,
      FileSystem fs,
      MatrixBlock src,
      int rl,
      int ru,
      CSVFileFormatProperties props)
      throws IOException {
    boolean sparse = src.isInSparseFormat();
    int clen = src.getNumColumns();

    // create buffered writer
    // NOTE(review): OutputStreamWriter is created without an explicit charset
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

    try {
      // for obj reuse and preventing repeated buffer re-allocations
      StringBuilder sb = new StringBuilder();

      // fall back to default-constructed properties when none are given
      props = (props == null) ? new CSVFileFormatProperties() : props;
      String delim = props.getDelim();
      boolean csvsparse = props.isSparse();

      // Write header line, if needed
      if (props.hasHeader() && rl == 0) {
        // write row chunk-wise to prevent OOM on large number of columns
        for (int bj = 0; bj < clen; bj += BLOCKSIZE_J) {
          for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
            sb.append("C" + (j + 1));
            if (j < clen - 1) sb.append(delim);
          }
          br.write(sb.toString());
          sb.setLength(0);
        }
        sb.append('\n');
        br.write(sb.toString());
        sb.setLength(0);
      }

      // Write data lines
      if (sparse) // SPARSE
      {
        SparseBlock sblock = src.getSparseBlock();
        for (int i = rl; i < ru; i++) {
          // write row chunk-wise to prevent OOM on large number of columns
          // column index of the last value written in this row; -1 means none yet
          int prev_jix = -1;
          if (sblock != null && i < sblock.numRows() && !sblock.isEmpty(i)) {
            int pos = sblock.pos(i);
            int alen = sblock.size(i);
            int[] aix = sblock.indexes(i);
            double[] avals = sblock.values(i);

            for (int j = pos; j < pos + alen; j++) {
              int jix = aix[j];

              // output empty fields, if needed
              // (one field per column strictly between prev_jix and jix)
              for (int j2 = prev_jix; j2 < jix - 1; j2++) {
                if (!csvsparse) sb.append('0');
                sb.append(delim);

                // flush buffered string
                if (j2 % BLOCKSIZE_J == 0) {
                  br.write(sb.toString());
                  sb.setLength(0);
                }
              }

              // output the value (non-zero)
              sb.append(avals[j]);
              if (jix < clen - 1) sb.append(delim);
              br.write(sb.toString());
              sb.setLength(0);

              // flush buffered string
              // (the buffer was emptied just above, so this writes an empty string)
              if (jix % BLOCKSIZE_J == 0) {
                br.write(sb.toString());
                sb.setLength(0);
              }

              prev_jix = jix;
            }
          }

          // Output empty fields at the end of the row.
          // In case of an empty row, this covers all clen fields (clen-1 delimiters)
          for (int bj = prev_jix + 1; bj < clen; bj += BLOCKSIZE_J) {
            for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
              if (!csvsparse) sb.append('0');
              if (j < clen - 1) sb.append(delim);
            }
            br.write(sb.toString());
            sb.setLength(0);
          }

          // terminate the row
          sb.append('\n');
          br.write(sb.toString());
          sb.setLength(0);
        }
      } else // DENSE
      {
        for (int i = rl; i < ru; i++) {
          // write row chunk-wise to prevent OOM on large number of columns
          for (int bj = 0; bj < clen; bj += BLOCKSIZE_J) {
            for (int j = bj; j < Math.min(clen, bj + BLOCKSIZE_J); j++) {
              double lvalue = src.getValueDenseUnsafe(i, j);
              if (lvalue != 0) // non-zero value: always written
              sb.append(lvalue);
              else if (!csvsparse) sb.append('0'); // zero: written only for non-sparse CSV

              if (j != clen - 1) sb.append(delim);
            }
            br.write(sb.toString());
            sb.setLength(0);
          }

          // terminate the row
          sb.append('\n');
          br.write(sb.toString()); // same as append
          sb.setLength(0);
        }
      }
    } finally {
      // NOTE(review): closeSilently presumably swallows errors raised on close, which would
      // include a failed final flush of the buffered writer -- confirm
      IOUtilFunctions.closeSilently(br);
    }
  }
コード例 #21
0
 /**
  * Writes all rows of the matrix block to a single CSV file in one sequential pass.
  *
  * @param path target file
  * @param job job configuration, passed through to the file writer
  * @param fs file system used to create the file
  * @param src matrix block to write
  * @param csvprops CSV format properties, passed through to the file writer
  * @throws IOException if the write fails
  */
 protected void writeCSVMatrixToHDFS(
     Path path, JobConf job, FileSystem fs, MatrixBlock src, CSVFileFormatProperties csvprops)
     throws IOException {
   // row range covering the whole block: [0, numRows)
   final int firstRow = 0;
   final int endRow = (int) src.getNumRows();

   writeCSVMatrixToFile(path, job, fs, src, firstRow, endRow, csvprops);
 }
コード例 #22
0
 /**
  * Adds the block's non-zero count to the {@code _aNnz} accumulator and returns the block
  * unchanged.
  */
 @Override
 public MatrixBlock call(MatrixBlock arg0) throws Exception {
   final double nonZeros = (double) arg0.getNonZeros();
   _aNnz.add(nonZeros);
   return arg0;
 }
コード例 #23
0
  /**
   * Appends the second input matrix to the first (cbind or rbind, depending on {@code _type})
   * and binds the result to the output variable.
   *
   * @throws DMLRuntimeException if the row counts differ for cbind or the column counts differ
   *     for rbind
   */
  @Override
  public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    // acquire both operands
    final MatrixBlock lhs = ec.getMatrixInput(input1.getName());
    final MatrixBlock rhs = ec.getMatrixInput(input2.getName());
    final boolean cbind = (_type == AppendType.CBIND);

    // the dimension shared by both operands has to match
    if (cbind) {
      if (lhs.getNumRows() != rhs.getNumRows()) {
        throw new DMLRuntimeException(
            "Append-cbind is not possible for input matrices "
                + input1.getName()
                + " and "
                + input2.getName()
                + " with different number of rows: "
                + lhs.getNumRows()
                + " vs "
                + rhs.getNumRows());
      }
    } else if (_type == AppendType.RBIND) {
      if (lhs.getNumColumns() != rhs.getNumColumns()) {
        throw new DMLRuntimeException(
            "Append-rbind is not possible for input matrices "
                + input1.getName()
                + " and "
                + input2.getName()
                + " with different number of columns: "
                + lhs.getNumColumns()
                + " vs "
                + rhs.getNumColumns());
      }
    }

    // append into a fresh output block
    final MatrixBlock appended = lhs.appendOperations(rhs, new MatrixBlock(), cbind);

    // publish the output, then release both inputs
    ec.setMatrixOutput(output.getName(), appended);
    ec.releaseMatrixInput(input1.getName());
    ec.releaseMatrixInput(input2.getName());
  }
コード例 #24
0
  /**
   * Executes the quaternary operator on every cached block of {@code _input1}.
   *
   * <p>For each block Xij the remaining operands are collected (optional Wij, plus Ui and Vj,
   * which come either from the cached values or from the distributed cache), the operator is
   * applied, and the output indexes are set depending on which weight type of the operator is
   * present.
   *
   * @param valueClass value class used when reserving an output slot
   * @param cachedValues cache holding the input blocks and receiving the outputs
   * @param tempValue temporary holder used when the output index equals {@code _input1}
   * @param zeroInput not referenced in this method
   * @param blockRowFactor not referenced in this method
   * @param blockColFactor not referenced in this method
   * @throws DMLRuntimeException declared for the operand lookup, reorg and quaternary calls
   */
  @Override
  public void processInstruction(
      Class<? extends MatrixValue> valueClass,
      CachedValueMap cachedValues,
      IndexedMatrixValue tempValue,
      IndexedMatrixValue zeroInput,
      int blockRowFactor,
      int blockColFactor)
      throws DMLRuntimeException {
    QuaternaryOperator qop = (QuaternaryOperator) optr;

    ArrayList<IndexedMatrixValue> blkList = cachedValues.get(_input1);
    if (blkList != null)
      for (IndexedMatrixValue imv : blkList) {
        // Step 1: prepare inputs and output
        if (imv == null) continue;
        MatrixIndexes inIx = imv.getIndexes();
        MatrixValue inVal = imv.getValue();

        // allocate space for the output value
        // (use the temporary holder when the output index is the same as the first input)
        IndexedMatrixValue iout = null;
        if (output == _input1) iout = tempValue;
        else iout = cachedValues.holdPlace(output, valueClass);

        MatrixIndexes outIx = iout.getIndexes();
        MatrixValue outVal = iout.getValue();

        // Step 2: get remaining inputs: Wij, Ui, Vj
        MatrixValue Xij = inVal;

        // get Wij if existing (null of WeightsType.NONE or WSigmoid any type)
        IndexedMatrixValue iWij = (_input4 != -1) ? cachedValues.getFirst(_input4) : null;
        MatrixValue Wij = (iWij != null) ? iWij.getValue() : null;
        // no cached Wij but the operator expects a fourth input: build a 1x1 block from
        // instruction part 4 parsed as a double
        if (null == Wij && qop.hasFourInputs()) {
          MatrixBlock mb = new MatrixBlock(1, 1, false);
          String[] parts = InstructionUtils.getInstructionParts(instString);
          mb.quickSetValue(0, 0, Double.valueOf(parts[4]));
          Wij = mb;
        }

        // get Ui and Vj, potentially through distributed cache
        // (Ui is looked up by the input row index, Vj by the input column index)
        MatrixValue Ui =
            (!_cacheU)
                ? cachedValues.getFirst(_input2).getValue() // U
                : MRBaseForCommonInstructions.dcValues
                    .get(_input2)
                    .getDataBlock((int) inIx.getRowIndex(), 1)
                    .getValue();
        MatrixValue Vj =
            (!_cacheV)
                ? cachedValues.getFirst(_input3).getValue() // t(V)
                : MRBaseForCommonInstructions.dcValues
                    .get(_input3)
                    .getDataBlock((int) inIx.getColumnIndex(), 1)
                    .getValue();
        // handle special input case: //V through shuffle -> t(V)
        // (if the column counts differ, Vj is reorganized with the swap-index function into a
        // block with rows and columns exchanged)
        if (Ui.getNumColumns() != Vj.getNumColumns()) {
          Vj =
              LibMatrixReorg.reorg(
                  (MatrixBlock) Vj,
                  new MatrixBlock(Vj.getNumColumns(), Vj.getNumRows(), Vj.isInSparseFormat()),
                  new ReorgOperator(SwapIndex.getSwapIndexFnObject()));
        }

        // Step 3: process instruction
        Xij.quaternaryOperations(qop, Ui, Vj, Wij, outVal);

        // set output indexes
        // wtype1/wtype4: fixed index (1,1); wtype2, wtype5 or basic wtype3: input indexes;
        // otherwise: (column or row index, 1) depending on wtype3.isLeft()

        if (qop.wtype1 != null || qop.wtype4 != null) outIx.setIndexes(1, 1); // wsloss
        else if (qop.wtype2 != null
            || qop.wtype5 != null
            || qop.wtype3 != null && qop.wtype3.isBasic()) // && binds tighter than ||
          outIx.setIndexes(inIx); // wsigmoid/wdivmm-basic
        else { // wdivmm
          // NOTE(review): assumes wtype3 is non-null whenever this branch is reached -- confirm
          boolean left = qop.wtype3.isLeft();
          outIx.setIndexes(left ? inIx.getColumnIndex() : inIx.getRowIndex(), 1);
        }

        // put the output value in the cache
        // (only needed for the temporary holder; holdPlace slots already live in the cache)
        if (iout == tempValue) cachedValues.add(output, iout);
      }
  }
コード例 #25
0
  /**
   * Reads a matrix stored as binary cells (sequence files of {@code MatrixIndexes} keys and {@code
   * MatrixCell} values) and appends every cell to {@code dest}.
   *
   * <p>Row and column indexes read from the files are decremented by one before being handed to
   * {@code dest.appendValue}. If {@code dest} reports sparse format, its sparse rows are sorted
   * once all part files have been consumed.
   *
   * @param path location passed to {@code getSequenceFilePaths} to enumerate the part files
   * @param job job configuration used to open each sequence file
   * @param fs file system used to open each sequence file
   * @param dest target matrix block that receives the cells
   * @param rlen overall number of rows; only used for the bounds check during error handling
   * @param clen overall number of columns; only used for the bounds check during error handling
   * @param brlen not used by this method
   * @param bclen not used by this method
   * @throws IOException if any step of the read fails; the original exception is always attached
   *     as the cause
   */
  @SuppressWarnings("deprecation")
  private void readBinaryCellMatrixFromHDFS(
      Path path,
      JobConf job,
      FileSystem fs,
      MatrixBlock dest,
      long rlen,
      long clen,
      int brlen,
      int bclen)
      throws IOException {
    boolean sparse = dest.isInSparseFormat();
    MatrixIndexes key = new MatrixIndexes();
    MatrixCell value = new MatrixCell();
    // last cell position seen; kept outside the try block for post-mortem diagnostics
    int row = -1;
    int col = -1;

    try {
      for (Path lpath : getSequenceFilePaths(fs, path)) // 1..N files
      {
        // directly read from sequence files (individual partfiles)
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);

        try {
          // the same append call is used regardless of the target format
          while (reader.next(key, value)) {
            row = (int) key.getRowIndex() - 1;
            col = (int) key.getColumnIndex() - 1;
            dest.appendValue(row, col, value.getValue());
          }
        } finally {
          IOUtilFunctions.closeSilently(reader);
        }
      }

      if (sparse) dest.sortSparseRows();
    } catch (Exception ex) {
      // post-mortem error handling and bounds checking; the cause is always chained because
      // row/col are still -1 when the failure happened before the first cell was read
      if (row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen) {
        throw new IOException(
            "Matrix cell ["
                + (row + 1)
                + ","
                + (col + 1)
                + "] "
                + "out of overall matrix range [1:"
                + rlen
                + ",1:"
                + clen
                + "].",
            ex);
      } else {
        throw new IOException("Unable to read matrix in binary cell format.", ex);
      }
    }
  }
コード例 #26
0
    /**
     * Applies the block of {@code _pmV} that matches the row index of the incoming block to that
     * block via {@code permutationMatrixMultOperations} and returns the resulting output blocks.
     *
     * <p>The minimum non-zero and maximum values of the {@code _pmV} block are used as target row
     * positions. When they map to two different row-block indexes, two output blocks are emitted;
     * otherwise one. Nothing is emitted when the minimum position is below 1.
     *
     * @param arg0 pair of block indexes and matrix block to process
     * @return zero, one, or two indexed output blocks
     */
    @Override
    public Iterable<Tuple2<MatrixIndexes, MatrixBlock>> call(
        Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
      ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> results =
          new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();
      MatrixIndexes inputIx = arg0._1();
      MatrixBlock dataBlock = arg0._2();

      // block of _pmV aligned with the row index of the input block
      MatrixBlock selectBlock = _pmV.getMatrixBlock((int) inputIx.getRowIndex(), 1);

      // derive the first and last target row-block indexes from the value range
      long minPos = UtilFunctions.toLong(selectBlock.minNonZero());
      long maxPos = UtilFunctions.toLong(selectBlock.max());
      long firstOutRow = (minPos - 1) / _brlen + 1;
      long lastOutRow = (maxPos - 1) / _brlen + 1;

      // no row selected: nothing to emit
      if (minPos < 1) {
        return results;
      }

      // estimate output sparsity to pick the in-memory format
      int numCols = dataBlock.getNumColumns();
      double selectSparsity =
          OptimizerUtils.getSparsity(selectBlock.getNumRows(), 1, selectBlock.getNonZeros());
      long estimatedNnz = (long) (selectSparsity * dataBlock.getNonZeros());
      boolean sparseOut = MatrixBlock.evalSparseFormatInMemory(_brlen, numCols, estimatedNnz);

      // allocate the first output with the default block size; a second output is only
      // needed when the selected positions span two row blocks
      MatrixBlock firstOut = new MatrixBlock();
      firstOut.reset(_brlen, numCols, sparseOut);
      MatrixBlock secondOut = null;
      if (firstOutRow != lastOutRow) {
        secondOut = new MatrixBlock();
        secondOut.reset(
            UtilFunctions.computeBlockSize(_rlen, lastOutRow, _brlen), numCols, sparseOut);
      }

      // core permutation; the first output was sized with the default block size above,
      // so its row count is corrected afterwards
      selectBlock.permutationMatrixMultOperations(dataBlock, firstOut, secondOut);
      firstOut.setNumRows(UtilFunctions.computeBlockSize(_rlen, firstOutRow, _brlen));

      long colIx = inputIx.getColumnIndex();
      results.add(
          new Tuple2<MatrixIndexes, MatrixBlock>(new MatrixIndexes(firstOutRow, colIx), firstOut));
      if (secondOut != null) {
        results.add(
            new Tuple2<MatrixIndexes, MatrixBlock>(
                new MatrixIndexes(lastOutRow, colIx), secondOut));
      }

      return results;
    }
コード例 #27
0
  /**
   * Emits all buffered partial ctable results through {@code _collector} and then clears the
   * buffer that was flushed.
   *
   * <p>If {@code _mapBuffer} is set, every entry of every buffered map is emitted as a single cell.
   * Otherwise, if {@code _blockBuffer} is set, every buffered block is emitted either as one block
   * with index (1,1) or, when it exceeds the block size, as a grid of sub-blocks. In both cases the
   * per-output non-zero counters in {@code _resultNonZeros} are updated.
   *
   * @param reporter reporter handed through to {@code _collector.collectOutput}
   * @throws RuntimeException if flushing fails or if neither buffer is set; the original exception
   *     is attached as the cause
   */
  @SuppressWarnings("deprecation")
  public void flushBuffer(Reporter reporter) throws RuntimeException {
    try {
      if (_mapBuffer != null) {
        MatrixCell value = new MatrixCell();
        for (Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet()) {
          ArrayList<Integer> resultIDs =
              ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
          CTableMap resultMap = ctable.getValue();

          // maintain result dims and nonzeros
          for (int i : resultIDs) {
            _resultNonZeros[i] += resultMap.size();
            if (_resultDimsUnknown[i] == (byte) 1) {
              _resultMaxRowDims[i] = Math.max(resultMap.getMaxRow(), _resultMaxRowDims[i]);
              _resultMaxColDims[i] = Math.max(resultMap.getMaxColumn(), _resultMaxColDims[i]);
            }
          }

          // output result data (a fresh key per cell, the value object is reused)
          for (LLDoubleEntry e : resultMap.entrySet()) {
            MatrixIndexes key = new MatrixIndexes(e.key1, e.key2);
            value.setValue(e.value);
            for (int i : resultIDs) {
              _collector.collectOutput(key, value, i, reporter);
            }
          }
        }
      } else if (_blockBuffer != null) {
        MatrixIndexes key = new MatrixIndexes(1, 1);
        for (Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet()) {
          ArrayList<Integer> resultIDs =
              ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
          MatrixBlock outBlock = ctable.getValue();
          outBlock.recomputeNonZeros();

          // TODO: change hard coding of 1000
          int brlen = 1000, bclen = 1000;
          int rlen = outBlock.getNumRows();
          int clen = outBlock.getNumColumns();

          // final output matrix is smaller than a single block
          // (columns are checked against the column block size)
          if (rlen <= brlen && clen <= bclen) {
            key = new MatrixIndexes(1, 1);
            for (int i : resultIDs) {
              _collector.collectOutput(key, outBlock, i, reporter);
              _resultNonZeros[i] += outBlock.getNonZeros();
            }
          } else {
            // initialize blocks for reuse
            MatrixBlock[] blocks =
                MatrixWriter.createMatrixBlocksForReuse(
                    rlen, clen, brlen, bclen, true, outBlock.getNonZeros());

            // number of blocks per dimension, computed once instead of per loop iteration
            int numRowBlocks = (int) Math.ceil(rlen / (double) brlen);
            int numColBlocks = (int) Math.ceil(clen / (double) bclen);

            // create and write subblocks of matrix
            for (int blockRow = 0; blockRow < numRowBlocks; blockRow++) {
              int row_offset = blockRow * brlen;
              // boundary blocks may be smaller than the regular block size
              int maxRow = (row_offset + brlen < rlen) ? brlen : rlen - row_offset;

              for (int blockCol = 0; blockCol < numColBlocks; blockCol++) {
                int col_offset = blockCol * bclen;
                int maxCol = (col_offset + bclen < clen) ? bclen : clen - col_offset;

                // get reuse matrix block
                MatrixBlock block =
                    MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);

                // copy submatrix to block
                outBlock.sliceOperations(
                    row_offset,
                    row_offset + maxRow - 1,
                    col_offset,
                    col_offset + maxCol - 1,
                    block);

                // TODO: skip empty "block"

                // emit the block under its block index (offset by one)
                key.setIndexes(blockRow + 1, blockCol + 1);
                for (int i : resultIDs) {
                  _collector.collectOutput(key, block, i, reporter);
                  _resultNonZeros[i] += block.getNonZeros();
                }

                // reset block for later reuse
                block.reset();
              }
            }
          }
        }
      } else {
        throw new DMLRuntimeException("Unexpected.. both ctable buffers are empty.");
      }
    } catch (Exception ex) {
      throw new RuntimeException("Failed to flush ctable buffer.", ex);
    }
    // remove existing partial ctables; one of the two buffers is non-null here because the
    // both-null case has already been rethrown above
    if (_mapBuffer != null) _mapBuffer.clear();
    else _blockBuffer.clear();
  }
コード例 #28
0
  /**
   * Adds the offset row vector to the first row of a copy of the data block using {@code _bop},
   * then applies {@code _uop} to that copy and writes the result into the output slot of the
   * cache. The output receives the same block indexes as the data input.
   *
   * @param valueClass value class used when reserving the output slot
   * @param cachedValues cache holding the two inputs and receiving the output
   * @param tempValue not used by this method
   * @param zeroInput not used by this method
   * @param blockRowFactor not used by this method
   * @param blockColFactor not used by this method
   * @throws DMLRuntimeException if either input is missing from the cache
   */
  @Override
  public void processInstruction(
      Class<? extends MatrixValue> valueClass,
      CachedValueMap cachedValues,
      IndexedMatrixValue tempValue,
      IndexedMatrixValue zeroInput,
      int blockRowFactor,
      int blockColFactor)
      throws DMLRuntimeException {
    // look up the original data and the offset row vector
    IndexedMatrixValue dataIn = cachedValues.getFirst(input1);
    IndexedMatrixValue offsetIn = cachedValues.getFirst(input2);

    // both inputs are mandatory
    if (dataIn == null || offsetIn == null) {
      String leftDesc = (dataIn == null) ? "null" : String.valueOf(dataIn.getIndexes());
      String rightDesc = (offsetIn == null) ? "null" : String.valueOf(offsetIn.getIndexes());
      throw new DMLRuntimeException(
          "Unexpected empty input (left=" + leftDesc + ", right=" + rightDesc + ").");
    }

    // reserve the output slot and size the result block like the data block
    IndexedMatrixValue outEntry = cachedValues.holdPlace(output, valueClass);
    MatrixBlock dataBlk = (MatrixBlock) dataIn.getValue();
    MatrixBlock offsetBlk = (MatrixBlock) offsetIn.getValue();
    MatrixBlock resultBlk = (MatrixBlock) outEntry.getValue();
    resultBlk.reset(dataBlk.getNumRows(), dataBlk.getNumColumns());

    // work on a copy so the cached input block is left untouched
    MatrixBlock work = new MatrixBlock(dataBlk);
    int lastCol = work.getNumColumns() - 1;

    // extract the first row, fold the offset into it, and write it back
    MatrixBlock firstRow = work.sliceOperations(0, 0, 0, lastCol, new MatrixBlock());
    firstRow.binaryOperationsInPlace(_bop, offsetBlk);
    work.copy(0, 0, 0, lastCol, firstRow, true);

    // apply the unary operator to the adjusted copy, writing into the result block
    work.unaryOperations(_uop, resultBlk);

    // the output block keeps the indexes of the data input
    outEntry.getIndexes().setIndexes(dataIn.getIndexes());
  }
コード例 #29
0
 /**
  * Applies {@code _op} to the given block and returns the result.
  *
  * @param arg0 input matrix block
  * @return the value returned by {@code unaryOperations}, cast to {@code MatrixBlock}
  */
 @Override
 public MatrixBlock call(MatrixBlock arg0) throws Exception {
   // a fresh block is supplied as the result argument on every call
   MatrixBlock result = new MatrixBlock();
   return (MatrixBlock) arg0.unaryOperations(_op, result);
 }