Пример #1
0
  /**
   * Appends the frame {@code that} to this frame and stores the combined result in {@code ret}.
   *
   * <p>With {@code cbind} set, the columns of {@code that} are placed after the columns of this
   * frame, which requires both frames to have the same number of rows. Otherwise the rows of
   * {@code that} are placed after the rows of this frame, which requires both frames to have the
   * same number of columns. Schema and column names are copied into new lists and every column
   * array is cloned before it is added to the output.
   *
   * @param that frame whose content is appended to this frame
   * @param ret output frame to populate; a new {@code FrameBlock} is created if {@code null}
   * @param cbind {@code true} for a column-wise append, {@code false} for a row-wise append
   * @return the populated output frame
   * @throws DMLRuntimeException if the row counts (cbind) or the column counts (rbind) differ
   */
  public FrameBlock appendOperations(FrameBlock that, FrameBlock ret, boolean cbind)
      throws DMLRuntimeException {
    // NOTE(review): columns are added to the output's existing _coldata; presumably callers pass
    // null or a block without column data for 'ret' -- confirm against callers.
    if (!cbind) {
      // ---- row append (rbind) ----
      if (getNumColumns() != that.getNumColumns()) {
        throw new DMLRuntimeException(
            "Incompatible number of columns for rbind: "
                + that.getNumColumns()
                + " (expected: "
                + getNumColumns()
                + ")");
      }

      // the output starts out as a copy of this frame ...
      FrameBlock stacked = (ret != null) ? ret : new FrameBlock();
      stacked._numRows = _numRows;
      stacked._schema = new ArrayList<ValueType>(_schema);
      stacked._colnames = new ArrayList<String>(_colnames);
      for (Array column : _coldata) {
        stacked._coldata.add(column.clone());
      }

      // ... and then receives the rows of the other frame one by one
      for (Iterator<Object[]> rows = that.getObjectRowIterator(); rows.hasNext(); ) {
        stacked.appendRow(rows.next());
      }
      return stacked;
    }

    // ---- column append (cbind) ----
    if (getNumRows() != that.getNumRows()) {
      throw new DMLRuntimeException(
          "Incompatible number of rows for cbind: "
              + that.getNumRows()
              + " (expected: "
              + getNumRows()
              + ")");
    }

    FrameBlock joined = (ret != null) ? ret : new FrameBlock();
    joined._numRows = _numRows;

    // schema and column names: this frame's entries first, followed by the other frame's
    joined._schema = new ArrayList<ValueType>(_schema);
    joined._schema.addAll(that._schema);
    joined._colnames = new ArrayList<String>(_colnames);
    joined._colnames.addAll(that._colnames);

    // column data in the same order, each column cloned
    for (Array column : _coldata) {
      joined._coldata.add(column.clone());
    }
    for (Array column : that._coldata) {
      joined._coldata.add(column.clone());
    }
    return joined;
  }
  /**
   * Computes the part of the given frame block that overlaps the requested slice range and
   * delegates the actual copy to {@code FrameBlock.sliceOperations}.
   *
   * <p>The key of {@code in} is used as the global index of the block's first row; columns are
   * treated as starting at global index 1. If the block does not intersect {@code ixrange}, the
   * method returns without touching {@code outlist}. Otherwise one empty result block per
   * affected output row block is added to {@code outlist} before the slice is executed.
   *
   * @param in pair of the block's starting global row index and the frame block itself
   * @param ixrange requested slice range in global indices
   * @param brlen number of rows per output block
   * @param bclen not used by this method
   * @param outlist list that receives the result blocks (new entries are appended)
   * @throws DMLRuntimeException declared for the delegated slice operation
   */
  public static void performSlice(
      Pair<Long, FrameBlock> in,
      IndexRange ixrange,
      int brlen,
      int bclen,
      ArrayList<Pair<Long, FrameBlock>> outlist)
      throws DMLRuntimeException {
    long index = in.getKey();
    FrameBlock block = in.getValue();

    // Global row/column boundaries covered by this block
    long cellIndexTopRow = index;
    long cellIndexBottomRow = index + block.getNumRows() - 1;
    long cellIndexLeftCol = 1;
    long cellIndexRightCol = block.getNumColumns();

    // Intersection of the block boundaries with the requested slice (global indices)
    long cellIndexOverlapTop = Math.max(cellIndexTopRow, ixrange.rowStart);
    long cellIndexOverlapBottom = Math.min(cellIndexBottomRow, ixrange.rowEnd);
    long cellIndexOverlapLeft = Math.max(cellIndexLeftCol, ixrange.colStart);
    long cellIndexOverlapRight = Math.min(cellIndexRightCol, ixrange.colEnd);

    // Empty intersection: the block is outside the indexing range
    if (cellIndexOverlapTop > cellIndexOverlapBottom
        || cellIndexOverlapLeft > cellIndexOverlapRight) {
      return;
    }

    // Overlap expressed relative to this block (rows relative to 'index', columns shifted by 1)
    IndexRange tmpRange =
        new IndexRange(
            cellIndexOverlapTop - index,
            cellIndexOverlapBottom - index,
            cellIndexOverlapLeft - 1,
            cellIndexOverlapRight - 1);

    // Offset of the slice's first row relative to the start of this block
    int rowCut = (int) (ixrange.rowStart - index);

    // Output row blocks touched by the overlap
    long resultBlockIndexTop = UtilFunctions.computeBlockIndex(cellIndexOverlapTop, brlen);
    long resultBlockIndexBottom = UtilFunctions.computeBlockIndex(cellIndexOverlapBottom, brlen);

    // Allocate one result block per touched output row block. The sub-schema is obtained anew
    // for every block so that no two result blocks are handed the same list instance.
    for (long r = resultBlockIndexTop; r <= resultBlockIndexBottom; r++) {
      List<ValueType> schema =
          UtilFunctions.getSubSchema(block.getSchema(), tmpRange.colStart, tmpRange.colEnd);
      long iResultIndex = (r - 1) * brlen + tmpRange.rowStart;
      // Long.valueOf replaces the deprecated Long(long) constructor
      Pair<Long, FrameBlock> out =
          new Pair<Long, FrameBlock>(Long.valueOf(iResultIndex + 1), new FrameBlock(schema));
      outlist.add(out);
    }

    // Execute the actual slice operation into the allocated result blocks
    block.sliceOperations(outlist, tmpRange, rowCut);
  }
  /**
   * Copies the configured columns of the input frame into the output matrix as doubles.
   *
   * <p>Each entry of {@code _colList} is decremented by one to obtain the column position used
   * for both the frame and the matrix. A cell that is {@code null}, or an empty string in a
   * {@code STRING} column, is written as {@code Double.NaN}; every other cell is converted with
   * {@code UtilFunctions.objectToDouble}.
   *
   * @param in frame providing the cell values
   * @param out matrix receiving the converted values at the same (row, column) positions
   * @return the given output matrix
   */
  @Override
  public MatrixBlock apply(FrameBlock in, MatrixBlock out) {
    for (int colId : _colList) {
      final int col = colId - 1;
      final ValueType vt = in.getSchema()[col];

      for (int row = 0; row < in.getNumRows(); row++) {
        final Object cell = in.get(row, col);
        final boolean missing =
            cell == null || (vt == ValueType.STRING && cell.toString().isEmpty());

        double encoded;
        if (missing) {
          encoded = Double.NaN;
        } else {
          encoded = UtilFunctions.objectToDouble(vt, cell);
        }
        out.quickSetValue(row, col, encoded);
      }
    }

    return out;
  }
  /**
   * Encodes the input frame into a matrix according to the transform specification and
   * publishes both the encoded matrix and the encoder's metadata frame.
   *
   * <p>The first input operand names the frame, the second the specification string. The encoded
   * matrix is bound to output 0 and the metadata frame, carrying the input's column names, to
   * output 1.
   *
   * @param ec execution context used to obtain the inputs and to set the outputs
   * @throws DMLRuntimeException declared for the context and encoder calls
   */
  @Override
  public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    // fetch the input frame, the transform specification and the column names
    final FrameBlock frame = ec.getFrameInput(input1.getName());
    final String encoderSpec =
        ec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral())
            .getStringValue();
    final String[] names = frame.getColumnNames();

    // build the encoder and encode into a matrix with the frame's dimensions
    final Encoder enc = EncoderFactory.createEncoder(encoderSpec, names, frame.getNumColumns(), null);
    final MatrixBlock target = new MatrixBlock(frame.getNumRows(), frame.getNumColumns(), false);
    final MatrixBlock encoded = enc.encode(frame, target);

    // collect the encoder metadata into a string-typed frame labelled with the input's names
    final FrameBlock metaTemplate = new FrameBlock(frame.getNumColumns(), ValueType.STRING);
    final FrameBlock metadata = enc.getMetaData(metaTemplate);
    metadata.setColumnNames(names);

    // release the input, then hand over both outputs
    ec.releaseFrameInput(input1.getName());
    ec.setMatrixOutput(getOutput(0).getName(), encoded);
    ec.setFrameOutput(getOutput(1).getName(), metadata);
  }
Пример #5
0
  /**
   * Returns a copy of this frame in which the cell range {@code [rl:ru, cl:cu]} has been
   * overwritten with the content of {@code rhsFrame}.
   *
   * <p>All indices are 0-based and inclusive. Schema and column names are copied into new lists
   * and every column array of this frame is cloned before the affected columns are overwritten.
   *
   * @param rhsFrame frame providing the new values; its column {@code k} is written into column
   *     {@code cl + k} of the output
   * @param rl first row of the target range
   * @param ru last row of the target range
   * @param cl first column of the target range
   * @param cu last column of the target range
   * @param ret output frame to populate; a new {@code FrameBlock} is created if {@code null}
   * @return the populated output frame
   * @throws DMLRuntimeException if the range lies outside this frame or the dimensions of
   *     {@code rhsFrame} do not fit the range
   */
  public FrameBlock leftIndexingOperations(
      FrameBlock rhsFrame, int rl, int ru, int cl, int cu, FrameBlock ret)
      throws DMLRuntimeException {
    // check the validity of bounds (row and column checks are symmetric; the original repeated
    // the 'cu' upper-bound test where the 'cl' upper-bound test belongs)
    if (rl < 0
        || rl >= getNumRows()
        || ru < rl
        || ru >= getNumRows()
        || cl < 0
        || cl >= getNumColumns()
        || cu < cl
        || cu >= getNumColumns()) {
      throw new DMLRuntimeException(
          "Invalid values for frame indexing: ["
              + (rl + 1)
              + ":"
              + (ru + 1)
              + ","
              + (cl + 1)
              + ":"
              + (cu + 1)
              + "] "
              + "must be within frame dimensions ["
              + getNumRows()
              + ","
              + getNumColumns()
              + "].");
    }
    // The column count must match exactly: the copy loop below reads rhs column (j - cl) for
    // every j in [cl, cu], so an rhs with fewer columns would otherwise fail there with an
    // index error instead of this descriptive exception.
    // NOTE(review): the row comparison is kept as '<' because the handling of a shorter rhs
    // inside Array.set is not visible here -- presumably it should be an exact match as well.
    if ((ru - rl + 1) < rhsFrame.getNumRows() || (cu - cl + 1) != rhsFrame.getNumColumns()) {
      throw new DMLRuntimeException(
          "Invalid values for frame indexing: "
              + "dimensions of the source frame ["
              + rhsFrame.getNumRows()
              + "x"
              + rhsFrame.getNumColumns()
              + "] "
              + "do not match the shape of the frame specified by indices ["
              + (rl + 1)
              + ":"
              + (ru + 1)
              + ", "
              + (cl + 1)
              + ":"
              + (cu + 1)
              + "].");
    }

    // allocate output frame (incl copy of schema and column names)
    if (ret == null) ret = new FrameBlock();
    ret._numRows = _numRows;
    ret._schema = new ArrayList<ValueType>(_schema);
    ret._colnames = new ArrayList<String>(_colnames);

    // copy data to output and partially overwrite the columns inside [cl, cu] with the rhs
    for (int j = 0; j < getNumColumns(); j++) {
      Array tmp = _coldata.get(j).clone();
      if (j >= cl && j <= cu) tmp.set(rl, ru, rhsFrame._coldata.get(j - cl));
      ret._coldata.add(tmp);
    }

    return ret;
  }
  /**
   * Maps a right-hand-side frame block onto the left-hand-side row blocks it overlaps after
   * being shifted to the position given by {@code ixrange}.
   *
   * <p>For every overlapped left-hand-side row block, the matching part of the input block is
   * sliced out and written into a new result block via {@code leftIndexingOperations}. The
   * result block has {@code clenLeft} columns: the sliced columns keep their schema, all other
   * columns are typed {@code STRING}. Each result block is added to {@code outlist} under the
   * global index of the first row of its left-hand-side row block.
   *
   * @param in pair of the input block's starting row index and the input block itself
   * @param ixrange target range on the left-hand side
   * @param brlenLeft number of rows per left-hand-side block
   * @param clenLeft number of columns of the left-hand-side frame
   * @param rlen number of rows of the left-hand-side frame, used to size each result block
   * @param clen not used by this method
   * @param outlist list that receives the result blocks (new entries are appended)
   * @throws DMLRuntimeException declared for the delegated slice and left-indexing operations
   */
  public static void performShift(
      Pair<Long, FrameBlock> in,
      IndexRange ixrange,
      int brlenLeft,
      int clenLeft /*, int bclen*/,
      long rlen,
      long clen,
      ArrayList<Pair<Long, FrameBlock>> outlist)
      throws DMLRuntimeException {
    final FrameBlock rhsBlock = in.getValue();
    // offset of the input block's first row relative to the key value 1
    final long rhsRowOffset = in.getKey() - 1;

    // global left-hand-side area covered by the shifted input block
    final long lhsGlobalRowFirst = ixrange.rowStart + rhsRowOffset;
    final long lhsGlobalRowLast = lhsGlobalRowFirst + rhsBlock.getNumRows() - 1;
    final long lhsGlobalColFirst = ixrange.colStart;
    final long lhsGlobalColLast = ixrange.colEnd;

    final long firstLhsBlock = UtilFunctions.computeBlockIndex(lhsGlobalRowFirst, brlenLeft);
    final long lastLhsBlock = UtilFunctions.computeBlockIndex(lhsGlobalRowLast, brlenLeft);

    for (long lhsBlock = firstLhsBlock; lhsBlock <= lastLhsBlock; lhsBlock++) {
      // global rows of the covered area that fall into the current left-hand-side row block
      final long lhsRowFirst = Math.max((lhsBlock - 1) * brlenLeft + 1, lhsGlobalRowFirst);
      final long lhsRowLast = Math.min(lhsBlock * brlenLeft, lhsGlobalRowLast);

      // the same area in block-local coordinates of the left-hand side
      final int lhsRowLo = UtilFunctions.computeCellInBlock(lhsRowFirst, brlenLeft);
      final int lhsRowHi = UtilFunctions.computeCellInBlock(lhsRowLast, brlenLeft);
      final int lhsColLo = (int) lhsGlobalColFirst - 1;
      final int lhsColHi = (int) lhsGlobalColLast - 1;

      // corresponding area of the input block, first as 1-based positions ...
      final long rhsRowFirst = lhsRowFirst - (ixrange.rowStart - 1) - rhsRowOffset;
      final long rhsRowLast = rhsRowFirst + (lhsRowLast - lhsRowFirst);
      final long rhsColFirst = lhsGlobalColFirst - ixrange.colStart + 1;
      final long rhsColLast = rhsColFirst + (lhsGlobalColLast - lhsGlobalColFirst);

      // ... then as 0-based local indices
      final int rhsRowLo =
          (int) (UtilFunctions.computeCellInBlock(rhsRowFirst, rhsBlock.getNumRows()));
      final int rhsRowHi =
          (int) (UtilFunctions.computeCellInBlock(rhsRowLast, rhsBlock.getNumRows()));
      final int rhsColLo = (int) rhsColFirst - 1;
      final int rhsColHi = (int) rhsColLast - 1;

      final FrameBlock rhsSlice =
          rhsBlock.sliceOperations(rhsRowLo, rhsRowHi, rhsColLo, rhsColHi, new FrameBlock());

      // full-width schema: STRING padding on the left, the sliced columns' schema, STRING
      // padding on the right up to clenLeft columns
      final List<ValueType> leftPadding = Collections.nCopies(lhsColLo, ValueType.STRING);
      final List<ValueType> slicedSchema =
          UtilFunctions.getSubSchema(
              rhsBlock.getSchema(), rhsColLo, rhsColLo - lhsColLo + lhsColHi);
      final List<ValueType> fullSchema = new ArrayList<ValueType>(leftPadding);
      fullSchema.addAll(slicedSchema);
      fullSchema.addAll(Collections.nCopies(clenLeft - fullSchema.size(), ValueType.STRING));

      // row count of this left-hand-side block: full blocks hold brlenLeft rows, a block beyond
      // the last full one holds the remainder
      final long fullBlocks = rlen / brlenLeft;
      final int lhsBlockRows;
      if (lhsBlock <= fullBlocks) {
        lhsBlockRows = brlenLeft;
      } else {
        lhsBlockRows = (int) (rlen - fullBlocks * brlenLeft);
      }

      final FrameBlock emptyTarget = new FrameBlock(fullSchema);
      emptyTarget.ensureAllocatedColumns(lhsBlockRows);

      final FrameBlock shifted =
          emptyTarget.leftIndexingOperations(
              rhsSlice, lhsRowLo, lhsRowHi, lhsColLo, lhsColHi, new FrameBlock());

      // key: global index of the first row of the current left-hand-side row block
      final long outKey = (lhsBlock - 1) * brlenLeft + 1;
      outlist.add(new Pair<Long, FrameBlock>(outKey, shifted));
    }
  }