/**
 * Releases the GPU output held by the matrix bound to the given variable by calling
 * {@code releaseOutput()} on its GPU object.
 *
 * @param varName variable name
 * @throws DMLRuntimeException if the matrix has no GPU object or the GPU object is not allocated
 */
public void releaseMatrixOutputForGPUInstruction(String varName) throws DMLRuntimeException {
   final MatrixObject outputMatrix = getMatrixObject(varName);
   final boolean allocatedOnGpu =
       outputMatrix.getGPUObject() != null && outputMatrix.getGPUObject().isAllocated();
   if (!allocatedOnGpu) {
     throw new DMLRuntimeException("No output is allocated on GPU");
   }
   outputMatrix.getGPUObject().releaseOutput();
 }
 /**
  * Looks up the matrix bound to a given LOPS variable (eg. _mVar3) and makes sure it has a GPU
  * object attached, creating one through {@link GPUContext} when none is set yet. An already
  * attached GPU object is left untouched.
  *
  * @param varName variable name
  * @return the matrix object bound to {@code varName}, with a GPU object attached
  * @throws DMLRuntimeException if DMLRuntimeException occurs
  */
 public MatrixObject allocateGPUMatrixObject(String varName) throws DMLRuntimeException {
   final MatrixObject matrix = getMatrixObject(varName);
   if (matrix.getGPUObject() != null) {
     // Nothing to do: a GPU object is already attached.
     return matrix;
   }
   matrix.setGPUObject(GPUContext.createGPUObject(matrix));
   return matrix;
 }
 /**
  * Prepares the matrix bound to the given variable as a dense output of a GPU instruction: a GPU
  * object is attached if necessary, {@code acquireDeviceModifyDense()} is invoked on it and the
  * non-zero count in the matrix characteristics is reset to {@code -1}.
  *
  * @param varName variable name
  * @return the matrix object bound to {@code varName}
  * @throws DMLRuntimeException if DMLRuntimeException occurs
  */
 public MatrixObject getDenseMatrixOutputForGPUInstruction(String varName)
     throws DMLRuntimeException {
   final MatrixObject outputMatrix = allocateGPUMatrixObject(varName);
   outputMatrix.getGPUObject().acquireDeviceModifyDense();
   // NOTE(review): -1 presumably marks the number of non-zeros as unknown -- confirm.
   outputMatrix.getMatrixCharacteristics().setNonZeros(-1);
   return outputMatrix;
 }
  /**
   * Multiplies two dense matrices on the GPU through {@code JCublas.cublasDgemm} and writes the
   * product into the device memory of {@code output}.
   *
   * <p>Since CuBLAS expects inputs in column-major format, the operands and their transpose flags
   * are swapped before the call and the dimensions passed to CuBLAS are derived from the swapped
   * operands.
   *
   * @param left1 left operand of the multiplication
   * @param right1 right operand of the multiplication
   * @param output matrix whose GPU object receives the result; must already be allocated on the
   *     GPU
   * @param isLeftTransposed1 whether the left operand is to be transposed
   * @param isRightTransposed1 whether the right operand is to be transposed
   * @throws DMLRuntimeException if an operand is in sparse format, if the dimensions are unknown,
   *     inconsistent or exceed the {@code int} range, or if an operand or the output has no
   *     allocated GPU object
   */
  public static void matmult(
      MatrixObject left1,
      MatrixObject right1,
      MatrixObject output,
      boolean isLeftTransposed1,
      boolean isRightTransposed1)
      throws DMLRuntimeException {
    if (isInSparseFormat(left1) || isInSparseFormat(right1)) {
      throw new DMLRuntimeException("Sparse GPU matrix multiplication is not implemented");
    }

    // Since CuBLAS expects inputs in column-major format,
    // reverse the order of matrix-multiplication and take care of dimension mismatch.
    MatrixObject left = right1;
    MatrixObject right = left1;
    boolean isLeftTransposed = isRightTransposed1;
    boolean isRightTransposed = isLeftTransposed1;

    char transa = isLeftTransposed ? 'T' : 'N';
    char transb = isRightTransposed ? 'T' : 'N';

    // Note: the dimensions are swapped
    long mDim = isLeftTransposed ? left.getNumRows() : left.getNumColumns();
    long nDim = isRightTransposed ? right.getNumColumns() : right.getNumRows();
    long kDim = isLeftTransposed ? left.getNumColumns() : left.getNumRows();
    long k1Dim = isRightTransposed ? right.getNumRows() : right.getNumColumns();

    // cublasDgemm takes int dimensions; reject values that a narrowing cast would silently corrupt.
    if (mDim > Integer.MAX_VALUE
        || nDim > Integer.MAX_VALUE
        || kDim > Integer.MAX_VALUE
        || k1Dim > Integer.MAX_VALUE) {
      throw new DMLRuntimeException(
          "Matrix dimensions exceed the supported integer range: "
              + mDim
              + ", "
              + nDim
              + ", "
              + kDim
              + ", "
              + k1Dim);
    }
    int m = (int) mDim;
    int n = (int) nDim;
    int k = (int) kDim;
    int k1 = (int) k1Dim;
    if (k != k1) {
      throw new DMLRuntimeException("Dimension mismatch: " + k + " != " + k1);
    }

    if (m == -1 || n == -1 || k == -1) {
      throw new DMLRuntimeException("Incorrect dimensions");
    }

    double alpha = 1;
    double beta = 0;

    int lda = isLeftTransposed ? k : m;
    int ldb = isRightTransposed ? n : k;
    int ldc = m;

    // A missing GPU object is treated like an unallocated one instead of failing with an NPE.
    boolean leftAllocated = left.getGPUObject() != null && left.getGPUObject().isAllocated();
    boolean rightAllocated = right.getGPUObject() != null && right.getGPUObject().isAllocated();
    if (!leftAllocated || !rightAllocated) {
      throw new DMLRuntimeException(
          "One of input is not allocated:" + leftAllocated + " " + rightAllocated);
    }
    boolean outputAllocated =
        output.getGPUObject() != null && output.getGPUObject().isAllocated();
    if (!outputAllocated) {
      throw new DMLRuntimeException("Output is not allocated:" + outputAllocated);
    }

    Pointer A = ((JCudaObject) left.getGPUObject()).jcudaPointer;
    Pointer B = ((JCudaObject) right.getGPUObject()).jcudaPointer;
    Pointer C = ((JCudaObject) output.getGPUObject()).jcudaPointer;

    JCublas.cublasDgemm(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
  }
 /**
  * Returns the matrix bound to the given variable after acquiring it for reading on the GPU.
  *
  * <p>A GPU object is created and attached if the matrix has none. When that GPU object is not
  * yet allocated, the matrix is additionally acquired for reading via {@code acquireRead()} for
  * the duration of the device-side acquisition and released again afterwards, whether the
  * device-side acquisition succeeds or fails.
  *
  * @param varName variable name
  * @return the matrix object bound to {@code varName}
  * @throws DMLRuntimeException if no matrix object is available for the variable or acquiring it
  *     fails
  */
 public MatrixObject getMatrixInputForGPUInstruction(String varName) throws DMLRuntimeException {
   MatrixObject mo = getMatrixObject(varName);
   if (mo == null) {
     throw new DMLRuntimeException("No matrix object available for variable:" + varName);
   }
   if (mo.getGPUObject() == null) {
     mo.setGPUObject(GPUContext.createGPUObject(mo));
   }
   // NOTE(review): presumably the host-side read is needed so the data can be transferred to the
   // device when it is not allocated there yet -- confirm against GPUObject.acquireDeviceRead().
   final boolean acquired = !mo.getGPUObject().isAllocated();
   if (acquired) {
     mo.acquireRead();
   }
   try {
     mo.getGPUObject().acquireDeviceRead();
   } finally {
     // Undo the acquireRead() above even when the device-side acquisition throws.
     if (acquired) {
       mo.release();
     }
   }
   return mo;
 }
 /**
  * Tells whether the given matrix is in sparse format. If the matrix has an allocated GPU object,
  * the answer is taken from that GPU object; otherwise it is evaluated from the matrix dimensions
  * and its number of non-zeros via {@code MatrixBlock.evalSparseFormatInMemory}.
  *
  * @param mo matrix object to inspect
  * @return true if the matrix is (or is evaluated to be) in sparse format
  */
 public static boolean isInSparseFormat(MatrixObject mo) {
   final boolean allocatedOnGpu = mo.getGPUObject() != null && mo.getGPUObject().isAllocated();
   return allocatedOnGpu
       ? mo.getGPUObject().isInSparseFormat()
       : MatrixBlock.evalSparseFormatInMemory(mo.getNumRows(), mo.getNumColumns(), mo.getNnz());
 }
 /**
  * Releases the GPU input held by the matrix bound to the given variable by calling
  * {@code releaseInput()} on its GPU object.
  *
  * @param varName variable name
  * @throws DMLRuntimeException if the matrix has no GPU object or releasing the input fails
  */
 public void releaseMatrixInputForGPUInstruction(String varName) throws DMLRuntimeException {
   MatrixObject mo = getMatrixObject(varName);
   // Fail with a descriptive error rather than a NullPointerException when the variable was
   // never acquired on the GPU.
   if (mo.getGPUObject() == null) {
     throw new DMLRuntimeException("No input is allocated on GPU for variable:" + varName);
   }
   mo.getGPUObject().releaseInput();
 }