Example #1
0
  // C = op(this) * op(B), where op() transposes its operand when the matching flag (tA / tB) is set
  /**
   * Multiplies this matrix by {@code B} and stores the product in {@code C}.
   *
   * <p>The flags {@code tA} and {@code tB} select whether this matrix and {@code B},
   * respectively, are treated as transposed when the operand shapes are derived; they are
   * also forwarded to {@code SimpleCuBlas}. With {@code m} result rows and {@code n} result
   * columns, {@code C} is resized to {@code m x n} when its current shape differs.
   *
   * <p>{@code C} may be the same object as this matrix or as {@code B}. In that case the
   * product is first written to a temporary {@code CUDAMatrix} and then copied into
   * {@code C}, so the operands are not overwritten while they are still being read.
   *
   * @param tA whether this matrix is used transposed
   * @param tB whether {@code B} is used transposed
   * @param B the right-hand operand
   * @param C the matrix that receives the product; may alias this matrix or {@code B}
   * @return {@code C}
   * @throws IllegalArgumentException if the inner dimensions of the two operands differ
   */
  public DMatrix mmul(boolean tA, boolean tB, DMatrix B, DMatrix C) {
    final int m = tA ? this.columns() : this.rows();
    final int n = tB ? B.rows() : B.columns();
    final int k = tA ? this.rows() : this.columns();
    final int kB = tB ? B.columns() : B.rows();

    // Checked explicitly rather than with `assert`: assertions are disabled by default, which
    // would let mismatched shapes reach the BLAS calls unchecked.
    if (k != kB) {
      throw new IllegalArgumentException(
          String.format("Inner dimensions do not match: %d != %d", k, kB));
    }

    final boolean inPlace = (C == this || C == B);
    final boolean needsResize = (C.rows() != m || C.columns() != n);

    if (needsResize) {
      if (inPlace) {
        // C is also an operand, so it cannot be resized before the multiplication has read it;
        // the resize is deferred until just before the result is copied back.
        System.err.printf(
            "[ALERT] Should not resize result matrix because it is used in-place. But doing it anyway.\n");
      } else {
        C.resize(m, n);
      }
    }

    // When C aliases an operand, compute into a scratch matrix and copy back afterwards.
    final DMatrix target = inPlace ? new CUDAMatrix(m, n) : C;

    if (m == 1) {
      // Single-row result is dispatched to gemv.
      // NOTE(review): tA is not passed on this path — confirm gemv handles the tA == true case.
      SimpleCuBlas.gemv(tB, B, this, target, 1.0, 0.0);
    } else {
      SimpleCuBlas.gemm(tA, tB, this, B, target, 1.0, 0.0);
    }

    if (inPlace) {
      if (needsResize) {
        C.resize(m, n);
      }
      SimpleCuBlas.copy(target, C);
    }
    return C;
  }