// result = this*other public DMatrix mmul(boolean tA, boolean tB, DMatrix B, DMatrix C) { int m = tA ? this.columns() : this.rows(); int n = tB ? B.rows() : B.columns(); int k = tA ? this.rows() : this.columns(); int kB = tB ? B.columns() : B.rows(); assert (k == kB); if (C.rows != m || C.columns != n) { if (C != this && C != B) { C.resize(m, n); } else { System.err.printf( "[ALERT] Should not resize result matrix because it is used in-place. But doing it anyway.\n"); } } if (C == this || C == B) { /* actually, blas cannot do multiplications in-place. Therefore, we will fake by * * allocating a temporary object on the side and copy the result later. * */ DMatrix temp = new CUDAMatrix(m, n); if (m == 1) { SimpleCuBlas.gemv(tB, B, this, temp, 1.0, 0.0); } else { SimpleCuBlas.gemm(tA, tB, this, B, temp, 1.0, 0.0); } if (temp.rows() == C.rows() && temp.columns() == C.columns()) SimpleCuBlas.copy(temp, C); else { C.resize(m, n); SimpleCuBlas.copy(temp, C); } } else { if (m == 1) { SimpleCuBlas.gemv(tB, B, this, C, 1.0, 0.0); } else { SimpleCuBlas.gemm(tA, tB, this, B, C, 1.0, 0.0); } } return C; }
/**
 * Combines this matrix with {@code B} through {@code mul} and reduces the outcome with
 * {@code sumColumns}.
 *
 * <p>NOTE(review): presumably {@code mul} is the element-wise product, which would make
 * the result the dot products of corresponding rows; confirm against {@code mul} and
 * {@code sumColumns}.
 *
 * @param B matrix with the same number of rows and columns as this one (checked by
 *          assertion only)
 * @return the value produced by {@code sumColumns} on the product
 */
public DMatrix dotRows(DMatrix B) {
    // Both operands must have identical shapes.
    assert this.rows() == B.rows();
    assert this.columns() == B.columns();

    return this.mul(B)
            .sumColumns();
}
/**
 * In-place variant of {@code mulRows}: hands this matrix and {@code colVector} to
 * {@code SimpleCuBlas.mulRows} and returns this matrix.
 *
 * <p>NOTE(review): presumably row {@code i} is scaled by entry {@code i} of
 * {@code colVector}; confirm against {@code SimpleCuBlas.mulRows}.
 *
 * @param colVector single-column matrix with as many rows as this matrix (checked by
 *                  assertion only)
 * @return this matrix
 */
public DMatrix mulRowsi(DMatrix colVector) {
    // One entry per row of this matrix, laid out as a single column.
    assert this.rows() == colVector.rows();
    assert colVector.columns() == 1;

    SimpleCuBlas.mulRows(colVector, this);
    return this;
}
/**
 * Copying variant of {@code mulRowsi}: builds a new {@code CUDAMatrix} from this matrix's
 * dimensions and {@code toArray()} contents, passes that copy together with
 * {@code colVector} to {@code SimpleCuBlas.mulRows}, and returns the copy. This matrix is
 * not handed to the routine.
 *
 * <p>NOTE(review): presumably row {@code i} is scaled by entry {@code i} of
 * {@code colVector}; confirm against {@code SimpleCuBlas.mulRows}.
 *
 * @param colVector single-column matrix with as many rows as this matrix (checked by
 *                  assertion only)
 * @return the newly created matrix
 */
public DMatrix mulRows(DMatrix colVector) {
    // One entry per row of this matrix, laid out as a single column.
    assert this.rows() == colVector.rows();
    assert colVector.columns() == 1;

    // Work on a fresh copy so the receiver is not passed to the routine.
    int rowCount = this.rows();
    int columnCount = this.columns();
    DMatrix scaled = new CUDAMatrix(rowCount, columnCount, this.toArray());

    SimpleCuBlas.mulRows(colVector, scaled);
    return scaled;
}
/**
 * In-place matrix product: delegates to {@code mmul} with both transpose flags off and
 * this matrix as the result, i.e. {@code this = this * B}.
 *
 * @param B right-hand operand; its row count must equal this matrix's column count
 *          (checked by assertion only)
 * @return whatever {@code mmul} returns for this call
 */
public DMatrix mmuli(DMatrix B) {
    assert this.columns() == B.rows();

    // Neither operand is transposed; the receiver doubles as the output matrix.
    final boolean transposeThis = false;
    final boolean transposeB = false;
    return mmul(transposeThis, transposeB, B, this);
}