// result = this*other public DMatrix mmul(boolean tA, boolean tB, DMatrix B, DMatrix C) { int m = tA ? this.columns() : this.rows(); int n = tB ? B.rows() : B.columns(); int k = tA ? this.rows() : this.columns(); int kB = tB ? B.columns() : B.rows(); assert (k == kB); if (C.rows != m || C.columns != n) { if (C != this && C != B) { C.resize(m, n); } else { System.err.printf( "[ALERT] Should not resize result matrix because it is used in-place. But doing it anyway.\n"); } } if (C == this || C == B) { /* actually, blas cannot do multiplications in-place. Therefore, we will fake by * * allocating a temporary object on the side and copy the result later. * */ DMatrix temp = new CUDAMatrix(m, n); if (m == 1) { SimpleCuBlas.gemv(tB, B, this, temp, 1.0, 0.0); } else { SimpleCuBlas.gemm(tA, tB, this, B, temp, 1.0, 0.0); } if (temp.rows() == C.rows() && temp.columns() == C.columns()) SimpleCuBlas.copy(temp, C); else { C.resize(m, n); SimpleCuBlas.copy(temp, C); } } else { if (m == 1) { SimpleCuBlas.gemv(tB, B, this, C, 1.0, 0.0); } else { SimpleCuBlas.gemm(tA, tB, this, B, C, 1.0, 0.0); } } return C; }
/**
 * Reduces this matrix to a single column by multiplying it with a column of ones.
 *
 * <p>Allocates a {@code rows x 1} result and a {@code columns x 1} matrix obtained from
 * {@code DMath.createOnesMatrix}, then calls {@code SimpleCuBlas.gemm} with neither operand
 * transposed and the scalar arguments {@code 1.0} and {@code 0.0}. Presumably each entry of
 * the result is therefore the sum of the corresponding row of this matrix -- confirm against
 * the helpers' definitions.
 *
 * @return a newly created {@code rows x 1} matrix holding the product
 */
public DMatrix sumColumns() {
    final DMatrix totals = DMath.createMatrix(rows, 1);
    final DMatrix ones = DMath.createOnesMatrix(columns, 1);

    // totals receives the product of this matrix and the ones column.
    SimpleCuBlas.gemm(false, false, this, ones, totals, 1.0, 0.0);

    return totals;
}