/**
 * Overwrites the contents of this buffer with the data behind {@code pointer}.
 *
 * <p>The buffer is flagged as modified, then {@code length()} elements are copied with unit
 * stride from {@code pointer} into the pointer returned by {@code getHostPointer()}. Buffers
 * whose data type is {@code DOUBLE} use {@code cublasDcopy}; every other data type uses
 * {@code cublasScopy}.
 *
 * @param pointer the source pointer to copy from
 */
@Override
public void set(Pointer pointer) {
  modified.set(true);

  // Anything that is not double precision goes through the single-precision copy.
  if (dataType() != DataBuffer.Type.DOUBLE) {
    JCublas2.cublasScopy(
        ContextHolder.getInstance().getHandle(),
        (int) length(),
        pointer,
        1,
        getHostPointer(),
        1);
    return;
  }

  JCublas2.cublasDcopy(
      ContextHolder.getInstance().getHandle(),
      (int) length(),
      pointer,
      1,
      getHostPointer(),
      1);
}
@Override public void copyToHost(int offset, int length) { DevicePointerInfo devicePointerInfo = pointersToContexts.get(Thread.currentThread().getName(), new Pair<>(offset, length)); if (devicePointerInfo == null) throw new IllegalStateException("No pointer found for offset " + offset); // prevent inconsistent pointers if (devicePointerInfo.getOffset() != offset) throw new IllegalStateException( "Device pointer offset didn't match specified offset in pointer map"); if (devicePointerInfo != null) { ContextHolder.syncStream(); int deviceStride = devicePointerInfo.getStride(); int deviceOffset = devicePointerInfo.getOffset(); long deviceLength = devicePointerInfo.getLength(); if (deviceOffset == 0 && length < length()) { /** * The way the data works out the stride for retrieving the data should be 1. * * <p>The device stride should be used for resetting the data. * * <p>This is for the edge case where the offset is zero and the length of the pointer is < * the actual buffer length itself. */ JCublas2.cublasGetVectorAsync( length, getElementSize(), devicePointerInfo.getPointer().withByteOffset(offset * getElementSize()), deviceStride, getHostPointer(deviceOffset), deviceStride, ContextHolder.getInstance().getCudaStream()); } else { JCublas2.cublasGetVectorAsync( (int) deviceLength, getElementSize(), devicePointerInfo.getPointer().withByteOffset(offset * getElementSize()), deviceStride, getHostPointer(deviceOffset), deviceStride, ContextHolder.getInstance().getCudaStream()); } ContextHolder.syncStream(); } else throw new IllegalStateException("No offset found to copy"); }
/**
 * Copies {@code length} elements from {@code from} into this buffer, starting at element
 * {@code index}.
 *
 * <p>The buffer is flagged as modified before the bounds check, so the flag is set even when the
 * call is rejected. The transfer is issued asynchronously on the shared CUDA stream and the
 * stream is synchronized before returning.
 *
 * @param index the index of the first element to overwrite
 * @param length the number of elements to copy
 * @param from the pointer to read the elements from
 * @param inc the increment passed to cuBLAS for {@code from}; the destination increment is 1
 * @throws IllegalArgumentException if the byte offset of {@code index} lies at or beyond the end
 *     of this buffer
 */
protected void set(int index, int length, Pointer from, int inc) {
  modified.set(true);

  int offset = getElementSize() * index;
  boolean startsInsideBuffer = offset < length() * getElementSize();
  if (!startsInsideBuffer) {
    throw new IllegalArgumentException(
        "Illegal offset " + offset + " with index of " + index + " and length " + length());
  }

  JCublas2.cublasSetVectorAsync(
      length,
      getElementSize(),
      from,
      inc,
      getHostPointer().withByteOffset(offset),
      1,
      ContextHolder.getInstance().getCudaStream());

  ContextHolder.syncStream();
}
@Override public boolean freeDevicePointer(int offset, int length) { String name = Thread.currentThread().getName(); DevicePointerInfo devicePointerInfo = pointersToContexts.get(name, offset); // nothing to free, there was no copy. Only the gpu pointer was reused with a different offset. if (offset != 0) pointersToContexts.remove(name, offset); else if (offset == 0 && isPersist) { return true; } else if (devicePointerInfo != null && !freed.get()) { allocated.addAndGet(-devicePointerInfo.getLength()); log.trace("freeing {} bytes, total: {}", devicePointerInfo.getLength(), allocated.get()); ContextHolder.getInstance().getMemoryStrategy().free(this, offset, length); freed.set(true); copied.remove(name); pointersToContexts.remove(name, offset); return true; } return false; }
@Override public Pointer getDevicePointer(INDArray arr, int stride, int offset, int length) { String name = Thread.currentThread().getName(); DevicePointerInfo devicePointerInfo = pointersToContexts.get(name, new Pair<>(offset, length)); if (devicePointerInfo == null) { int devicePointerLength = getElementSize() * length; allocated.addAndGet(devicePointerLength); totalAllocated.addAndGet(devicePointerLength); log.trace( "Allocating {} bytes, total: {}, overall: {}", devicePointerLength, allocated.get(), totalAllocated); // check its the same object if (arr.data() != this) { throw new IllegalArgumentException( "Unable to get pointer for array that doesn't have this as the buffer"); } int compareLength = arr instanceof IComplexNDArray ? arr.length() * 2 : arr.length(); /** * Add zero first no matter what. Allocate the whole buffer on the gpu and use offsets for any * other pointers that come in. This will allow us to set device pointers with offsets * * <p>with no extra allocation. * * <p>Notice here we ignore the length of the actual array. * * <p>We are going to allocate the whole buffer on the gpu only once. */ if (!pointersToContexts.contains(name, new Pair<>(0, this.length))) { devicePointerInfo = (DevicePointerInfo) ContextHolder.getInstance() .getConf() .getMemoryStrategy() .alloc(this, 1, 0, this.length); pointersToContexts.put(name, new Pair<>(0, this.length), devicePointerInfo); } if (offset > 0) { /** * Store the length for the offset of the pointer. Return the original pointer with an * offset (these pointers can't be reused?) * * <p>With the device pointer info, we want to store the original pointer. When retrieving * the vector from the gpu later, we will use the recorded offset. * * <p>Due to gpu instability (please correct me if I'm wrong here) we can't seem to reuse * the pointers with the offset specified, therefore it is desirable to recreate this * pointer later. 
* * <p>This will prevent extra allocation as well as inform the length for retrieving data * from the gpu for this particular offset and buffer. */ DevicePointerInfo info2 = pointersToContexts.get(name, new Pair<>(0, this.length)); if (info2 == null) throw new IllegalStateException( "No pointer found for name " + name + " and offset/length " + offset + " / " + length); Pointer zero = info2.getPointer(); Pointer ret = info2.getPointer().withByteOffset(offset * getElementSize()); devicePointerInfo = new DevicePointerInfo(zero, length, stride, offset); pointersToContexts.put(name, new Pair<>(offset, compareLength), devicePointerInfo); return ret; } else if (offset == 0 && compareLength < arr.data().length()) { DevicePointerInfo info2 = pointersToContexts.get(name, new Pair<>(0, this.length)); DevicePointerInfo info3 = new DevicePointerInfo( info2.getPointer(), this.length, BlasBufferUtil.getBlasStride(arr), arr.offset()); int compareLength2 = arr instanceof IComplexNDArray ? arr.length() * 2 : arr.length(); /** * Need a pointer that points at the buffer but doesnt extend all the way to the end. This * is for data like the first row of a matrix that has zero offset but does not extend all * the way to the end of the buffer. */ pointersToContexts.put(name, new Pair<>(offset, compareLength2), info3); return info3.getPointer(); } freed.set(false); } /** * Return the device pointer with the specified offset. Regardless of whether the device pointer * has been allocated, we need to return with it respect to the specified array not the array's * underlying buffer. */ if (devicePointerInfo == null && offset == 0 && length < length()) { DevicePointerInfo origin = pointersToContexts.get(Thread.currentThread().getName(), new Pair<>(0, length())); DevicePointerInfo newInfo = new DevicePointerInfo(origin.getPointer(), length, stride, 0); return newInfo.getPointer(); } return devicePointerInfo.getPointer().withByteOffset(offset * getElementSize()); }
/**
 * Returns a device pointer for the region of this buffer described by {@code offset} and
 * {@code length}, registering it for the current thread if necessary.
 *
 * <p>The whole buffer is allocated through the memory strategy at most once per thread and
 * registered under {@code (0, this.length)}. A request with a positive offset is served by
 * shifting that base pointer, and the base pointer is looked up with the same
 * {@code (0, this.length)} key it was stored under.
 *
 * @param stride the stride recorded on newly registered entries
 * @param offset the element offset into this buffer
 * @param length the number of elements requested
 * @return the device pointer positioned at {@code offset}
 * @throws IllegalStateException if the base pointer or the requested pointer cannot be resolved
 */
@Override
public Pointer getDevicePointer(int stride, int offset, int length) {
  String name = Thread.currentThread().getName();
  DevicePointerInfo devicePointerInfo =
      pointersToContexts.get(name, new Pair<>(offset, length));

  if (devicePointerInfo == null) {
    int devicePointerLength = getElementSize() * length;
    allocated.addAndGet(devicePointerLength);
    totalAllocated.addAndGet(devicePointerLength);
    log.trace(
        "Allocating {} bytes, total: {}, overall: {}",
        devicePointerLength,
        allocated.get(),
        totalAllocated);

    // Add zero first no matter what. The whole buffer is allocated only once, ignoring the
    // requested length; all other pointers are offsets into this allocation.
    if (!pointersToContexts.contains(name, new Pair<>(0, this.length))) {
      devicePointerInfo =
          (DevicePointerInfo)
              ContextHolder.getInstance()
                  .getConf()
                  .getMemoryStrategy()
                  .alloc(this, 1, 0, this.length);
      pointersToContexts.put(name, new Pair<>(0, this.length), devicePointerInfo);
    }

    if (offset > 0) {
      // Record the original (zero) pointer together with the offset and length; the recorded
      // offset is what is used when the vector is later retrieved. The pointer returned to
      // the caller is the zero pointer shifted by the byte offset.
      DevicePointerInfo base = pointersToContexts.get(name, new Pair<>(0, this.length));
      if (base == null) {
        throw new IllegalStateException(
            "No pointer found for name " + name + " and offset/length " + offset + " / " + length);
      }
      Pointer zero = base.getPointer();
      Pointer ret = zero.withByteOffset(offset * getElementSize());
      devicePointerInfo = new DevicePointerInfo(zero, length, stride, offset);
      pointersToContexts.put(name, new Pair<>(offset, length), devicePointerInfo);
      return ret;
    }

    freed.set(false);
  }

  if (devicePointerInfo == null) {
    // The base allocation already existed but nothing is registered for (offset, length).
    // As in the INDArray overload, a zero-offset request shorter than the buffer is served
    // from the base pointer; anything else cannot be resolved.
    if (offset == 0 && length < length()) {
      DevicePointerInfo origin = pointersToContexts.get(name, new Pair<>(0, length()));
      if (origin != null) {
        return origin.getPointer();
      }
    }
    throw new IllegalStateException(
        "No pointer found for name " + name + " and offset/length " + offset + " / " + length);
  }

  // Return the pointer with respect to the requested offset, not the start of the underlying
  // buffer.
  return devicePointerInfo.getPointer().withByteOffset(offset * getElementSize());
}