@Test public void writeCopyReadBufferTest() { out.println(" - - - highLevelTest; copy buffer test - - - "); final int elements = NUM_ELEMENTS; final CLContext context = CLContext.create(); // the CL.MEM_* flag is probably completely irrelevant in our case since we do not use a kernel // in this test final CLBuffer<ByteBuffer> clBufferA = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); final CLBuffer<ByteBuffer> clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); // fill only first read buffer -> we will copy the payload to the second later. fillBuffer(clBufferA.buffer, 12345); final CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); // asynchronous write of data to GPU device, blocking read later to get the computed results // back. queue .putWriteBuffer(clBufferA, false) // write A .putCopyBuffer(clBufferA, clBufferB, clBufferA.buffer.capacity()) // copy A -> B .putReadBuffer(clBufferB, true) // read B .finish(); context.release(); out.println("validating computed results..."); checkIfEqual(clBufferA.buffer, clBufferB.buffer, elements); out.println("results are valid"); }
@Test public void copyLimitedSlicedBuffersTest() { final int size = 4200 * SIZEOF_INT; // Arbitrary number that is a multiple of SIZEOF_INT; final int padding = 307; // Totally arbitrary number > 0 final CLContext context = CLContext.create(); final CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); // Make a buffer that is offset relative to the originally allocated position and has a limit // that is // not equal to the capacity to test whether all these attributes are correctly handled. ByteBuffer hostBuffer = ByteBuffer.allocateDirect(size + padding); hostBuffer.position(padding / 2); // Offset the original buffer hostBuffer = hostBuffer.slice(); // Slice it to have a new buffer that starts at the offset hostBuffer.limit(size); hostBuffer.order(ByteOrder.nativeOrder()); // Necessary for comparisons to work later on. fillBuffer(hostBuffer, 12345); final CLBuffer<ByteBuffer> bufferA = context.createBuffer(size).cloneWith(hostBuffer); final CLBuffer<ByteBuffer> bufferB = context.createByteBuffer(size); queue .putWriteBuffer(bufferA, false) .putCopyBuffer(bufferA, bufferB, bufferA.getNIOSize()) .putReadBuffer(bufferB, true) .finish(); hostBuffer.rewind(); bufferB.buffer.rewind(); checkIfEqual(hostBuffer, bufferB.buffer, size / SIZEOF_INT); context.release(); }
@Test public void mapBufferTest() { out.println(" - - - highLevelTest; map buffer test - - - "); final int elements = NUM_ELEMENTS; final int sizeInBytes = elements * SIZEOF_INT; CLContext context; CLBuffer<?> clBufferA; CLBuffer<?> clBufferB; // We will have to allocate mappable NIO memory on non CPU contexts // since we can't map e.g GPU memory. if (CLPlatform.getDefault().listCLDevices(CLDevice.Type.CPU).length > 0) { context = CLContext.create(CLDevice.Type.CPU); clBufferA = context.createBuffer(sizeInBytes, Mem.READ_WRITE); clBufferB = context.createBuffer(sizeInBytes, Mem.READ_WRITE); } else { context = CLContext.create(); clBufferA = context.createByteBuffer(sizeInBytes, Mem.READ_WRITE, Mem.USE_BUFFER); clBufferB = context.createByteBuffer(sizeInBytes, Mem.READ_WRITE, Mem.USE_BUFFER); } final CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); // fill only first buffer -> we will copy the payload to the second later. final ByteBuffer mappedBufferA = queue.putMapBuffer(clBufferA, Map.WRITE, true); assertEquals(sizeInBytes, mappedBufferA.capacity()); fillBuffer(mappedBufferA, 12345); // write to A queue .putUnmapMemory(clBufferA, mappedBufferA) // unmap A .putCopyBuffer(clBufferA, clBufferB); // copy A -> B // map B for read operations final ByteBuffer mappedBufferB = queue.putMapBuffer(clBufferB, Map.READ, true); assertEquals(sizeInBytes, mappedBufferB.capacity()); out.println("validating computed results..."); checkIfEqual(mappedBufferA, mappedBufferB, elements); // A == B ? out.println("results are valid"); queue.putUnmapMemory(clBufferB, mappedBufferB); // unmap B context.release(); }
@Test public void bufferWithHostPointerTest() { out.println(" - - - highLevelTest; host pointer test - - - "); final int elements = NUM_ELEMENTS; final CLContext context = CLContext.create(); final ByteBuffer buffer = Buffers.newDirectByteBuffer(elements * SIZEOF_INT); // fill only first read buffer -> we will copy the payload to the second later. fillBuffer(buffer, 12345); final CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); final Mem[] bufferConfig = new Mem[] {Mem.COPY_BUFFER, Mem.USE_BUFFER}; for (int i = 0; i < bufferConfig.length; i++) { out.println("testing with " + bufferConfig[i] + " config"); final CLBuffer<ByteBuffer> clBufferA = context.createBuffer(buffer, Mem.READ_ONLY, bufferConfig[i]); final CLBuffer<ByteBuffer> clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); // asynchronous write of data to GPU device, blocking read later to get the computed results // back. queue .putCopyBuffer(clBufferA, clBufferB, clBufferA.buffer.capacity()) // copy A -> B .putReadBuffer(clBufferB, true) // read B .finish(); assertEquals(2, context.getMemoryObjects().size()); clBufferA.release(); assertEquals(1, context.getMemoryObjects().size()); clBufferB.release(); assertEquals(0, context.getMemoryObjects().size()); // uploading worked when a==b. out.println("validating computed results..."); checkIfEqual(clBufferA.buffer, clBufferB.buffer, elements); out.println("results are valid"); } context.release(); }
/**
 * Validates the arguments and forwards them to the native {@code clEnqueueWriteBufferRect}
 * entry point, taking the host data from a {@link ShortBuffer}.
 *
 * <p>{@code buffer_offset}, {@code host_offset} and {@code region} are each checked for 3
 * elements, and {@code ptr} is checked against the size computed by
 * {@code CLChecks.calculateBufferRectSize} from the host offset, region and host pitches.
 * {@code event_wait_list} and {@code event} may be {@code null}. When the native call returns
 * {@code CL10.CL_SUCCESS}, {@code event} is registered with {@code command_queue}.
 *
 * @return the status code returned by the native call
 */
public static int clEnqueueWriteBufferRect(
    CLCommandQueue command_queue,
    CLMem buffer,
    int blocking_write,
    PointerBuffer buffer_offset,
    PointerBuffer host_offset,
    PointerBuffer region,
    long buffer_row_pitch,
    long buffer_slice_pitch,
    long host_row_pitch,
    long host_slice_pitch,
    ShortBuffer ptr,
    PointerBuffer event_wait_list,
    PointerBuffer event) {
  final long fnAddress = CLCapabilities.clEnqueueWriteBufferRect;
  BufferChecks.checkFunctionAddress(fnAddress);

  // Argument validation; the order is kept so the first failing check is the one reported.
  BufferChecks.checkBuffer(buffer_offset, 3);
  BufferChecks.checkBuffer(host_offset, 3);
  BufferChecks.checkBuffer(region, 3);
  BufferChecks.checkBuffer(
      ptr,
      CLChecks.calculateBufferRectSize(host_offset, region, host_row_pitch, host_slice_pitch));
  if (event_wait_list != null) {
    BufferChecks.checkDirect(event_wait_list);
  }
  if (event != null) {
    BufferChecks.checkBuffer(event, 1);
  }

  // A missing wait list is passed to the native call as a count of zero.
  final int waitCount;
  if (event_wait_list == null) {
    waitCount = 0;
  } else {
    waitCount = event_wait_list.remaining();
  }

  final int status =
      nclEnqueueWriteBufferRect(
          command_queue.getPointer(),
          buffer.getPointer(),
          blocking_write,
          MemoryUtil.getAddress(buffer_offset),
          MemoryUtil.getAddress(host_offset),
          MemoryUtil.getAddress(region),
          buffer_row_pitch,
          buffer_slice_pitch,
          host_row_pitch,
          host_slice_pitch,
          MemoryUtil.getAddress(ptr),
          waitCount,
          MemoryUtil.getAddressSafe(event_wait_list),
          MemoryUtil.getAddressSafe(event),
          fnAddress);

  if (status != CL10.CL_SUCCESS) {
    return status;
  }
  command_queue.registerCLEvent(event);
  return status;
}
/**
 * Validates the arguments and forwards them to the native {@code clEnqueueCopyBufferRect}
 * entry point.
 *
 * <p>{@code src_origin}, {@code dst_origin} and {@code region} are each checked for 3 elements.
 * {@code event_wait_list} and {@code event} may be {@code null}. When the native call returns
 * {@code CL10.CL_SUCCESS}, {@code event} is registered with {@code command_queue}.
 *
 * @return the status code returned by the native call
 */
public static int clEnqueueCopyBufferRect(
    CLCommandQueue command_queue,
    CLMem src_buffer,
    CLMem dst_buffer,
    PointerBuffer src_origin,
    PointerBuffer dst_origin,
    PointerBuffer region,
    long src_row_pitch,
    long src_slice_pitch,
    long dst_row_pitch,
    long dst_slice_pitch,
    PointerBuffer event_wait_list,
    PointerBuffer event) {
  final long fnAddress = CLCapabilities.clEnqueueCopyBufferRect;
  BufferChecks.checkFunctionAddress(fnAddress);

  // Argument validation; the order is kept so the first failing check is the one reported.
  BufferChecks.checkBuffer(src_origin, 3);
  BufferChecks.checkBuffer(dst_origin, 3);
  BufferChecks.checkBuffer(region, 3);
  if (event_wait_list != null) {
    BufferChecks.checkDirect(event_wait_list);
  }
  if (event != null) {
    BufferChecks.checkBuffer(event, 1);
  }

  // A missing wait list is passed to the native call as a count of zero.
  final int waitCount;
  if (event_wait_list == null) {
    waitCount = 0;
  } else {
    waitCount = event_wait_list.remaining();
  }

  final int status =
      nclEnqueueCopyBufferRect(
          command_queue.getPointer(),
          src_buffer.getPointer(),
          dst_buffer.getPointer(),
          MemoryUtil.getAddress(src_origin),
          MemoryUtil.getAddress(dst_origin),
          MemoryUtil.getAddress(region),
          src_row_pitch,
          src_slice_pitch,
          dst_row_pitch,
          dst_slice_pitch,
          waitCount,
          MemoryUtil.getAddressSafe(event_wait_list),
          MemoryUtil.getAddressSafe(event),
          fnAddress);

  if (status != CL10.CL_SUCCESS) {
    return status;
  }
  command_queue.registerCLEvent(event);
  return status;
}