private void computeCL(boolean is64bit) { double minX = transformX(-ww * 0.5) + offsetX; double maxX = transformX(ww * 0.5) + offsetX; double minY = transformY(-wh * 0.5) + offsetY; double maxY = transformY(wh * 0.5) + offsetY; double rangeX = maxX - minX; double rangeY = maxY - minY; kernel2DGlobalWorkSize.put(0, ww).put(1, wh); // start computation clSetKernelArg1i(clKernel, 0, ww); clSetKernelArg1i(clKernel, 1, wh); if (!is64bit || !isDoubleFPAvailable(deviceCaps)) { clSetKernelArg1f(clKernel, 2, (float) minX); clSetKernelArg1f(clKernel, 3, (float) minY); clSetKernelArg1f(clKernel, 4, (float) rangeX); clSetKernelArg1f(clKernel, 5, (float) rangeY); } else { clSetKernelArg1d(clKernel, 2, minX); clSetKernelArg1d(clKernel, 3, minY); clSetKernelArg1d(clKernel, 4, rangeX); clSetKernelArg1d(clKernel, 5, rangeY); } // acquire GL objects, and enqueue a kernel with a probe from the list int errcode = clEnqueueAcquireGLObjects(clQueue, clTexture, null, null); checkCLError(errcode); errcode = clEnqueueNDRangeKernel( clQueue, clKernel, 2, null, kernel2DGlobalWorkSize, null, null, null); checkCLError(errcode); errcode = clEnqueueReleaseGLObjects(clQueue, clTexture, null, !syncGLtoCL ? syncBuffer : null); checkCLError(errcode); if (!syncGLtoCL) { clEvent = syncBuffer.get(0); glFenceFromCLEvent = glCreateSyncFromCLeventARB(clContext, clEvent, 0); } // block until done (important: finish before doing further gl work) if (syncGLtoCL) { errcode = clFinish(clQueue); checkCLError(errcode); } }
private void initGLObjects() { if (clTexture != NULL) { checkCLError(clReleaseMemObject(clTexture)); glDeleteTextures(glTexture); } glTexture = glGenTextures(); // Init textures glBindTexture(GL_TEXTURE_2D, glTexture); glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8UI, ww, wh, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, (ByteBuffer) null); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); clTexture = clCreateFromGLTexture2D( clContext, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, glTexture, errcode_ret); checkCLError(errcode_ret); glBindTexture(GL_TEXTURE_2D, 0); glViewport(0, 0, fbw, fbh); glUniform2f(sizeUniform, ww, wh); FloatBuffer projectionMatrix = BufferUtils.createFloatBuffer(4 * 4); glOrtho(0.0f, ww, 0.0f, wh, 0.0f, 1.0f, projectionMatrix); glUniformMatrix4fv(projectionUniform, false, projectionMatrix); shouldInitBuffers = false; }
private static long getDevice(long platform, CLCapabilities platformCaps, int deviceType) { MemoryStack stack = stackPush(); try { IntBuffer pi = stack.mallocInt(1); checkCLError(clGetDeviceIDs(platform, deviceType, null, pi)); PointerBuffer devices = stack.mallocPointer(pi.get(0)); checkCLError(clGetDeviceIDs(platform, deviceType, devices, null)); for (int i = 0; i < devices.capacity(); i++) { long device = devices.get(i); CLCapabilities caps = CL.createDeviceCapabilities(device, platformCaps); if (!(caps.cl_khr_gl_sharing || caps.cl_APPLE_gl_sharing)) continue; return device; } } finally { stack.pop(); } return NULL; }
private void renderGL() { glClear(GL_COLOR_BUFFER_BIT); // draw slices if (!syncGLtoCL) { glWaitSync(glFenceFromCLEvent, 0, 0); glDeleteSync(glFenceFromCLEvent); glFenceFromCLEvent = NULL; int errcode = clReleaseEvent(clEvent); clEvent = NULL; checkCLError(errcode); } glBindTexture(GL_TEXTURE_2D, glTexture); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); }
private void buildProgram() { if (clProgram != NULL) { int errcode = clReleaseProgram(clProgram); checkCLError(errcode); } PointerBuffer strings = BufferUtils.createPointerBuffer(1); PointerBuffer lengths = BufferUtils.createPointerBuffer(1); strings.put(0, source); lengths.put(0, source.remaining()); clProgram = clCreateProgramWithSource(clContext, strings, lengths, errcode_ret); checkCLError(errcode_ret); final CountDownLatch latch = new CountDownLatch(1); // disable 64bit floating point math if not available StringBuilder options = new StringBuilder("-D USE_TEXTURE"); if (doublePrecision && isDoubleFPAvailable(deviceCaps)) { // cl_khr_fp64 options.append(" -D DOUBLE_FP"); // AMD's verson of double precision floating point math if (!deviceCaps.cl_khr_fp64 && deviceCaps.cl_amd_fp64) options.append(" -D AMD_FP"); } log("OpenCL COMPILER OPTIONS: " + options); CLProgramCallback buildCallback; int errcode = clBuildProgram( clProgram, device, options, buildCallback = new CLProgramCallback() { @Override public void invoke(long program, long user_data) { log( String.format( "The cl_program [0x%X] was built %s", program, getProgramBuildInfoInt(program, device, CL_PROGRAM_BUILD_STATUS) == CL_SUCCESS ? "successfully" : "unsuccessfully")); String log = getProgramBuildInfoStringASCII(program, device, CL_PROGRAM_BUILD_LOG); if (!log.isEmpty()) log(String.format("BUILD LOG:\n----\n%s\n-----", log)); latch.countDown(); } }, NULL); checkCLError(errcode); // Make sure the program has been built before proceeding try { latch.await(); } catch (InterruptedException e) { throw new RuntimeException(e); } buildCallback.free(); rebuild = false; // init kernel with constants clKernel = clCreateKernel(clProgram, "mandelbrot", errcode_ret); checkCLError(errcode_ret); }
private static void release(long object, CLCleaner cleaner) { if (object == NULL) return; int errcode = cleaner.release(object); checkCLError(errcode); }
public Mandelbrot( long platform, CLCapabilities platformCaps, GLFWWindow window, int deviceType, boolean debugGL, int maxIterations) { this.platform = platform; this.window = window; this.maxIterations = maxIterations; IntBuffer size = BufferUtils.createIntBuffer(2); nglfwGetWindowSize(window.handle, memAddress(size), memAddress(size) + 4); ww = size.get(0); wh = size.get(1); nglfwGetFramebufferSize(window.handle, memAddress(size), memAddress(size) + 4); fbw = size.get(0); fbh = size.get(1); glfwMakeContextCurrent(window.handle); GLCapabilities glCaps = GL.createCapabilities(); if (!glCaps.OpenGL30) throw new RuntimeException("OpenGL 3.0 is required to run this demo."); debugProc = debugGL ? GLUtil.setupDebugMessageCallback() : null; glfwSwapInterval(0); errcode_ret = BufferUtils.createIntBuffer(1); try { // Find devices with GL sharing support { long device = getDevice(platform, platformCaps, deviceType); if (device == NULL) device = getDevice(platform, platformCaps, CL_DEVICE_TYPE_CPU); if (device == NULL) throw new RuntimeException("No OpenCL devices found with OpenGL sharing support."); this.device = device; this.deviceCaps = CL.createDeviceCapabilities(device, platformCaps); } // Create the context PointerBuffer ctxProps = BufferUtils.createPointerBuffer(7); switch (Platform.get()) { case WINDOWS: ctxProps .put(CL_GL_CONTEXT_KHR) .put(glfwGetWGLContext(window.handle)) .put(CL_WGL_HDC_KHR) .put(wglGetCurrentDC()); break; case LINUX: ctxProps .put(CL_GL_CONTEXT_KHR) .put(glfwGetGLXContext(window.handle)) .put(CL_GLX_DISPLAY_KHR) .put(glfwGetX11Display()); break; case MACOSX: ctxProps .put(APPLEGLSharing.CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE) .put(CGLGetShareGroup(CGLGetCurrentContext())); } ctxProps.put(CL_CONTEXT_PLATFORM).put(platform).put(NULL).flip(); clContext = clCreateContext( ctxProps, device, clContextCB = new CLContextCallback() { @Override public void invoke(long errinfo, long private_info, long cb, long user_data) { log(String.format("cl_context_callback\n\tInfo: %s", memUTF8(errinfo))); } }, NULL, errcode_ret); checkCLError(errcode_ret); // create command queues for every GPU, setup colormap and init kernels IntBuffer colorMapBuffer = BufferUtils.createIntBuffer(32 * 2); initColorMap(colorMapBuffer, 32, Color.BLUE, Color.GREEN, Color.RED); clColorMap = clCreateBuffer( clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, colorMapBuffer, errcode_ret); checkCLError(errcode_ret); // create command queue and upload color map buffer clQueue = clCreateCommandQueue(clContext, device, NULL, errcode_ret); checkCLError(errcode_ret); // load program(s) if (deviceType == CL_DEVICE_TYPE_GPU) log("OpenCL Device Type: GPU (Use -forceCPU to use CPU)"); else log("OpenCL Device Type: CPU"); log("Max Iterations: " + maxIterations + " (Use -iterations <count> to change)"); log("Display resolution: " + ww + "x" + wh + " (Use -res <width> <height> to change)"); log("OpenGL glCaps.GL_ARB_sync = " + glCaps.GL_ARB_sync); log("OpenGL glCaps.GL_ARB_cl_event = " + glCaps.GL_ARB_cl_event); buildProgram(); // Detect GLtoCL synchronization method syncGLtoCL = !glCaps.GL_ARB_cl_event; // GL3.2 or ARB_sync implied log(syncGLtoCL ? "GL to CL sync: Using clFinish" : "GL to CL sync: Using OpenCL events"); // Detect CLtoGL synchronization method syncCLtoGL = !deviceCaps.cl_khr_gl_event; log( syncCLtoGL ? "CL to GL sync: Using glFinish" : "CL to GL sync: Using implicit sync (cl_khr_gl_event)"); vao = glGenVertexArrays(); glBindVertexArray(vao); vbo = glGenBuffers(); glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData( GL_ARRAY_BUFFER, stackPush() .floats( 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), GL_STATIC_DRAW); stackPop(); vsh = glCreateShader(GL_VERTEX_SHADER); glShaderSource( vsh, "#version 150\n" + "\n" + "uniform mat4 projection;\n" + "\n" + "uniform vec2 size;\n" + "\n" + "in vec2 posIN;\n" + "in vec2 texIN;\n" + "\n" + "out vec2 texCoord;\n" + "\n" + "void main(void) {\n" + "\tgl_Position = projection * vec4(posIN * size, 0.0, 1.0);\n" + "\ttexCoord = texIN;\n" + "}"); glCompileShader(vsh); String log = glGetShaderInfoLog(vsh, glGetShaderi(vsh, GL_INFO_LOG_LENGTH)); if (!log.isEmpty()) log(String.format("VERTEX SHADER LOG: %s", log)); fsh = glCreateShader(GL_FRAGMENT_SHADER); glShaderSource( fsh, "#version 150\n" + "\n" + "uniform isampler2D mandelbrot;\n" + "\n" + "in vec2 texCoord;\n" + "\n" + "out vec4 fragColor;\n" + "\n" + "void main(void) {\n" + "\tfragColor = texture(mandelbrot, texCoord) / 255.0;\n" + "}"); glCompileShader(fsh); log = glGetShaderInfoLog(fsh, glGetShaderi(fsh, GL_INFO_LOG_LENGTH)); if (!log.isEmpty()) log(String.format("FRAGMENT SHADER LOG: %s", log)); glProgram = glCreateProgram(); glAttachShader(glProgram, vsh); glAttachShader(glProgram, fsh); glLinkProgram(glProgram); log = glGetProgramInfoLog(glProgram, glGetProgrami(glProgram, GL_INFO_LOG_LENGTH)); if (!log.isEmpty()) log(String.format("PROGRAM LOG: %s", log)); int posIN = glGetAttribLocation(glProgram, "posIN"); int texIN = glGetAttribLocation(glProgram, "texIN"); glVertexAttribPointer(posIN, 2, GL_FLOAT, false, 4 * 4, 0); glVertexAttribPointer(texIN, 2, GL_FLOAT, false, 4 * 4, 2 * 4); glEnableVertexAttribArray(posIN); glEnableVertexAttribArray(texIN); projectionUniform = glGetUniformLocation(glProgram, "projection"); sizeUniform = glGetUniformLocation(glProgram, "size"); glUseProgram(glProgram); glUniform1i(glGetUniformLocation(glProgram, "mandelbrot"), 0); } catch (Exception e) { // TODO: cleanup throw new RuntimeException(e); } glDisable(GL_DEPTH_TEST); glClearColor(0.0f, 0.0f, 0.0f, 1.0f); initGLObjects(); glFinish(); setKernelConstants(); glfwSetWindowSizeCallback( window.handle, window.windowsizefun = new GLFWWindowSizeCallback() { @Override public void invoke(long window, final int width, final int height) { if (width == 0 || height == 0) return; events.add( new Runnable() { @Override public void run() { Mandelbrot.this.ww = width; Mandelbrot.this.wh = height; shouldInitBuffers = true; } }); } }); glfwSetFramebufferSizeCallback( window.handle, window.framebuffersizefun = new GLFWFramebufferSizeCallback() { @Override public void invoke(long window, final int width, final int height) { if (width == 0 || height == 0) return; events.add( new Runnable() { @Override public void run() { Mandelbrot.this.fbw = width; Mandelbrot.this.fbh = height; shouldInitBuffers = true; } }); } }); glfwSetKeyCallback( window.handle, window.keyfun = new GLFWKeyCallback() { @Override public void invoke(long window, int key, int scancode, int action, int mods) { switch (key) { case GLFW_KEY_LEFT_CONTROL: case GLFW_KEY_RIGHT_CONTROL: ctrlDown = action == GLFW_PRESS; return; } if (action != GLFW_PRESS) return; switch (key) { case GLFW_KEY_ESCAPE: glfwSetWindowShouldClose(window, GLFW_TRUE); break; case GLFW_KEY_D: events.offer( new Runnable() { @Override public void run() { doublePrecision = !doublePrecision; log("DOUBLE PRECISION IS NOW: " + (doublePrecision ? "ON" : "OFF")); rebuild = true; } }); break; case GLFW_KEY_HOME: events.offer( new Runnable() { @Override public void run() { offsetX = -0.5; offsetY = 0.0; zoom = 1.0; } }); break; } } }); glfwSetMouseButtonCallback( window.handle, window.mousebuttonfun = new GLFWMouseButtonCallback() { @Override public void invoke(long window, int button, int action, int mods) { if (button != GLFW_MOUSE_BUTTON_LEFT) return; dragging = action == GLFW_PRESS; if (dragging) { dragging = true; dragX = mouseX; dragY = mouseY; dragOffsetX = offsetX; dragOffsetY = offsetY; } } }); glfwSetCursorPosCallback( window.handle, window.cursorposfun = new GLFWCursorPosCallback() { @Override public void invoke(long window, double xpos, double ypos) { mouseX = xpos; mouseY = wh - ypos; if (dragging) { offsetX = dragOffsetX + transformX(dragX - mouseX); offsetY = dragOffsetY + transformY(dragY - mouseY); } } }); glfwSetScrollCallback( window.handle, window.scrollfun = new GLFWScrollCallback() { @Override public void invoke(long window, double xoffset, double yoffset) { if (yoffset == 0) return; double scrollX = mouseX - ww * 0.5; double scrollY = mouseY - wh * 0.5; double zoomX = transformX(scrollX); double zoomY = transformY(scrollY); zoom *= (1.0 - yoffset * (ctrlDown ? 0.25 : 0.05)); offsetX += zoomX - transformX(scrollX); offsetY += zoomY - transformY(scrollY); } }); }