public Grid2D add(OpenCLGrid2D image1, OpenCLGrid2D image2) { // create context if (context == null) { context = OpenCLUtil.getStaticContext(); } // select device if (device == null) { device = context.getMaxFlopsDevice(); } // define local and global sizes int width = Math.min(image1.getWidth(), image2.getWidth()); int height = Math.min(image1.getHeight(), image2.getHeight()); int imageSize = width * height; int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 8); int globalWorkSizeW = OpenCLUtil.roundUp( localWorkSize, width); // rounded up to the nearest multiple of localWorkSize int globalWorkSizeH = OpenCLUtil.roundUp(localWorkSize, height); // load sources, create and build programm if (program == null) { try { program = context.createProgram(this.getClass().getResourceAsStream("exercise4.cl")).build(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(-1); } } // create output image CLBuffer<FloatBuffer> output = context.createFloatBuffer(imageSize, Mem.WRITE_ONLY); if (kernel == null) { kernel = program.createCLKernel("addImages"); } // createCommandQueue CLCommandQueue queue = device.createCommandQueue(); image1.getDelegate().prepareForDeviceOperation(); image2.getDelegate().prepareForDeviceOperation(); // put memory on the graphics card kernel .putArg(image1.getDelegate().getCLBuffer()) .putArg(image2.getDelegate().getCLBuffer()) .putArg(output) .putArg(width) .putArg(height); kernel.rewind(); queue .put2DRangeKernel( kernel, 0, 0, globalWorkSizeW, globalWorkSizeH, localWorkSize, localWorkSize) .putBarrier() // put memory from graphic card to host .putReadBuffer(output, true) .finish(); Grid2D result = new Grid2D(image1); output.getBuffer().rewind(); for (int i = 0; i < result.getSize()[1]; ++i) { for (int j = 0; j < result.getSize()[0]; j++) { result.setAtIndex(j, i, output.getBuffer().get()); } } output.release(); queue.release(); return result; }
protected void init() { if (!initialized) { largeVolumeMode = false; int reconDimensionX = getGeometry().getReconDimensionX(); int reconDimensionY = getGeometry().getReconDimensionY(); int reconDimensionZ = getGeometry().getReconDimensionZ(); projectionsAvailable = new ArrayList<Integer>(); projectionsDone = new ArrayList<Integer>(); // Initialize JOCL. context = OpenCLUtil.createContext(); try { // get the fastest device device = context.getMaxFlopsDevice(); // create the command queue commandQueue = device.createCommandQueue(); // initialize the program if (program == null || !program.getContext().equals(this.context)) { program = context .createProgram( OpenCLCompensatedBackProjector.class.getResourceAsStream( "compensatedBackprojectCL.cl")) .build(); } } catch (Exception e) { if (commandQueue != null) commandQueue.release(); if (kernelFunction != null) kernelFunction.release(); if (program != null) program.release(); // destory context if (context != null) context.release(); // TODO: handle exception e.printStackTrace(); } // check space on device: long memory = device.getMaxMemAllocSize(); long availableMemory = (memory); long requiredMemory = (long) (((((double) reconDimensionX) * reconDimensionY * ((double) reconDimensionZ) * 4) + (((double) Configuration.getGlobalConfiguration().getGeometry().getDetectorHeight()) * Configuration.getGlobalConfiguration().getGeometry().getDetectorWidth() * 4))); if (debug) { System.out.println("Total available Memory on OpenCL card:" + availableMemory); System.out.println("Required Memory on OpenCL card:" + requiredMemory); } if (requiredMemory > availableMemory) { nSteps = (int) OpenCLUtil.iDivUp(requiredMemory, availableMemory); if (debug) System.out.println("Switching to large volume mode with nSteps = " + nSteps); largeVolumeMode = true; } if (debug) { // TODO replace /* CUdevprop prop = new CUdevprop(); JCudaDriver.cuDeviceGetProperties(prop, dev); System.out.println(prop.toFormattedString()); */ } // create the computing kernel kernelFunction = program.createCLKernel("backprojectKernel"); // create the reconstruction volume; int memorysize = reconDimensionX * reconDimensionY * reconDimensionZ * 4; if (largeVolumeMode) { subVolumeZ = OpenCLUtil.iDivUp(reconDimensionZ, nSteps); if (debug) System.out.println("SubVolumeZ: " + subVolumeZ); h_volume = new float[reconDimensionX * reconDimensionY * subVolumeZ]; memorysize = reconDimensionX * reconDimensionY * subVolumeZ * 4; if (debug) System.out.println("Memory: " + memorysize); } else { h_volume = new float[reconDimensionX * reconDimensionY * reconDimensionZ]; } // compute adapted volume size // volume size in x = multiple of bpBlockSize[0] // volume size in y = multiple of bpBlockSize[1] int adaptedVolSize[] = new int[3]; if ((reconDimensionX % bpBlockSize[0]) == 0) { adaptedVolSize[0] = reconDimensionX; } else { adaptedVolSize[0] = ((reconDimensionX / bpBlockSize[0]) + 1) * bpBlockSize[0]; } if ((reconDimensionY % bpBlockSize[1]) == 0) { adaptedVolSize[1] = reconDimensionY; } else { adaptedVolSize[1] = ((reconDimensionY / bpBlockSize[1]) + 1) * bpBlockSize[1]; } adaptedVolSize[2] = reconDimensionZ; int volStrideHost[] = new int[2]; // compute volstride and copy it to constant memory volStrideHost[0] = adaptedVolSize[0]; volStrideHost[1] = adaptedVolSize[0] * adaptedVolSize[1]; // copy volume to device volumePointer = context.createFloatBuffer(h_volume.length, Mem.WRITE_ONLY); volumePointer.getBuffer().put(h_volume); volumePointer.getBuffer().rewind(); // copy volume stride to device volStride = context.createIntBuffer(volStrideHost.length, Mem.READ_ONLY); volStride.getBuffer().put(volStrideHost); volStride.getBuffer().rewind(); commandQueue.putWriteBuffer(volumePointer, true).putWriteBuffer(volStride, true).finish(); initialized = true; } }
public Grid2D openCLBackprojection( OpenCLGrid2D filteredSinogramm, int widthPhantom, int heightPhantom, int worksize, float detectorSpacing, int numberOfPixel, int numberProjections, float scanAngle, double[] spacing, double[] origin) { // create context CLContext context = OpenCLUtil.getStaticContext(); // select device CLDevice device = context.getMaxFlopsDevice(); // define local and global sizes double spacingAngle = (double) (scanAngle / numberProjections); double originDetector = -(detectorSpacing * numberOfPixel) / 2.0; int imageSize = widthPhantom * heightPhantom; int localWorkSize = Math.min(device.getMaxWorkGroupSize(), worksize); int globalWorkSizeW = OpenCLUtil.roundUp( localWorkSize, widthPhantom); // rounded up to the nearest multiple of localWorkSize int globalWorkSizeH = OpenCLUtil.roundUp(localWorkSize, heightPhantom); // load sources, create and build programm try { this.program = context.createProgram(this.getClass().getResourceAsStream("exercise4.cl")).build(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(-1); } // create image from input grid // CLImageFormat format = new CLImageFormat(ChannelOrder.INTENSITY, ChannelType.FLOAT); // create output image CLBuffer<FloatBuffer> output = context.createFloatBuffer(imageSize, Mem.WRITE_ONLY); if (kernel == null) { kernel = program.createCLKernel("parallelBackProjection"); } // createCommandQueue CLCommandQueue queue = device.createCommandQueue(); filteredSinogramm.getDelegate().prepareForDeviceOperation(); // put memory on the graphics card kernel .putArg(filteredSinogramm.getDelegate().getCLBuffer()) .putArg(output) .putArg(numberProjections) .putArg(numberOfPixel) .putArg(scanAngle) .putArg(widthPhantom) .putArg(heightPhantom) .putArg(spacing[0]) .putArg(spacing[1]) .putArg(origin[0]) .putArg(origin[1]) .putArg(detectorSpacing) .putArg(spacingAngle) .putArg(originDetector) .putArg(0.d); kernel.rewind(); queue .put2DRangeKernel( kernel, 0, 0, globalWorkSizeW, globalWorkSizeH, localWorkSize, localWorkSize) .putBarrier() .finish(); // put memory from graphic card to host queue.putReadBuffer(output, true).finish(); output.getBuffer().rewind(); for (int i = 0; i < image.getSize()[1]; ++i) { for (int j = 0; j < image.getSize()[0]; j++) { image.setAtIndex(j, i, output.getBuffer().get()); } } output.release(); queue.release(); return image; }