public Grid2D add(OpenCLGrid2D image1, OpenCLGrid2D image2) { // create context if (context == null) { context = OpenCLUtil.getStaticContext(); } // select device if (device == null) { device = context.getMaxFlopsDevice(); } // define local and global sizes int width = Math.min(image1.getWidth(), image2.getWidth()); int height = Math.min(image1.getHeight(), image2.getHeight()); int imageSize = width * height; int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 8); int globalWorkSizeW = OpenCLUtil.roundUp( localWorkSize, width); // rounded up to the nearest multiple of localWorkSize int globalWorkSizeH = OpenCLUtil.roundUp(localWorkSize, height); // load sources, create and build programm if (program == null) { try { program = context.createProgram(this.getClass().getResourceAsStream("exercise4.cl")).build(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(-1); } } // create output image CLBuffer<FloatBuffer> output = context.createFloatBuffer(imageSize, Mem.WRITE_ONLY); if (kernel == null) { kernel = program.createCLKernel("addImages"); } // createCommandQueue CLCommandQueue queue = device.createCommandQueue(); image1.getDelegate().prepareForDeviceOperation(); image2.getDelegate().prepareForDeviceOperation(); // put memory on the graphics card kernel .putArg(image1.getDelegate().getCLBuffer()) .putArg(image2.getDelegate().getCLBuffer()) .putArg(output) .putArg(width) .putArg(height); kernel.rewind(); queue .put2DRangeKernel( kernel, 0, 0, globalWorkSizeW, globalWorkSizeH, localWorkSize, localWorkSize) .putBarrier() // put memory from graphic card to host .putReadBuffer(output, true) .finish(); Grid2D result = new Grid2D(image1); output.getBuffer().rewind(); for (int i = 0; i < result.getSize()[1]; ++i) { for (int j = 0; j < result.getSize()[0]; j++) { result.setAtIndex(j, i, output.getBuffer().get()); } } output.release(); queue.release(); return result; }
public Grid2D openCLBackprojection( OpenCLGrid2D filteredSinogramm, int widthPhantom, int heightPhantom, int worksize, float detectorSpacing, int numberOfPixel, int numberProjections, float scanAngle, double[] spacing, double[] origin) { // create context CLContext context = OpenCLUtil.getStaticContext(); // select device CLDevice device = context.getMaxFlopsDevice(); // define local and global sizes double spacingAngle = (double) (scanAngle / numberProjections); double originDetector = -(detectorSpacing * numberOfPixel) / 2.0; int imageSize = widthPhantom * heightPhantom; int localWorkSize = Math.min(device.getMaxWorkGroupSize(), worksize); int globalWorkSizeW = OpenCLUtil.roundUp( localWorkSize, widthPhantom); // rounded up to the nearest multiple of localWorkSize int globalWorkSizeH = OpenCLUtil.roundUp(localWorkSize, heightPhantom); // load sources, create and build programm try { this.program = context.createProgram(this.getClass().getResourceAsStream("exercise4.cl")).build(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(-1); } // create image from input grid // CLImageFormat format = new CLImageFormat(ChannelOrder.INTENSITY, ChannelType.FLOAT); // create output image CLBuffer<FloatBuffer> output = context.createFloatBuffer(imageSize, Mem.WRITE_ONLY); if (kernel == null) { kernel = program.createCLKernel("parallelBackProjection"); } // createCommandQueue CLCommandQueue queue = device.createCommandQueue(); filteredSinogramm.getDelegate().prepareForDeviceOperation(); // put memory on the graphics card kernel .putArg(filteredSinogramm.getDelegate().getCLBuffer()) .putArg(output) .putArg(numberProjections) .putArg(numberOfPixel) .putArg(scanAngle) .putArg(widthPhantom) .putArg(heightPhantom) .putArg(spacing[0]) .putArg(spacing[1]) .putArg(origin[0]) .putArg(origin[1]) .putArg(detectorSpacing) .putArg(spacingAngle) .putArg(originDetector) .putArg(0.d); kernel.rewind(); queue .put2DRangeKernel( kernel, 0, 0, globalWorkSizeW, globalWorkSizeH, localWorkSize, localWorkSize) .putBarrier() .finish(); // put memory from graphic card to host queue.putReadBuffer(output, true).finish(); output.getBuffer().rewind(); for (int i = 0; i < image.getSize()[1]; ++i) { for (int j = 0; j < image.getSize()[0]; j++) { image.setAtIndex(j, i, output.getBuffer().get()); } } output.release(); queue.release(); return image; }