private synchronized void projectSingleProjection( int projectionNumber, int dimz, float respoffset) { // load projection matrix initProjectionMatrix(projectionNumber); // load projection Grid2D projection = projections.get(projectionNumber); initProjectionData(projection); if (!largeVolumeMode) { // projections.remove(projectionNumber); } // backproject for each slice // OpenCL Grids are only two dimensional! int reconDimensionZ = dimz; double voxelSpacingX = getGeometry().getVoxelSpacingX(); double voxelSpacingY = getGeometry().getVoxelSpacingY(); double voxelSpacingZ = getGeometry().getVoxelSpacingZ(); // write kernel parameters kernelFunction.rewind(); kernelFunction .putArg(volumePointer) .putArg(respoffset) .putArg((int) lineOffset) .putArg(reconDimensionZ) .putArg((float) voxelSpacingX) .putArg((float) voxelSpacingY) .putArg((float) voxelSpacingZ) .putArg((float) offsetX) .putArg((float) offsetY) .putArg((float) offsetZ) .putArg(projectionTex) .putArg(volStride) .putArg(projectionMatrix); int[] realLocalSize = { Math.min(device.getMaxWorkGroupSize(), bpBlockSize[0]), Math.min(device.getMaxWorkGroupSize(), bpBlockSize[1]) }; // rounded up to the nearest multiple of localWorkSize int[] globalWorkSize = {getGeometry().getReconDimensionX(), getGeometry().getReconDimensionY()}; // Call the OpenCL kernel, writing the results into the volume which is pointed at commandQueue .putWriteImage(projectionTex, false) .finish() .put2DRangeKernel( kernelFunction, 0, 0, globalWorkSize[0], globalWorkSize[1], realLocalSize[0], realLocalSize[1]) // .finish() // .putReadBuffer(dOut, true) .finish(); }
private synchronized void initProjectionMatrix(int projectionNumber) { // load projection Matrix for current Projection. SimpleMatrix pMat = getGeometry().getProjectionMatrix(projectionNumber).computeP(); float[] pMatFloat = new float[pMat.getCols() * pMat.getRows()]; for (int j = 0; j < pMat.getRows(); j++) { for (int i = 0; i < pMat.getCols(); i++) { pMatFloat[(j * pMat.getCols()) + i] = (float) pMat.getElement(j, i); } } // Obtain the global pointer to the view matrix from // the module if (projectionMatrix == null) projectionMatrix = context.createFloatBuffer(pMatFloat.length, Mem.READ_ONLY); projectionMatrix.getBuffer().put(pMatFloat); projectionMatrix.getBuffer().rewind(); commandQueue.putWriteBuffer(projectionMatrix, true).finish(); }
public void OpenCLRun(double[] motionfield) { try { while (projectionsAvailable.size() > 0) { Thread.sleep(CONRAD.INVERSE_SPEEDUP); if (showStatus) { float status = (float) (1.0 / projections.size()); if (largeVolumeMode) { IJ.showStatus("Streaming Projections to OpenCL Buffer"); } else { IJ.showStatus("Backprojecting with OpenCL"); } IJ.showProgress(status); } if (!largeVolumeMode) { workOnProjectionData(motionfield); } else { checkProjectionData(); } } // System.out.println("large Volume " + largeVolumeMode); if (largeVolumeMode) { // we have collected all projections. // now we can reconstruct subvolumes and stich them together. int reconDimensionZ = getGeometry().getReconDimensionZ(); double voxelSpacingX = getGeometry().getVoxelSpacingX(); double voxelSpacingY = getGeometry().getVoxelSpacingY(); double voxelSpacingZ = getGeometry().getVoxelSpacingZ(); useVOImap = false; initialize(projections.get(0)); double originalOffsetZ = offsetZ; double originalReconDimZ = reconDimensionZ; reconDimensionZ = subVolumeZ; int maxProjectionNumber = projections.size(); float all = nSteps * maxProjectionNumber * 2; for (int n = 0; n < nSteps; n++) { // For each subvolume // set all to 0; Arrays.fill(h_volume, 0); volumePointer.getBuffer().rewind(); volumePointer.getBuffer().put(h_volume); volumePointer.getBuffer().rewind(); commandQueue.putWriteBuffer(volumePointer, true).finish(); offsetZ = originalOffsetZ - (reconDimensionZ * voxelSpacingZ * n); for (int p = 0; p < maxProjectionNumber; p++) { // For all projections float currentStep = (n * maxProjectionNumber * 2) + p; if (showStatus) { IJ.showStatus("Backprojecting with OpenCL"); IJ.showProgress(currentStep / all); } // System.out.println("Current: " + p); float respoffset = (float) Math.round(motionfield[p] / voxelSpacingZ); try { projectSingleProjection(p, reconDimensionZ, respoffset); } catch (Exception e) { System.out.println("Backprojection of projection " + p + " was not successful."); e.printStackTrace(); } } // Gather volume commandQueue.putReadBuffer(volumePointer, true).finish(); volumePointer.getBuffer().rewind(); volumePointer.getBuffer().get(h_volume); volumePointer.getBuffer().rewind(); // move data to ImagePlus; if (projectionVolume != null) { for (int k = 0; k < reconDimensionZ; k++) { int index = (n * subVolumeZ) + k; if (showStatus) { float currentStep = (n * maxProjectionNumber * 2) + maxProjectionNumber + k; IJ.showStatus("Fetching Volume from OpenCL"); IJ.showProgress(currentStep / all); } if (index < originalReconDimZ) { for (int j = 0; j < projectionVolume.getSize()[1]; j++) { for (int i = 0; i < projectionVolume.getSize()[0]; i++) { float value = h_volume[ (((projectionVolume.getSize()[1] * k) + j) * projectionVolume.getSize()[0]) + i]; double[][] voxel = new double[4][1]; voxel[0][0] = (voxelSpacingX * i) - offsetX; voxel[1][0] = (voxelSpacingY * j) - offsetY; voxel[2][0] = (voxelSpacingZ * index) - originalOffsetZ; // exception for the case "interestedInVolume == null" and largeVolume is // enabled if (interestedInVolume == null) { projectionVolume.setAtIndex(i, j, index, value); } else { if (interestedInVolume.contains(voxel[0][0], voxel[1][0], voxel[2][0])) { projectionVolume.setAtIndex(i, j, index, value); } else { projectionVolume.setAtIndex(i, j, index, 0); } } } } } } } } } } catch (InterruptedException e) { e.printStackTrace(); } if (showStatus) IJ.showProgress(1.0); unload(); if (debug) System.out.println("Unloaded"); }
private synchronized void unload() { if (initialized) { if ((projectionVolume != null) && (!largeVolumeMode)) { commandQueue.putReadBuffer(volumePointer, true).finish(); volumePointer.getBuffer().rewind(); volumePointer.getBuffer().get(h_volume); volumePointer.getBuffer().rewind(); int width = projectionVolume.getSize()[0]; int height = projectionVolume.getSize()[1]; if (this.useVOImap) { for (int k = 0; k < projectionVolume.getSize()[2]; k++) { for (int j = 0; j < height; j++) { for (int i = 0; i < width; i++) { float value = h_volume[(((height * k) + j) * width) + i]; if (voiMap[i][j][k]) { projectionVolume.setAtIndex(i, j, k, value); } else { projectionVolume.setAtIndex(i, j, k, 0); } } } } } else { for (int k = 0; k < projectionVolume.getSize()[2]; k++) { for (int j = 0; j < height; j++) { for (int i = 0; i < width; i++) { float value = h_volume[(((height * k) + j) * width) + i]; projectionVolume.setAtIndex(i, j, k, value); } } } } } else { System.out.println("Check ProjectionVolume. It seems null."); } h_volume = null; // free memory on device commandQueue.release(); if (projectionTex != null) projectionTex.release(); if (projectionMatrix != null) projectionMatrix.release(); if (volStride != null) volStride.release(); if (projectionArray != null) projectionArray.release(); if (volumePointer != null) volumePointer.release(); kernelFunction.release(); program.release(); // destory context context.release(); commandQueue = null; projectionArray = null; projectionMatrix = null; projectionTex = null; volStride = null; volumePointer = null; kernelFunction = null; program = null; context = null; initialized = false; } }
protected void init() { if (!initialized) { largeVolumeMode = false; int reconDimensionX = getGeometry().getReconDimensionX(); int reconDimensionY = getGeometry().getReconDimensionY(); int reconDimensionZ = getGeometry().getReconDimensionZ(); projectionsAvailable = new ArrayList<Integer>(); projectionsDone = new ArrayList<Integer>(); // Initialize JOCL. context = OpenCLUtil.createContext(); try { // get the fastest device device = context.getMaxFlopsDevice(); // create the command queue commandQueue = device.createCommandQueue(); // initialize the program if (program == null || !program.getContext().equals(this.context)) { program = context .createProgram( OpenCLCompensatedBackProjector.class.getResourceAsStream( "compensatedBackprojectCL.cl")) .build(); } } catch (Exception e) { if (commandQueue != null) commandQueue.release(); if (kernelFunction != null) kernelFunction.release(); if (program != null) program.release(); // destory context if (context != null) context.release(); // TODO: handle exception e.printStackTrace(); } // check space on device: long memory = device.getMaxMemAllocSize(); long availableMemory = (memory); long requiredMemory = (long) (((((double) reconDimensionX) * reconDimensionY * ((double) reconDimensionZ) * 4) + (((double) Configuration.getGlobalConfiguration().getGeometry().getDetectorHeight()) * Configuration.getGlobalConfiguration().getGeometry().getDetectorWidth() * 4))); if (debug) { System.out.println("Total available Memory on OpenCL card:" + availableMemory); System.out.println("Required Memory on OpenCL card:" + requiredMemory); } if (requiredMemory > availableMemory) { nSteps = (int) OpenCLUtil.iDivUp(requiredMemory, availableMemory); if (debug) System.out.println("Switching to large volume mode with nSteps = " + nSteps); largeVolumeMode = true; } if (debug) { // TODO replace /* CUdevprop prop = new CUdevprop(); JCudaDriver.cuDeviceGetProperties(prop, dev); System.out.println(prop.toFormattedString()); */ } // create the computing kernel kernelFunction = program.createCLKernel("backprojectKernel"); // create the reconstruction volume; int memorysize = reconDimensionX * reconDimensionY * reconDimensionZ * 4; if (largeVolumeMode) { subVolumeZ = OpenCLUtil.iDivUp(reconDimensionZ, nSteps); if (debug) System.out.println("SubVolumeZ: " + subVolumeZ); h_volume = new float[reconDimensionX * reconDimensionY * subVolumeZ]; memorysize = reconDimensionX * reconDimensionY * subVolumeZ * 4; if (debug) System.out.println("Memory: " + memorysize); } else { h_volume = new float[reconDimensionX * reconDimensionY * reconDimensionZ]; } // compute adapted volume size // volume size in x = multiple of bpBlockSize[0] // volume size in y = multiple of bpBlockSize[1] int adaptedVolSize[] = new int[3]; if ((reconDimensionX % bpBlockSize[0]) == 0) { adaptedVolSize[0] = reconDimensionX; } else { adaptedVolSize[0] = ((reconDimensionX / bpBlockSize[0]) + 1) * bpBlockSize[0]; } if ((reconDimensionY % bpBlockSize[1]) == 0) { adaptedVolSize[1] = reconDimensionY; } else { adaptedVolSize[1] = ((reconDimensionY / bpBlockSize[1]) + 1) * bpBlockSize[1]; } adaptedVolSize[2] = reconDimensionZ; int volStrideHost[] = new int[2]; // compute volstride and copy it to constant memory volStrideHost[0] = adaptedVolSize[0]; volStrideHost[1] = adaptedVolSize[0] * adaptedVolSize[1]; // copy volume to device volumePointer = context.createFloatBuffer(h_volume.length, Mem.WRITE_ONLY); volumePointer.getBuffer().put(h_volume); volumePointer.getBuffer().rewind(); // copy volume stride to device volStride = context.createIntBuffer(volStrideHost.length, Mem.READ_ONLY); volStride.getBuffer().put(volStrideHost); volStride.getBuffer().rewind(); commandQueue.putWriteBuffer(volumePointer, true).putWriteBuffer(volStride, true).finish(); initialized = true; } }
public Grid2D add(OpenCLGrid2D image1, OpenCLGrid2D image2) { // create context if (context == null) { context = OpenCLUtil.getStaticContext(); } // select device if (device == null) { device = context.getMaxFlopsDevice(); } // define local and global sizes int width = Math.min(image1.getWidth(), image2.getWidth()); int height = Math.min(image1.getHeight(), image2.getHeight()); int imageSize = width * height; int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 8); int globalWorkSizeW = OpenCLUtil.roundUp( localWorkSize, width); // rounded up to the nearest multiple of localWorkSize int globalWorkSizeH = OpenCLUtil.roundUp(localWorkSize, height); // load sources, create and build programm if (program == null) { try { program = context.createProgram(this.getClass().getResourceAsStream("exercise4.cl")).build(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(-1); } } // create output image CLBuffer<FloatBuffer> output = context.createFloatBuffer(imageSize, Mem.WRITE_ONLY); if (kernel == null) { kernel = program.createCLKernel("addImages"); } // createCommandQueue CLCommandQueue queue = device.createCommandQueue(); image1.getDelegate().prepareForDeviceOperation(); image2.getDelegate().prepareForDeviceOperation(); // put memory on the graphics card kernel .putArg(image1.getDelegate().getCLBuffer()) .putArg(image2.getDelegate().getCLBuffer()) .putArg(output) .putArg(width) .putArg(height); kernel.rewind(); queue .put2DRangeKernel( kernel, 0, 0, globalWorkSizeW, globalWorkSizeH, localWorkSize, localWorkSize) .putBarrier() // put memory from graphic card to host .putReadBuffer(output, true) .finish(); Grid2D result = new Grid2D(image1); output.getBuffer().rewind(); for (int i = 0; i < result.getSize()[1]; ++i) { for (int j = 0; j < result.getSize()[0]; j++) { result.setAtIndex(j, i, output.getBuffer().get()); } } output.release(); queue.release(); return result; }
public Grid2D openCLBackprojection( OpenCLGrid2D filteredSinogramm, int widthPhantom, int heightPhantom, int worksize, float detectorSpacing, int numberOfPixel, int numberProjections, float scanAngle, double[] spacing, double[] origin) { // create context CLContext context = OpenCLUtil.getStaticContext(); // select device CLDevice device = context.getMaxFlopsDevice(); // define local and global sizes double spacingAngle = (double) (scanAngle / numberProjections); double originDetector = -(detectorSpacing * numberOfPixel) / 2.0; int imageSize = widthPhantom * heightPhantom; int localWorkSize = Math.min(device.getMaxWorkGroupSize(), worksize); int globalWorkSizeW = OpenCLUtil.roundUp( localWorkSize, widthPhantom); // rounded up to the nearest multiple of localWorkSize int globalWorkSizeH = OpenCLUtil.roundUp(localWorkSize, heightPhantom); // load sources, create and build programm try { this.program = context.createProgram(this.getClass().getResourceAsStream("exercise4.cl")).build(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(-1); } // create image from input grid // CLImageFormat format = new CLImageFormat(ChannelOrder.INTENSITY, ChannelType.FLOAT); // create output image CLBuffer<FloatBuffer> output = context.createFloatBuffer(imageSize, Mem.WRITE_ONLY); if (kernel == null) { kernel = program.createCLKernel("parallelBackProjection"); } // createCommandQueue CLCommandQueue queue = device.createCommandQueue(); filteredSinogramm.getDelegate().prepareForDeviceOperation(); // put memory on the graphics card kernel .putArg(filteredSinogramm.getDelegate().getCLBuffer()) .putArg(output) .putArg(numberProjections) .putArg(numberOfPixel) .putArg(scanAngle) .putArg(widthPhantom) .putArg(heightPhantom) .putArg(spacing[0]) .putArg(spacing[1]) .putArg(origin[0]) .putArg(origin[1]) .putArg(detectorSpacing) .putArg(spacingAngle) .putArg(originDetector) .putArg(0.d); kernel.rewind(); queue .put2DRangeKernel( kernel, 0, 0, globalWorkSizeW, globalWorkSizeH, localWorkSize, localWorkSize) .putBarrier() .finish(); // put memory from graphic card to host queue.putReadBuffer(output, true).finish(); output.getBuffer().rewind(); for (int i = 0; i < image.getSize()[1]; ++i) { for (int j = 0; j < image.getSize()[0]; j++) { image.setAtIndex(j, i, output.getBuffer().get()); } } output.release(); queue.release(); return image; }