/**
 * Copies one projection image into the host-side projection buffer and wraps that buffer in a
 * 2D OpenCL image ({@code projectionTex}).
 *
 * <p>{@code initialize(projection)} is always called first, even for a {@code null} projection.
 * The host buffer {@code projectionArray} is allocated on the first call and reused afterwards;
 * NOTE(review): this assumes every projection has the same width and height as the first one
 * (a larger projection would overflow the reused buffer) -- confirm against the callers.
 *
 * @param projection the projection to upload; if {@code null} only a message is printed
 */
private synchronized void initProjectionData(Grid2D projection) {
    initialize(projection);
    if (projection == null) {
      System.out.println("Projection was null!!");
      return;
    }

    final int width = projection.getWidth();
    final int height = projection.getHeight();

    // Flatten the projection row by row: element (i, j) is stored at j * width + i.
    float[] proj = new float[width * height];
    for (int j = 0; j < height; j++) {
      for (int i = 0; i < width; i++) {
        proj[(j * width) + i] = projection.getPixelValue(i, j);
      }
    }

    if (projectionArray == null) {
      // Create the array that will contain the projection data.
      projectionArray = context.createFloatBuffer(width * height, Mem.READ_ONLY);
    }

    // Copy the projection data to the array
    projectionArray.getBuffer().put(proj);
    projectionArray.getBuffer().rewind();

    // A new image is created for every projection. Release the previous one first, otherwise
    // each call leaks one image object until the whole context is torn down. The field is
    // cleared so that a failure in createImage2d cannot leave a released handle behind.
    if (projectionTex != null) {
      projectionTex.release();
      projectionTex = null;
    }

    // set the texture
    CLImageFormat format = new CLImageFormat(ChannelOrder.INTENSITY, ChannelType.FLOAT);
    projectionTex =
        context.createImage2d(projectionArray.getBuffer(), width, height, format, Mem.READ_ONLY);
  }
  /**
   * Uploads the projection matrix of the given projection to the device.
   *
   * <p>The matrix returned by {@code computeP()} is flattened row by row into single-precision
   * floats, copied into {@code projectionMatrix} (allocated on first use) and written to the
   * device with a blocking write.
   *
   * @param projectionNumber index of the projection whose matrix is loaded
   */
  private synchronized void initProjectionMatrix(int projectionNumber) {
    // load projection Matrix for current Projection.
    SimpleMatrix matrix = getGeometry().getProjectionMatrix(projectionNumber).computeP();
    final int cols = matrix.getCols();
    final int rows = matrix.getRows();

    // Row-major flattening: a running offset visits the same slots as row * cols + col.
    float[] flattened = new float[cols * rows];
    int offset = 0;
    for (int row = 0; row < rows; row++) {
      for (int col = 0; col < cols; col++) {
        flattened[offset++] = (float) matrix.getElement(row, col);
      }
    }

    // Lazily create the device buffer that holds the matrix.
    if (projectionMatrix == null) {
      projectionMatrix = context.createFloatBuffer(flattened.length, Mem.READ_ONLY);
    }

    projectionMatrix.getBuffer().put(flattened);
    projectionMatrix.getBuffer().rewind();
    commandQueue.putWriteBuffer(projectionMatrix, true).finish();
  }
  /**
   * Reads the reconstruction back from the device and tears down all OpenCL resources.
   *
   * <p>Does nothing unless {@code initialized} is set. When a target volume exists and large
   * volume mode is off, the device volume is read into {@code h_volume} and copied voxel by voxel
   * into {@code projectionVolume}; with {@code useVOImap} enabled, voxels whose {@code voiMap}
   * entry is {@code false} are written as 0. Afterwards the queue, memory objects, kernel,
   * program and context are released and the corresponding fields are cleared.
   */
  private synchronized void unload() {
    if (!initialized) {
      return;
    }

    boolean readBackVolume = (projectionVolume != null) && (!largeVolumeMode);
    if (readBackVolume) {
      // Blocking read of the device volume into the host array.
      commandQueue.putReadBuffer(volumePointer, true).finish();
      volumePointer.getBuffer().rewind();
      volumePointer.getBuffer().get(h_volume);
      volumePointer.getBuffer().rewind();

      int width = projectionVolume.getSize()[0];
      int height = projectionVolume.getSize()[1];
      int depth = projectionVolume.getSize()[2];
      boolean maskWithVoi = this.useVOImap;

      for (int k = 0; k < depth; k++) {
        for (int j = 0; j < height; j++) {
          for (int i = 0; i < width; i++) {
            float value = h_volume[(((height * k) + j) * width) + i];
            // The VOI map is only consulted when masking is enabled.
            if (maskWithVoi && !voiMap[i][j][k]) {
              projectionVolume.setAtIndex(i, j, k, 0);
            } else {
              projectionVolume.setAtIndex(i, j, k, value);
            }
          }
        }
      }
    } else {
      System.out.println("Check ProjectionVolume. It seems null.");
    }

    h_volume = null;

    // free memory on device
    commandQueue.release();

    if (projectionTex != null) {
      projectionTex.release();
    }
    if (projectionMatrix != null) {
      projectionMatrix.release();
    }
    if (volStride != null) {
      volStride.release();
    }
    if (projectionArray != null) {
      projectionArray.release();
    }
    if (volumePointer != null) {
      volumePointer.release();
    }

    kernelFunction.release();
    program.release();
    // destroy context
    context.release();

    // Drop every handle so a later init() starts from a clean state.
    commandQueue = null;
    projectionArray = null;
    projectionMatrix = null;
    projectionTex = null;
    volStride = null;
    volumePointer = null;
    kernelFunction = null;
    program = null;
    context = null;

    initialized = false;
  }
  /**
   * Sets up the OpenCL context, device, command queue, program and kernel and allocates the
   * reconstruction volume on host and device. Does nothing if already initialized.
   *
   * <p>The memory needed for the full volume plus one detector image (4 bytes per value) is
   * compared with the device's maximum allocation size. If it does not fit, large volume mode is
   * enabled and the host/device volume is sized for {@code subVolumeZ} slices only.
   *
   * <p>The volume stride uploaded to the device uses the x and y dimensions rounded up to
   * multiples of {@code bpBlockSize[0]} and {@code bpBlockSize[1]}.
   *
   * @throws IllegalStateException if the device, command queue or program cannot be created;
   *     the partially created OpenCL objects are released before the exception is thrown
   */
  protected void init() {
    if (initialized) {
      return;
    }
    largeVolumeMode = false;

    int reconDimensionX = getGeometry().getReconDimensionX();
    int reconDimensionY = getGeometry().getReconDimensionY();
    int reconDimensionZ = getGeometry().getReconDimensionZ();
    projectionsAvailable = new ArrayList<Integer>();
    projectionsDone = new ArrayList<Integer>();

    // Initialize JOCL.
    context = OpenCLUtil.createContext();

    try {
      // get the fastest device
      device = context.getMaxFlopsDevice();
      // create the command queue
      commandQueue = device.createCommandQueue();

      // initialize the program
      if (program == null || !program.getContext().equals(this.context)) {
        program =
            context
                .createProgram(
                    OpenCLCompensatedBackProjector.class.getResourceAsStream(
                        "compensatedBackprojectCL.cl"))
                .build();
      }
    } catch (Exception e) {
      if (commandQueue != null) commandQueue.release();
      if (kernelFunction != null) kernelFunction.release();
      if (program != null) program.release();
      // destroy context
      if (context != null) context.release();

      // Do not keep handles to released objects, and do not continue: everything below needs a
      // live context, queue and program and would only fail later with a less helpful error.
      commandQueue = null;
      kernelFunction = null;
      program = null;
      context = null;
      throw new IllegalStateException("Could not initialize OpenCL back projection", e);
    }

    // check space on device:
    long availableMemory = device.getMaxMemAllocSize();
    long requiredMemory =
        (long)
            (((((double) reconDimensionX) * reconDimensionY * ((double) reconDimensionZ) * 4)
                + (((double)
                        Configuration.getGlobalConfiguration().getGeometry().getDetectorHeight())
                    * Configuration.getGlobalConfiguration().getGeometry().getDetectorWidth()
                    * 4)));
    if (debug) {
      System.out.println("Total available Memory on OpenCL card:" + availableMemory);
      System.out.println("Required Memory on OpenCL card:" + requiredMemory);
    }
    if (requiredMemory > availableMemory) {
      nSteps = (int) OpenCLUtil.iDivUp(requiredMemory, availableMemory);
      if (debug) System.out.println("Switching to large volume mode with nSteps = " + nSteps);
      largeVolumeMode = true;
    }

    // create the computing kernel
    kernelFunction = program.createCLKernel("backprojectKernel");

    // create the reconstruction volume;
    if (largeVolumeMode) {
      subVolumeZ = OpenCLUtil.iDivUp(reconDimensionZ, nSteps);
      if (debug) System.out.println("SubVolumeZ: " + subVolumeZ);
      h_volume = new float[reconDimensionX * reconDimensionY * subVolumeZ];
      if (debug) {
        // Computed in long: the byte count of a large volume does not fit into an int.
        long memorySize = 4L * reconDimensionX * reconDimensionY * subVolumeZ;
        System.out.println("Memory: " + memorySize);
      }
    } else {
      h_volume = new float[reconDimensionX * reconDimensionY * reconDimensionZ];
    }

    // compute adapted volume size
    //    volume size in x = multiple of bpBlockSize[0]
    //    volume size in y = multiple of bpBlockSize[1]
    int adaptedSizeX =
        ((reconDimensionX % bpBlockSize[0]) == 0)
            ? reconDimensionX
            : ((reconDimensionX / bpBlockSize[0]) + 1) * bpBlockSize[0];
    int adaptedSizeY =
        ((reconDimensionY % bpBlockSize[1]) == 0)
            ? reconDimensionY
            : ((reconDimensionY / bpBlockSize[1]) + 1) * bpBlockSize[1];

    // volume stride: elements per row and elements per slice of the adapted volume
    int[] volStrideHost = {adaptedSizeX, adaptedSizeX * adaptedSizeY};

    // copy volume to device
    volumePointer = context.createFloatBuffer(h_volume.length, Mem.WRITE_ONLY);
    volumePointer.getBuffer().put(h_volume);
    volumePointer.getBuffer().rewind();

    // copy volume stride to device
    volStride = context.createIntBuffer(volStrideHost.length, Mem.READ_ONLY);
    volStride.getBuffer().put(volStrideHost);
    volStride.getBuffer().rewind();

    commandQueue.putWriteBuffer(volumePointer, true).putWriteBuffer(volStride, true).finish();

    initialized = true;
  }
  /**
   * Adds two images on the OpenCL device using the {@code addImages} kernel from
   * {@code exercise4.cl}.
   *
   * <p>The kernel is launched over the overlap of both images (minimum width and height). The
   * result starts as a copy of {@code image1}; only the overlap region is overwritten with the
   * kernel output, so pixels of {@code image1} outside the overlap keep their original value.
   * NOTE(review): the read-back assumes the kernel writes the output row by row with a row
   * length of the overlap width -- confirm against the kernel source.
   *
   * <p>The context, device and program are created lazily and cached in fields. The kernel is
   * recreated on every call because the {@code kernel} field is shared with other methods that
   * store a different kernel in it.
   *
   * <p>If the kernel source cannot be read, the stack trace is printed and the JVM exits with
   * status -1.
   *
   * @param image1 first summand; also determines the size of the returned grid
   * @param image2 second summand
   * @return a new grid holding the sum in the overlap region
   */
  public Grid2D add(OpenCLGrid2D image1, OpenCLGrid2D image2) {

    // create context
    if (context == null) {
      context = OpenCLUtil.getStaticContext();
    }
    // select device
    if (device == null) {
      device = context.getMaxFlopsDevice();
    }
    // define local and global sizes
    int width = Math.min(image1.getWidth(), image2.getWidth());
    int height = Math.min(image1.getHeight(), image2.getHeight());

    int imageSize = width * height;
    int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 8);
    // global sizes are rounded up to the nearest multiple of localWorkSize
    int globalWorkSizeW = OpenCLUtil.roundUp(localWorkSize, width);
    int globalWorkSizeH = OpenCLUtil.roundUp(localWorkSize, height);

    // load sources, create and build program
    if (program == null) {
      try {
        program =
            context.createProgram(this.getClass().getResourceAsStream("exercise4.cl")).build();
      } catch (IOException e) {
        e.printStackTrace();
        System.exit(-1);
      }
    }

    // The kernel field may still hold a kernel created by another method under a different
    // name. Always replace it so the launch below really runs "addImages".
    if (kernel != null) {
      kernel.release();
      kernel = null;
    }
    kernel = program.createCLKernel("addImages");

    // create output image
    CLBuffer<FloatBuffer> output = context.createFloatBuffer(imageSize, Mem.WRITE_ONLY);
    CLCommandQueue queue = null;
    try {
      queue = device.createCommandQueue();
      image1.getDelegate().prepareForDeviceOperation();
      image2.getDelegate().prepareForDeviceOperation();

      kernel
          .putArg(image1.getDelegate().getCLBuffer())
          .putArg(image2.getDelegate().getCLBuffer())
          .putArg(output)
          .putArg(width)
          .putArg(height);
      kernel.rewind();

      queue
          .put2DRangeKernel(
              kernel, 0, 0, globalWorkSizeW, globalWorkSizeH, localWorkSize, localWorkSize)
          .putBarrier()
          // put memory from graphic card to host
          .putReadBuffer(output, true)
          .finish();

      Grid2D result = new Grid2D(image1);
      FloatBuffer sums = output.getBuffer();
      sums.rewind();

      // The output holds exactly width * height values. Iterating over the size of the result
      // instead would read past the end of the buffer whenever image1 is larger than image2.
      for (int row = 0; row < height; row++) {
        for (int col = 0; col < width; col++) {
          result.setAtIndex(col, row, sums.get());
        }
      }
      return result;
    } finally {
      // Release per-call device resources even if the launch or the read-back failed.
      output.release();
      if (queue != null) {
        queue.release();
      }
    }
  }
  /**
   * Runs the {@code parallelBackProjection} kernel from {@code exercise4.cl} on the given
   * sinogram and copies the device output into the {@code image} field, which is also returned.
   *
   * <p>The kernel is launched on a 2D range covering {@code widthPhantom} x {@code heightPhantom}
   * (each rounded up to a multiple of the local work size) and writes into a buffer of
   * {@code widthPhantom * heightPhantom} floats. NOTE(review): the read-back iterates over the
   * size of {@code image}; this presumably matches the phantom size -- confirm where
   * {@code image} is created.
   *
   * <p>The program is built once per context and reused. The kernel is recreated on every call
   * because the {@code kernel} field is shared with other methods that store a different kernel
   * in it. If the kernel source cannot be read, the stack trace is printed and the JVM exits
   * with status -1.
   *
   * @param filteredSinogramm sinogram passed to the kernel as its input buffer
   * @param widthPhantom width of the output; first dimension of the kernel range
   * @param heightPhantom height of the output; second dimension of the kernel range
   * @param worksize upper bound for the local work size used in both dimensions
   * @param detectorSpacing detector element spacing, forwarded to the kernel
   * @param numberOfPixel number of detector pixels, forwarded to the kernel
   * @param numberProjections number of projections, forwarded to the kernel
   * @param scanAngle scan angle, forwarded to the kernel; the angular step passed along is
   *     {@code scanAngle / numberProjections}
   * @param spacing output spacing; elements 0 and 1 are forwarded to the kernel
   * @param origin output origin; elements 0 and 1 are forwarded to the kernel
   * @return the {@code image} field after it has been filled with the kernel output
   */
  public Grid2D openCLBackprojection(
      OpenCLGrid2D filteredSinogramm,
      int widthPhantom,
      int heightPhantom,
      int worksize,
      float detectorSpacing,
      int numberOfPixel,
      int numberProjections,
      float scanAngle,
      double[] spacing,
      double[] origin) {
    // create context
    CLContext context = OpenCLUtil.getStaticContext();

    // select device
    CLDevice device = context.getMaxFlopsDevice();

    // Derived kernel parameters. The float arithmetic is kept as is so the values handed to
    // the kernel do not change.
    double spacingAngle = (double) (scanAngle / numberProjections);
    double originDetector = -(detectorSpacing * numberOfPixel) / 2.0;

    // define local and global sizes
    int imageSize = widthPhantom * heightPhantom;
    int localWorkSize = Math.min(device.getMaxWorkGroupSize(), worksize);
    // global sizes are rounded up to the nearest multiple of localWorkSize
    int globalWorkSizeW = OpenCLUtil.roundUp(localWorkSize, widthPhantom);
    int globalWorkSizeH = OpenCLUtil.roundUp(localWorkSize, heightPhantom);

    // Build the program only when there is none for this context yet. Rebuilding it on every
    // call compiles the source again and leaves the previous program object unreleased.
    if (this.program == null || !this.program.getContext().equals(context)) {
      try {
        this.program =
            context.createProgram(this.getClass().getResourceAsStream("exercise4.cl")).build();
      } catch (IOException e) {
        e.printStackTrace();
        System.exit(-1);
      }
    }

    // The kernel field may still hold a kernel created by another method under a different
    // name. Always replace it so the launch below really runs "parallelBackProjection".
    if (kernel != null) {
      kernel.release();
      kernel = null;
    }
    kernel = program.createCLKernel("parallelBackProjection");

    // create output image
    CLBuffer<FloatBuffer> output = context.createFloatBuffer(imageSize, Mem.WRITE_ONLY);
    CLCommandQueue queue = null;
    try {
      queue = device.createCommandQueue();
      filteredSinogramm.getDelegate().prepareForDeviceOperation();

      kernel
          .putArg(filteredSinogramm.getDelegate().getCLBuffer())
          .putArg(output)
          .putArg(numberProjections)
          .putArg(numberOfPixel)
          .putArg(scanAngle)
          .putArg(widthPhantom)
          .putArg(heightPhantom)
          .putArg(spacing[0])
          .putArg(spacing[1])
          .putArg(origin[0])
          .putArg(origin[1])
          .putArg(detectorSpacing)
          .putArg(spacingAngle)
          .putArg(originDetector)
          .putArg(0.d);

      kernel.rewind();

      queue
          .put2DRangeKernel(
              kernel, 0, 0, globalWorkSizeW, globalWorkSizeH, localWorkSize, localWorkSize)
          .putBarrier()
          .finish();
      // put memory from graphic card to host
      queue.putReadBuffer(output, true).finish();

      output.getBuffer().rewind();

      for (int i = 0; i < image.getSize()[1]; ++i) {
        for (int j = 0; j < image.getSize()[0]; j++) {
          image.setAtIndex(j, i, output.getBuffer().get());
        }
      }

      return image;
    } finally {
      // Release per-call device resources even if the launch or the read-back failed.
      output.release();
      if (queue != null) {
        queue.release();
      }
    }
  }