/**
 * Adds two images element-wise on the OpenCL device.
 *
 * <p>Launches the {@code addImages} kernel from {@code exercise4.cl} over the region common to
 * both inputs (minimum width and minimum height), reads the result back to the host and stores
 * it in a copy of {@code image1}. Elements of that copy outside the common region keep the
 * values copied from {@code image1}.
 *
 * <p>The context, device, program and kernel are held in fields and created on first use. If
 * the kernel source cannot be read, the stack trace is printed and the JVM exits with status
 * {@code -1}.
 *
 * @param image1 first operand; also the template for the returned grid
 * @param image2 second operand
 * @return a new grid, copied from {@code image1}, whose common region holds the kernel output
 */
public Grid2D add(OpenCLGrid2D image1, OpenCLGrid2D image2) {

    // Lazily initialise the context and device fields.
    if (context == null) {
      context = OpenCLUtil.getStaticContext();
    }
    if (device == null) {
      device = context.getMaxFlopsDevice();
    }

    // Only the region covered by both images is processed.
    int width = Math.min(image1.getWidth(), image2.getWidth());
    int height = Math.min(image1.getHeight(), image2.getHeight());
    int imageSize = width * height;

    // The launch uses a square localWorkSize x localWorkSize work-group, so the edge length is
    // limited by the square root of the device maximum, not by the maximum itself.
    int localWorkSize = Math.min((int) Math.sqrt(device.getMaxWorkGroupSize()), 8);
    // Global sizes are rounded up to the nearest multiple of localWorkSize.
    int globalWorkSizeW = OpenCLUtil.roundUp(localWorkSize, width);
    int globalWorkSizeH = OpenCLUtil.roundUp(localWorkSize, height);

    // Load the kernel source and build the program once.
    if (program == null) {
      try {
        program =
            context.createProgram(this.getClass().getResourceAsStream("exercise4.cl")).build();
      } catch (IOException e) {
        e.printStackTrace();
        System.exit(-1);
      }
    }

    // The kernel field is shared with other methods of this class; only reuse it when it
    // really is the addImages kernel, otherwise a previously cached kernel would be launched
    // with the wrong arguments.
    if (kernel == null || !"addImages".equals(kernel.name)) {
      kernel = program.createCLKernel("addImages");
    }

    CLBuffer<FloatBuffer> output = context.createFloatBuffer(imageSize, Mem.WRITE_ONLY);
    CLCommandQueue queue = null;
    try {
      queue = device.createCommandQueue();
      image1.getDelegate().prepareForDeviceOperation();
      image2.getDelegate().prepareForDeviceOperation();

      // Start from argument index 0 even if the cached kernel was left in another state.
      kernel.rewind();
      kernel
          .putArg(image1.getDelegate().getCLBuffer())
          .putArg(image2.getDelegate().getCLBuffer())
          .putArg(output)
          .putArg(width)
          .putArg(height);
      kernel.rewind();

      queue
          .put2DRangeKernel(
              kernel, 0, 0, globalWorkSizeW, globalWorkSizeH, localWorkSize, localWorkSize)
          .putBarrier()
          // blocking read: device -> host
          .putReadBuffer(output, true)
          .finish();

      Grid2D result = new Grid2D(image1);
      FloatBuffer hostData = output.getBuffer();
      hostData.rewind();

      // The buffer holds exactly width * height values, so iterate over the common region and
      // not over the (possibly larger) size of image1.
      // NOTE(review): assumes the kernel writes rows of length `width` in row-major order;
      // confirm against exercise4.cl.
      for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
          result.setAtIndex(x, y, hostData.get());
        }
      }

      return result;
    } finally {
      // Release per-call device resources even when the launch or read-back fails.
      output.release();
      if (queue != null) {
        queue.release();
      }
    }
  }
  /**
   * Runs the {@code parallelBackProjection} kernel from {@code exercise4.cl} on a filtered
   * sinogram and copies the device result into the {@code image} field, which is returned.
   *
   * <p>The kernel is launched on the static OpenCL context's max-flops device over a
   * {@code widthPhantom x heightPhantom} range, rounded up to a multiple of the work-group edge
   * length. If the kernel source cannot be read, the stack trace is printed and the JVM exits
   * with status {@code -1}.
   *
   * @param filteredSinogramm input sinogram; its device buffer is passed to the kernel
   * @param widthPhantom width of the output range and of the device output buffer
   * @param heightPhantom height of the output range and of the device output buffer
   * @param worksize requested edge length of the square work-group; reduced if the device
   *     cannot run {@code worksize * worksize} work-items per group
   * @param detectorSpacing detector element spacing, forwarded to the kernel and used to
   *     compute the detector origin
   * @param numberOfPixel number of detector elements, forwarded to the kernel and used to
   *     compute the detector origin
   * @param numberProjections number of projections; also the divisor for the angular step
   * @param scanAngle total scan angle; divided by {@code numberProjections} for the step
   * @param spacing output spacing; elements 0 and 1 are forwarded to the kernel
   * @param origin output origin; elements 0 and 1 are forwarded to the kernel
   * @return the {@code image} field after it has been filled from the device output
   */
  public Grid2D openCLBackprojection(
      OpenCLGrid2D filteredSinogramm,
      int widthPhantom,
      int heightPhantom,
      int worksize,
      float detectorSpacing,
      int numberOfPixel,
      int numberProjections,
      float scanAngle,
      double[] spacing,
      double[] origin) {
    CLContext context = OpenCLUtil.getStaticContext();
    CLDevice device = context.getMaxFlopsDevice();

    // Angular step (float division, widened to double) and detector origin, which is minus
    // half of the total detector length.
    double spacingAngle = scanAngle / numberProjections;
    double originDetector = -(detectorSpacing * numberOfPixel) / 2.0;

    int imageSize = widthPhantom * heightPhantom;

    // The launch uses a square localWorkSize x localWorkSize work-group, so the edge length is
    // limited by the square root of the device maximum. A non-positive request falls back to 1
    // to keep the rounding below well defined.
    int localWorkSize =
        Math.max(1, Math.min((int) Math.sqrt(device.getMaxWorkGroupSize()), worksize));
    // Global sizes are rounded up to the nearest multiple of localWorkSize.
    int globalWorkSizeW = OpenCLUtil.roundUp(localWorkSize, widthPhantom);
    int globalWorkSizeH = OpenCLUtil.roundUp(localWorkSize, heightPhantom);

    // Build the program once and keep it in the field; rebuilding on every call recompiles
    // the source and abandons the previous program without releasing it.
    if (program == null) {
      try {
        program =
            context.createProgram(this.getClass().getResourceAsStream("exercise4.cl")).build();
      } catch (IOException e) {
        e.printStackTrace();
        System.exit(-1);
      }
    }

    // The kernel field is shared with other methods of this class; only reuse it when it
    // really is the back-projection kernel, otherwise a previously cached kernel would be
    // launched with the wrong arguments.
    if (kernel == null || !"parallelBackProjection".equals(kernel.name)) {
      kernel = program.createCLKernel("parallelBackProjection");
    }

    CLBuffer<FloatBuffer> output = context.createFloatBuffer(imageSize, Mem.WRITE_ONLY);
    CLCommandQueue queue = null;
    try {
      queue = device.createCommandQueue();
      filteredSinogramm.getDelegate().prepareForDeviceOperation();

      // Start from argument index 0 even if the cached kernel was left in another state.
      kernel.rewind();
      kernel
          .putArg(filteredSinogramm.getDelegate().getCLBuffer())
          .putArg(output)
          .putArg(numberProjections)
          .putArg(numberOfPixel)
          .putArg(scanAngle)
          .putArg(widthPhantom)
          .putArg(heightPhantom)
          .putArg(spacing[0])
          .putArg(spacing[1])
          .putArg(origin[0])
          .putArg(origin[1])
          .putArg(detectorSpacing)
          .putArg(spacingAngle)
          .putArg(originDetector)
          // NOTE(review): trailing constant double; its meaning is defined by the kernel
          // signature in exercise4.cl.
          .putArg(0.d);
      kernel.rewind();

      queue
          .put2DRangeKernel(
              kernel, 0, 0, globalWorkSizeW, globalWorkSizeH, localWorkSize, localWorkSize)
          .putBarrier()
          .finish();
      // blocking read: device -> host
      queue.putReadBuffer(output, true).finish();

      FloatBuffer hostData = output.getBuffer();
      hostData.rewind();

      // NOTE(review): `image` is a field declared outside this method. The buffer holds
      // widthPhantom * heightPhantom values, so this copy presumes image has exactly that
      // size; confirm against the callers.
      for (int i = 0; i < image.getSize()[1]; ++i) {
        for (int j = 0; j < image.getSize()[0]; j++) {
          image.setAtIndex(j, i, hostData.get());
        }
      }

      return image;
    } finally {
      // Release per-call device resources even when the launch or read-back fails.
      output.release();
      if (queue != null) {
        queue.release();
      }
    }
  }