public GenericGaussianConvolution(
      final F input,
      final ImgFactory<T> outputFactory,
      final OutOfBoundsFactory<T, F> outOfBoundsFactory1,
      final OutOfBoundsFactory<T, Img<T>> outOfBoundsFactory2,
      final double[] sigma) {
    this.input = input;
    this.outputFactory = outputFactory;
    this.convolved = outputFactory.create(input, input.firstElement().createVariable());
    this.sigma = sigma;
    this.processingTime = -1;
    setNumThreads();

    this.outOfBoundsFactory1 = outOfBoundsFactory1;
    this.outOfBoundsFactory2 = outOfBoundsFactory2;
    this.numDimensions = input.numDimensions();

    this.kernel = new double[numDimensions][];

    for (int d = 0; d < numDimensions; ++d)
      this.kernel[d] = Util.createGaussianKernel1DDouble(sigma[d], true);
  }
  /**
   * Convolves the input one dimension at a time with the precomputed per-dimension kernels.
   *
   * <p>Each pass reads from one container and writes to another. The first pass reads from
   * {@code input}; every later pass reads what the previous pass wrote. The target alternates
   * between a temporary image and {@code convolved}, arranged so that the pass over the last
   * dimension always writes to {@code convolved}.
   *
   * <p>Within a pass the pixel range {@code [0, input.size())} is split into one contiguous chunk
   * per thread; the last thread additionally takes the remainder of the division.
   *
   * <p>On completion {@code processingTime} holds the elapsed wall-clock time in milliseconds.
   *
   * @return always {@code true}
   */
  @Override
  public boolean process() {
    final long startTime = System.currentTimeMillis();

    final Img<T> temp = outputFactory.create(input, input.firstElement().createVariable());
    final long containerSize = input.size();

    //
    // Folding loop: one pass per dimension
    //
    for (int dim = 0; dim < numDimensions; dim++) {
      final int currentDim = dim;

      // The pass over the last dimension has to end up in 'convolved'. Counting backwards from
      // it, the targets alternate convolved, temp, convolved, ... so this pass writes to
      // 'convolved' exactly when an even number of passes follows it.
      final boolean writeToConvolved = (numDimensions - 1 - currentDim) % 2 == 0;

      final AtomicInteger ai = new AtomicInteger(0);
      final Thread[] threads = SimpleMultiThreading.newThreads(numThreads);

      final long threadChunkSize = containerSize / threads.length;
      final long threadChunkMod = containerSize % threads.length;

      // Derive the last thread's index from the same array the chunk sizes were computed from,
      // so the remainder is always assigned to exactly one thread.
      final int lastThread = threads.length - 1;

      for (int ithread = 0; ithread < threads.length; ++ithread)
        threads[ithread] =
            new Thread(
                new Runnable() {
                  @Override
                  public void run() {
                    // Thread ID, handed out in start order by the shared counter
                    final int myNumber = ai.getAndIncrement();

                    // Source of this pass: the original input for the first dimension,
                    // afterwards whichever container the previous pass wrote to (i.e. the one
                    // this pass does NOT write to).
                    final RandomAccess<T> inputIterator;
                    if (currentDim == 0) {
                      inputIterator = input.randomAccess(outOfBoundsFactory1);
                    } else if (writeToConvolved) {
                      inputIterator = temp.randomAccess(outOfBoundsFactory2);
                    } else {
                      inputIterator = convolved.randomAccess(outOfBoundsFactory2);
                    }

                    // Target of this pass
                    final Cursor<T> outputIterator;
                    if (writeToConvolved) {
                      outputIterator = convolved.localizingCursor();
                    } else {
                      outputIterator = temp.localizingCursor();
                    }

                    // first pixel index handled by the current thread
                    final long startPosition = myNumber * threadChunkSize;

                    // the last thread may have to run longer if the number of pixels cannot be
                    // divided evenly by the number of threads
                    final long loopSize;
                    if (myNumber == lastThread) {
                      loopSize = threadChunkSize + threadChunkMod;
                    } else {
                      loopSize = threadChunkSize;
                    }

                    // convolve() takes a float kernel; every thread converts its own copy of
                    // the double kernel for the current dimension
                    final double[] kernelD = kernel[currentDim];
                    final float[] kernelF = new float[kernelD.length];

                    for (int i = 0; i < kernelF.length; ++i) {
                      kernelF[i] = (float) kernelD[i];
                    }

                    // convolve the container in the current dimension using the given cursors
                    convolve(
                        inputIterator,
                        outputIterator,
                        currentDim,
                        kernelF,
                        startPosition,
                        loopSize);
                  }
                });
      SimpleMultiThreading.startAndJoin(threads);
    }

    processingTime = System.currentTimeMillis() - startTime;

    return true;
  }