/**
  * Allocates device-mapped host memory and exposes it as a {@link FloatBuffer}.
  *
  * <p>{@code size} is forwarded unchanged both as the allocation size given to
  * {@code cuMemHostAlloc} and as the length of the byte view taken on {@code pinnedMemory}.
  *
  * @param pinnedMemory pointer that receives the host allocation
  * @param devicePtr receives the device pointer associated with {@code pinnedMemory}
  * @param size size of the allocation and of the byte view
  * @return a float view, in native byte order, over the byte buffer obtained from
  *     {@code pinnedMemory}
  */
 static FloatBuffer getUnifiedFloatBuffer(Pointer pinnedMemory, CUdeviceptr devicePtr, long size) {
   JCudaDriver.cuMemHostAlloc(pinnedMemory, size, JCudaDriver.CU_MEMHOSTALLOC_DEVICEMAP);
   // Native byte order so that the values are laid out the way the platform expects.
   final ByteBuffer hostBytes = pinnedMemory.getByteBuffer(0, size).order(ByteOrder.nativeOrder());
   final FloatBuffer floatView = hostBytes.asFloatBuffer();
   JCudaDriver.cuMemHostGetDevicePointer(devicePtr, pinnedMemory, 0);
   return floatView;
 }
  /**
   * Allocates device-mapped host memory, resolves its device pointer and returns a new
   * zero-filled {@code int} array of length {@code size}.
   *
   * <p>NOTE(review): the returned array is a plain heap array created with {@code new int[size]};
   * nothing in this method links it to {@code pinnedMemory}, so writes to it are presumably not
   * visible through {@code devicePtr} - confirm against callers. Also, {@code size} is used both
   * as the element count of the array and as the size passed to {@code cuMemHostAlloc}; verify
   * whether the allocation was meant to be {@code size * 4} bytes.
   *
   * @param pinnedMemory pointer that receives the host allocation
   * @param devicePtr receives the device pointer associated with {@code pinnedMemory}
   * @param size length of the returned array and size of the host allocation
   * @return a newly created {@code int} array of length {@code size}
   */
  public static int[] getUnifiedIntArray(Pointer pinnedMemory, CUdeviceptr devicePtr, int size) {
    final int[] result = new int[size];
    JCudaDriver.cuMemHostAlloc(pinnedMemory, size, JCudaDriver.CU_MEMHOSTALLOC_DEVICEMAP);
    // The byte view is created and ordered but not used afterwards; the calls are kept as-is.
    pinnedMemory.getByteBuffer(0, size).order(ByteOrder.nativeOrder());
    JCudaDriver.cuMemHostGetDevicePointer(devicePtr, pinnedMemory, 0);
    return result;
  }
 /**
  * Initializes the CUDA driver and creates one {@code CudaEngine} per device to use.
  *
  * <p>While holding the lock on {@code cudaEngines}, this method loads the native libraries,
  * initializes the driver, compiles the kernels, queries the device count and then creates the
  * engines on the {@code initialization} executor. Each job is awaited before the next one is
  * submitted, so devices are initialized one after the other. On success a JVM shutdown hook
  * calling {@code CudaEngine.stop()} is registered.
  *
  * @return {@code true} if every engine was created; {@code false} if no device was reported or
  *     if one of the caught initialization failures occurred
  */
 public static boolean init() {
   synchronized (cudaEngines) {
     System.err.println("---------Initializing Cuda----------------");
     try {
       extractAndLoadNativeLibs();
       JCudaDriver.setExceptionsEnabled(true);
       JCudaDriver.cuInit(0);
       compileKernelsPtx();
       // Obtain the number of devices
       int[] deviceCountArray = {0};
       JCudaDriver.cuDeviceGetCount(deviceCountArray);
       availableDevicesNb = deviceCountArray[0];
       if (availableDevicesNb == 0) {
         return false;
       }
       availableDevicesNb = NB_OF_DEVICE_TO_USE; // TODO
       initialization = Executors.newCachedThreadPool();
       System.out.println("Found " + availableDevicesNb + " GPU devices");
       try {
         for (int i = 0; i < availableDevicesNb; i++) {
           final int index = i;
           Future<?> initJob =
               initialization.submit(
                   new Runnable() {
                     @Override
                     public void run() {
                       System.err.println("Initializing device n°" + index);
                       cudaEngines.put(index, new CudaEngine(index));
                     }
                   });
           // Wait for this device before submitting the next one.
           initJob.get();
         }
       } finally {
         // Shut down only once all jobs were submitted: an executor that is already shut down
         // rejects further submissions. Doing it in finally also releases the pool on failure.
         initialization.shutdown();
       }
     } catch (InterruptedException
         | ExecutionException
         | IOException
         | CudaException
         | UnsatisfiedLinkError e) {
       if (e instanceof InterruptedException) {
         // Restore the interrupt flag so that callers can still observe the interruption.
         Thread.currentThread().interrupt();
       }
       e.printStackTrace();
       System.err.println("---------Cannot initialize Cuda !!! ----------------");
       return false;
     }
     Runtime.getRuntime()
         .addShutdownHook(
             new Thread() {
               @Override
               public void run() {
                 CudaEngine.stop();
               }
             });
     System.out.println("---------Cuda Initialized----------------");
     return true;
   }
 }
 /**
  * Looks up a kernel function in the given module and registers it in {@code kernels}.
  *
  * <p>The function is searched under the name returned by {@code name.name()}.
  *
  * @param module module in which the function is looked up
  * @param name key under which the function handle is stored; also provides the lookup name
  */
 private void initFunction(CUmodule module, Kernel name) {
   final CUfunction kernelHandle = new CUfunction();
   final String kernelName = name.name();
   JCudaDriver.cuModuleGetFunction(kernelHandle, module, kernelName);
   kernels.put(name, kernelHandle);
 }
 /**
  * Loads the PTX file named {@code PHEROMONES_CU + ".ptx"}, located in {@code ioTmpDir}, into
  * the given module.
  *
  * @param module module handle that receives the loaded PTX
  */
 static void initModules(CUmodule module) {
   final File ptxFile = new File(ioTmpDir, PHEROMONES_CU + ".ptx");
   final String ptxPath = ptxFile.getAbsolutePath();
   JCudaDriver.cuModuleLoad(module, ptxPath);
 }