/**
  * Releases this engine's CUDA resources and stops its executor.
  *
  * <p>When the executor has not been shut down yet, {@link #freeCUObjectsMemory()} is called
  * first so that its cleanup task is queued before the executor stops accepting work. The method
  * then waits up to 10 seconds for the executor to terminate and prints the outcome on
  * {@code System.err}.
  *
  * <p>If the calling thread is interrupted while waiting, the stack trace is printed and the
  * thread's interrupt status is restored.
  */
 private synchronized void shutdown() {
   if (!exe.isShutdown()) {
     freeCUObjectsMemory();
   }
   exe.shutdown();
   try {
     final boolean terminated = exe.awaitTermination(10, TimeUnit.SECONDS);
     System.err.println("cuda device " + Id + " freed ? " + terminated);
   } catch (InterruptedException e) {
     e.printStackTrace();
     // Catching InterruptedException clears the flag; set it again so that code further up
     // the stack can still see that an interruption was requested.
     Thread.currentThread().interrupt();
   }
 }
 /**
  * Initializes the CUDA driver and creates one {@code CudaEngine} per device to use.
  *
  * <p>Under the {@code cudaEngines} lock this method calls {@code extractAndLoadNativeLibs()},
  * enables JCuda exceptions, calls {@code cuInit(0)} and {@code compileKernelsPtx()}, then
  * queries the device count. If at least one device is reported, {@code availableDevicesNb} is
  * overwritten with {@code NB_OF_DEVICE_TO_USE} and an engine is created for every index in
  * {@code [0, availableDevicesNb)}. Each engine is built on the {@code initialization} executor
  * and awaited before the next one is submitted. On success a JVM shutdown hook that calls
  * {@code CudaEngine.stop()} is registered.
  *
  * @return {@code true} when every step succeeded; {@code false} when no device is reported or
  *     when any of the caught initialization errors occurs
  */
 public static boolean init() {
   synchronized (cudaEngines) {
     System.err.println("---------Initializing Cuda----------------");
     try {
       extractAndLoadNativeLibs();
       JCudaDriver.setExceptionsEnabled(true);
       JCudaDriver.cuInit(0);
       compileKernelsPtx();
       // Obtain the number of devices
       int[] deviceCountArray = {0};
       JCudaDriver.cuDeviceGetCount(deviceCountArray);
       availableDevicesNb = deviceCountArray[0];
       if (availableDevicesNb == 0) {
         return false;
       }
       availableDevicesNb = NB_OF_DEVICE_TO_USE; // TODO
       initialization = Executors.newCachedThreadPool();
       System.out.println("Found " + availableDevicesNb + " GPU devices");
       try {
         for (int i = 0; i < availableDevicesNb; i++) {
           final int index = i;
           Future<?> initJob =
               initialization.submit(
                   new Runnable() {
                     @Override
                     public void run() {
                       System.err.println("Initializing device n°" + index);
                       cudaEngines.put(index, new CudaEngine(index));
                     }
                   });
           // Wait for this device before starting the next one.
           initJob.get();
         }
       } finally {
         // Shut the pool down only once all devices have been submitted: an executor that is
         // already shut down rejects further submissions, so doing this inside the loop would
         // make the second device fail with a RejectedExecutionException. Doing it in a
         // finally block also stops the pool when a device fails to initialize.
         initialization.shutdown();
       }
     } catch (InterruptedException
         | ExecutionException
         | IOException
         | CudaException
         | UnsatisfiedLinkError e) {
       if (e instanceof InterruptedException) {
         // Keep the interruption visible to the caller.
         Thread.currentThread().interrupt();
       }
       e.printStackTrace();
       System.err.println("---------Cannot initialize Cuda !!! ----------------");
       return false;
     }
     Runtime.getRuntime()
         .addShutdownHook(
             new Thread() {
               @Override
               public void run() {
                 CudaEngine.stop();
               }
             });
     System.out.println("---------Cuda Initialized----------------");
     return true;
   }
 }
 /**
  * Creates the engine for one CUDA device.
  *
  * <p>A dedicated single-thread executor is created for this engine and the whole device setup
  * (device lookup, context creation, module and kernel function initialization) is run on it.
  * The constructor blocks until that setup task has finished.
  *
  * @param deviceId ordinal of the device, as passed to {@code cuDeviceGet}
  * @throws RuntimeException if the setup task fails or the waiting thread is interrupted; the
  *     original exception is attached as the cause
  */
 private CudaEngine(final int deviceId) {
   exe = Executors.newSingleThreadExecutor(); // mandatory: Only one cuda thread per context
   Id = deviceId;
   try {
     exe.submit(
             new Runnable() {
               @Override
               public void run() {
                 CUdevice device = new CUdevice();
                 JCudaDriver.cuDeviceGet(device, deviceId);
                 int[] array = {0};
                 JCudaDriver.cuDeviceGetAttribute(
                     array, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device);
                 // Square root of the per-block thread limit; presumably the side length of a
                 // square 2D block - TODO confirm against the kernel launch code.
                 maxThreads = (int) Math.sqrt(array[0]);
                 context = new CUcontext();
                 // Disabled alternative:
                 // JCudaDriver.cuCtxCreate(context, CUctx_flags.CU_CTX_SCHED_BLOCKING_SYNC, device);
                 JCudaDriver.cuCtxCreate(context, 0, device);
                 CUmodule m = new CUmodule();
                 initModules(m);
                 for (Kernel k : Kernel.values()) {
                   initFunction(m, k);
                 }
                 // Disabled context tuning:
                 // JCudaDriver.cuCtxSetCacheConfig(CUfunc_cache.CU_FUNC_CACHE_PREFER_NONE);
                 // JCudaDriver.cuCtxSetSharedMemConfig(
                 //     CUsharedconfig.CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE);
               }
             })
         .get();
   } catch (InterruptedException e) {
     // Keep the interruption visible to the caller before reporting the failure.
     Thread.currentThread().interrupt();
     throw new RuntimeException(e.getMessage(), e);
   } catch (ExecutionException e) {
     // Chain the cause so the stack trace of the failing driver call is not lost.
     throw new RuntimeException(e.getMessage(), e);
   }
   neigborsPtrs = new HashMap<>();
 }
  /**
   * Registers a CUDA object with one of the initialized engines and returns that engine.
   *
   * <p>Under the {@code cudaEngines} lock, the method waits up to 100 seconds for the
   * {@code initialization} executor to terminate, draws a new id from {@code cudaObjectID} and
   * picks the engine stored under {@code id % availableDevicesNb}, so consecutive objects are
   * spread over the engines in turn. The object is added to the chosen engine's
   * {@code cudaObjects} and the assignment is printed on {@code System.err}.
   *
   * @param co the object to register; any {@code CudaObject} is accepted
   * @return the engine the object has been assigned to
   * @throws CudaException if {@code isCudaAvailable()} returns {@code false}
   */
  public static CudaEngine getCudaEngine(CudaObject co) {
    synchronized (cudaEngines) {
      if (!isCudaAvailable()) {
        throw new CudaException("No cuda device found");
      }
      try {
        initialization.awaitTermination(100, TimeUnit.SECONDS);
      } catch (InterruptedException e) {
        e.printStackTrace();
        // Catching InterruptedException clears the flag; set it again for the caller.
        Thread.currentThread().interrupt();
      }
      final int pheroID = cudaObjectID.incrementAndGet();
      final CudaEngine ce = cudaEngines.get(pheroID % availableDevicesNb);
      ce.cudaObjects.add(co);
      System.err.println(co + "ID " + pheroID + " getting cuda engine Id " + ce.Id);
      return ce;
    }
  }
 /**
  * Queues a task on this engine's executor that frees the memory of the currently registered
  * CUObjects and then destroys the CUDA context.
  *
  * <p>The method returns as soon as the task is queued; it does not wait for the cleanup to
  * finish. The task synchronizes the context, calls {@code freeMemory()} on every object in
  * {@code cudaObjects} and finally calls {@code cuCtxDestroy} on {@code context}.
  */
 public void freeCUObjectsMemory() {
   exe.submit(
       new Runnable() {
         @Override
         public void run() {
           // Call the driver directly: this code already runs on the executor's single
           // thread. The instance method cuCtxSynchronize() would queue a second task on the
           // same executor and block waiting for it, which can never complete while this
           // task occupies the only thread - nothing below would ever run.
           JCudaDriver.cuCtxSynchronize();
           for (CudaObject co : cudaObjects) {
             co.freeMemory();
           }
           JCudaDriver.cuCtxDestroy(context);
         }
       });
 }
 /**
  * Runs {@code JCudaDriver.cuCtxSynchronize()} on this engine's executor and blocks until that
  * call has returned.
  *
  * <p>Failures are reported best-effort: the stack trace is printed and the method returns
  * normally. When the calling thread is interrupted while waiting, its interrupt status is
  * restored.
  *
  * <p>Because this method waits for a task it submits to the engine's own executor, it must not
  * be called from a task that is itself running on that executor.
  */
 public void cuCtxSynchronize() {
   final Future<Void> pending =
       exe.submit(
           new Callable<Void>() {
             @Override
             public Void call() throws Exception {
               JCudaDriver.cuCtxSynchronize();
               return null;
             }
           });
   try {
     pending.get();
   } catch (InterruptedException e) {
     e.printStackTrace();
     // Catching InterruptedException clears the flag; set it again for the caller.
     Thread.currentThread().interrupt();
   } catch (ExecutionException e) {
     e.printStackTrace();
   }
 }
 /**
  * Hands a task to this engine's executor.
  *
  * @param runnable the task to run on the engine's executor
  * @return the future tracking the task, or {@code null} when the executor has already been
  *     shut down and the task was therefore not submitted
  */
 public Future<?> submit(Runnable runnable) {
   final boolean stopped = exe.isShutdown();
   return stopped ? null : exe.submit(runnable);
 }