/**
 * Stops this engine's executor.
 *
 * <p>If the executor is still running, {@link #freeCUObjectsMemory()} is called first so the
 * cleanup task is queued before the executor stops accepting work. The method then waits up to
 * 10 seconds for the executor to terminate and prints the outcome on {@code System.err}.
 */
private synchronized void shutdown() {
  if (!exe.isShutdown()) {
    freeCUObjectsMemory();
  }
  exe.shutdown();
  try {
    System.err.println(
        "cuda device " + Id + " freed ? " + exe.awaitTermination(10, TimeUnit.SECONDS));
  } catch (InterruptedException e) {
    e.printStackTrace();
    // Restore the interrupt flag so the calling thread can still observe the interruption.
    Thread.currentThread().interrupt();
  }
}
public static boolean init() { synchronized (cudaEngines) { System.err.println("---------Initializing Cuda----------------"); try { extractAndLoadNativeLibs(); JCudaDriver.setExceptionsEnabled(true); JCudaDriver.cuInit(0); compileKernelsPtx(); // Obtain the number of devices int deviceCountArray[] = {0}; JCudaDriver.cuDeviceGetCount(deviceCountArray); availableDevicesNb = deviceCountArray[0]; if (availableDevicesNb == 0) return false; availableDevicesNb = NB_OF_DEVICE_TO_USE; // TODO initialization = Executors.newCachedThreadPool(); System.out.println("Found " + availableDevicesNb + " GPU devices"); for (int i = 0 /*-NB_OF_DEVICE_TO_USE*/; i < availableDevicesNb; i++) { final int index = i; Future<?> initJob = initialization.submit( new Runnable() { public void run() { System.err.println("Initializing device n°" + index); cudaEngines.put(index, new CudaEngine(index)); } }); initJob.get(); initialization.shutdown(); } } catch (InterruptedException | ExecutionException | IOException | CudaException | UnsatisfiedLinkError e) { e.printStackTrace(); System.err.println("---------Cannot initialize Cuda !!! ----------------"); return false; } Runtime.getRuntime() .addShutdownHook( new Thread() { @Override public void run() { CudaEngine.stop(); } }); System.out.println("---------Cuda Initialized----------------"); return true; } }
private CudaEngine(final int deviceId) { exe = Executors.newSingleThreadExecutor(); // mandatory: Only one cuda thread per context Id = deviceId; try { exe.submit( new Runnable() { @Override public void run() { CUdevice device = new CUdevice(); JCudaDriver.cuDeviceGet(device, deviceId); int array[] = {0}; JCudaDriver.cuDeviceGetAttribute( array, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device); maxThreads = (int) Math.sqrt(array[0]); context = new CUcontext(); // JCudaDriver.cuCtxCreate(context, CUctx_flags.CU_CTX_SCHED_BLOCKING_SYNC, // device); JCudaDriver.cuCtxCreate(context, 0, device); CUmodule m = new CUmodule(); initModules(m); for (Kernel k : Kernel.values()) { initFunction(m, k); } // JCudaDriver.cuCtxSetCacheConfig(CUfunc_cache.CU_FUNC_CACHE_PREFER_NONE);> // // JCudaDriver.cuCtxSetSharedMemConfig(CUsharedconfig.CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE); } }) .get(); } catch (InterruptedException | ExecutionException e) { throw new RuntimeException(e.getMessage()); } neigborsPtrs = new HashMap<>(); }
public static CudaEngine getCudaEngine(CudaObject co) { synchronized (cudaEngines) { if (!isCudaAvailable()) throw new CudaException("No cuda device found"); try { initialization.awaitTermination(100, TimeUnit.SECONDS); } catch (InterruptedException e) { e.printStackTrace(); } Pheromone p = (Pheromone) co; final int pheroID = cudaObjectID.incrementAndGet(); final CudaEngine ce = cudaEngines.get(pheroID % availableDevicesNb); // final CudaEngine ce = cudaEngines.get(1); // final CudaEngine ce = cudaEngines.get(0); // final CudaEngine ce; // if(p.getName().contains("PRE")){ // ce = cudaEngines.get(0); // } // else{ // ce = cudaEngines.get(1); // } // ce.cudaObjects.add(co); System.err.println(co + "ID " + pheroID + " getting cuda engine Id " + ce.Id); return ce; } }
/**
 * Free memory from the currently registered CUObjects.
 *
 * <p>Queues a single task on this engine's executor that synchronizes the context, calls
 * {@code freeMemory()} on every object in {@code cudaObjects} and finally destroys the
 * context. The method returns immediately; it does not wait for the task to run.
 */
public void freeCUObjectsMemory() {
  exe.submit(
      new Runnable() {
        @Override
        public void run() {
          try {
            // This task already runs on the executor thread, so call the driver directly.
            // Going through this class's cuCtxSynchronize() would queue a second task on the
            // same single-thread executor and block on it, which can never complete.
            JCudaDriver.cuCtxSynchronize();
            for (CudaObject co : cudaObjects) {
              co.freeMemory();
            }
          } finally {
            // Destroy the context even when releasing one of the objects fails.
            JCudaDriver.cuCtxDestroy(context);
          }
        }
      });
}
/**
 * Runs {@code JCudaDriver.cuCtxSynchronize()} on this engine's executor thread and blocks
 * until it has completed.
 *
 * <p>Failures are printed and not rethrown. Do not call this from a task that is itself
 * running on the executor: the executor has a single thread, so the nested task could never
 * start and the call would block forever.
 */
public void cuCtxSynchronize() {
  try {
    exe.submit(
            new Callable<Void>() {
              @Override
              public Void call() throws Exception {
                JCudaDriver.cuCtxSynchronize();
                return null;
              }
            })
        .get();
  } catch (InterruptedException e) {
    e.printStackTrace();
    // Restore the interrupt flag so the caller can react to the interruption.
    Thread.currentThread().interrupt();
  } catch (ExecutionException e) {
    e.printStackTrace();
  }
}
/**
 * Hands a task to this engine's executor.
 *
 * @param runnable the task to run on the executor
 * @return the future tracking the task, or {@code null} when the executor has already been
 *     shut down and the task was not submitted
 */
public Future<?> submit(Runnable runnable) {
  if (exe.isShutdown()) {
    // Nothing is queued once the engine has been stopped.
    return null;
  }
  return exe.submit(runnable);
}