/**
   * Somewhat multi-threaded depth-first search. Performs a DFS of the subtrees from the current
   * node in parallel.
   *
   * @param s The tile sequence
   * @param b The board to search
   * @param pool The thread pool in which to submit jobs to.
   * @return The board with the highest evaluation or null if no board can continue.
   */
  private Board solve_pdfs(Board b, ExecutorService pool) {
    List<Future<Board>> rets = new ArrayList<>(BOARD_WIDTH);
    Board best = null;
    int best_score = -1;

    for (Direction d : directions) {
      Board n = new Board(b);
      if (n.move(tileSequence, d)) {
        rets.add(pool.submit(new ParallelDFS(n)));
      }
    }

    for (Future<Board> ret : rets) {
      try {
        Board c = ret.get();
        if (c != null) {
          int score = evaluate(c);
          if (score > best_score) {
            best = c;
            best_score = score;
          }
        }
      } catch (InterruptedException | ExecutionException e) {
        System.err.println("Error: " + e.getMessage());
      }
    }

    return best;
  }
 private CudaEngine(final int deviceId) {
   exe = Executors.newSingleThreadExecutor(); // mandatory: Only one cuda thread per context
   Id = deviceId;
   try {
     exe.submit(
             new Runnable() {
               @Override
               public void run() {
                 CUdevice device = new CUdevice();
                 JCudaDriver.cuDeviceGet(device, deviceId);
                 int array[] = {0};
                 JCudaDriver.cuDeviceGetAttribute(
                     array, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device);
                 maxThreads = (int) Math.sqrt(array[0]);
                 context = new CUcontext();
                 //					JCudaDriver.cuCtxCreate(context, CUctx_flags.CU_CTX_SCHED_BLOCKING_SYNC,
                 // device);
                 JCudaDriver.cuCtxCreate(context, 0, device);
                 CUmodule m = new CUmodule();
                 initModules(m);
                 for (Kernel k : Kernel.values()) {
                   initFunction(m, k);
                 }
                 //					JCudaDriver.cuCtxSetCacheConfig(CUfunc_cache.CU_FUNC_CACHE_PREFER_NONE);>
                 //
                 //	JCudaDriver.cuCtxSetSharedMemConfig(CUsharedconfig.CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE);
               }
             })
         .get();
   } catch (InterruptedException | ExecutionException e) {
     throw new RuntimeException(e.getMessage());
   }
   neigborsPtrs = new HashMap<>();
 }
 public static boolean init() {
   synchronized (cudaEngines) {
     System.err.println("---------Initializing Cuda----------------");
     try {
       extractAndLoadNativeLibs();
       JCudaDriver.setExceptionsEnabled(true);
       JCudaDriver.cuInit(0);
       compileKernelsPtx();
       // Obtain the number of devices
       int deviceCountArray[] = {0};
       JCudaDriver.cuDeviceGetCount(deviceCountArray);
       availableDevicesNb = deviceCountArray[0];
       if (availableDevicesNb == 0) return false;
       availableDevicesNb = NB_OF_DEVICE_TO_USE; // TODO
       initialization = Executors.newCachedThreadPool();
       System.out.println("Found " + availableDevicesNb + " GPU devices");
       for (int i = 0 /*-NB_OF_DEVICE_TO_USE*/; i < availableDevicesNb; i++) {
         final int index = i;
         Future<?> initJob =
             initialization.submit(
                 new Runnable() {
                   public void run() {
                     System.err.println("Initializing device n°" + index);
                     cudaEngines.put(index, new CudaEngine(index));
                   }
                 });
         initJob.get();
         initialization.shutdown();
       }
     } catch (InterruptedException
         | ExecutionException
         | IOException
         | CudaException
         | UnsatisfiedLinkError e) {
       e.printStackTrace();
       System.err.println("---------Cannot initialize Cuda !!! ----------------");
       return false;
     }
     Runtime.getRuntime()
         .addShutdownHook(
             new Thread() {
               @Override
               public void run() {
                 CudaEngine.stop();
               }
             });
     System.out.println("---------Cuda Initialized----------------");
     return true;
   }
 }
 public void cuCtxSynchronize() {
   try {
     exe.submit(
             new Callable<Void>() {
               @Override
               public Void call() throws Exception {
                 JCudaDriver.cuCtxSynchronize();
                 return null;
               }
             })
         .get();
   } catch (InterruptedException | ExecutionException e) {
     e.printStackTrace();
   }
 }
  public static void main(String[] args) {
    ExecutorService executorService = Executors.newFixedThreadPool(4);
    List<Future<String>> results = new ArrayList<>();
    String[] URLs = {
      "http://www.weather.gov",
      "http://www.espn.com",
      "http://www.marketwatch.com",
      "http://www.fandango.com"
    };

    for (String URL : URLs) {
      Callable callable =
          new Callable<String>() {
            @Override
            public String call() throws Exception {
              Thread.sleep(1000);
              System.out.println("Retrieving URL: " + URL);
              return SiteDownloader.get(URL);
            }
          };

      System.out.println("Submitting task for: " + URL);
      results.add(executorService.submit(callable));
    }

    boolean done = false;
    while (!done) {
      System.out.println("Still getting data...");
      done = areAllTrue(results);
      try {
        Thread.sleep(200);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }

    for (Future<String> result : results) {
      try {
        System.out.println("Site data: " + result.get());
      } catch (InterruptedException | ExecutionException e) {
        e.printStackTrace();
      }
    }
    executorService.shutdown();
  }