예제 #1
0
  /**
   * Initializes the shared OpenCL state: platforms, devices of the requested type, context,
   * command queue (created with profiling enabled), program and the {@code addVec} kernel.
   *
   * @param clType device type to collect from every platform, e.g. GPU or CPU
   * @throws IllegalStateException if no platform offers a device of the requested type
   * @throws Exception if reading the kernel source, building the program or any other OpenCL
   *     call fails; a {@code CLBuildException} is logged before it is rethrown
   */
  private static void initCL(CLDevice.Type clType) throws Exception {
    // Fetch the OpenCL platforms, e.g. AMD APP, NVIDIA CUDA.
    platforms = JavaCL.listPlatforms();

    // Collect the devices of the requested type (e.g. GPU, CPU) from every platform.
    EnumSet<CLDevice.Type> types = EnumSet.of(clType);
    devices = new ArrayList<CLDevice>();
    for (CLPlatform platform : platforms) {
      devices.addAll(Arrays.asList(platform.listDevices(types, true)));
    }

    // Fail early with a clear message instead of handing an empty device array to
    // createContext (and later calling devices.get(0) in the build-error path).
    if (devices.isEmpty()) {
      throw new IllegalStateException("No OpenCL device of type " + clType + " available");
    }

    // Create the OpenCL context and the command queue.
    context = JavaCL.createContext(null, devices.toArray(new CLDevice[0]));
    cmdQ = context.createDefaultQueue(QueueProperties.ProfilingEnable);

    // Read the OpenCL source code.
    String src = readFile(KERNEL_PATH);
    // String src = KERNEL_SRC;

    // Create the OpenCL program from the source code.
    program = context.createProgram(src);

    try {
      program.build();
    } catch (CLBuildException err) {
      // The device name is quoted; the closing quote was missing in the original message.
      Logger.logError(CLAZZ, "Build log for \"" + devices.get(0) + "\":\n" + err.getMessage());
      throw err;
    }

    // Load the OpenCL kernel.
    kernel = program.createKernel("addVec");
  }
예제 #2
0
  public OpenCLInnerLoop(int numThreads) {
    // choose device
    List<CLDevice> devices = new ArrayList<CLDevice>();
    System.out.println();
    for (CLPlatform platform : JavaCL.listPlatforms()) {
      for (CLDevice device : platform.listAllDevices(true)) {
        System.out.println("Type: " + device.getType());
        System.out.println("Vendor: " + device.getVendor());
        System.out.println("Name: " + device.getName());
        System.out.println("Compute units: " + device.getMaxComputeUnits());
        System.out.println("Global mem: " + device.getGlobalMemSize() / 1e6 + "MB");
        System.out.println("Driver version: " + device.getDriverVersion());
        System.out.println();
        devices.add(device);
      }
    }
    if (context == null) {
      for (CLDevice device : devices) {
        if (device.getVendor().toLowerCase().contains("intel")
            && device.getType().contains(CLDevice.Type.GPU)
            && device.getMaxComputeUnits() >= 140
            && device.getGlobalMemSize() > 512e6) {
          this.context = JavaCL.createContext(null, device);
        }
      }
    }
    //	    if (context == null) {
    //	    	for (CLDevice device : devices) {
    //	    		if (device.getVendor().toLowerCase().contains("nvidia") &&
    // device.getType().contains(CLDevice.Type.GPU) && device.getMaxComputeUnits() >= 8 &&
    // device.getGlobalMemSize() > 1e9 &&
    // !device.getPlatform().getName().toLowerCase().contains("apple")) {
    //	    			this.context = JavaCL.createContext(null, device);
    //	    		}
    //	    	}
    //	    }
    if (context == null) {
      this.context = JavaCL.createBestContext(DeviceFeature.CPU);
    }
    if (context.getDevices()[0].getType().contains(CLDevice.Type.GPU)
        && context.getDevices()[0].getVendor().toLowerCase().contains("nvidia")) {
      this.blockSizeX = NVIDIA_GPU_BLOCK_SIZE_X;
      this.rollX = NVIDIA_GPU_ROLL_X;
      this.blockSizeY = NVIDIA_GPU_BLOCK_SIZE_Y;
    } else if (context.getDevices()[0].getType().contains(CLDevice.Type.GPU)
        && context.getDevices()[0].getVendor().toLowerCase().contains("intel")) {
      this.blockSizeX = INTEL_GPU_BLOCK_SIZE_X;
      this.rollX = INTEL_GPU_ROLL_X;
      this.blockSizeY = INTEL_GPU_BLOCK_SIZE_Y;
    } else if (context.getDevices()[0].getType().contains(CLDevice.Type.CPU)) {
      this.blockSizeX = CPU_BLOCK_SIZE_X;
      this.rollX = CPU_ROLL_X;
      this.blockSizeY = CPU_BLOCK_SIZE_Y;
    }
    System.out.println("Using context:");
    System.out.println(context.toString());
    System.out.println("Block size x: " + blockSizeX);
    System.out.println("Roll x: " + rollX);
    System.out.println("Block size y: " + blockSizeY);

    this.context.setCacheBinaries(false);
    this.queue = context.createDefaultQueue();
    this.program = context.createProgram(kernelSrc());
    this.program.addBuildOption("-cl-fast-relaxed-math");
    this.program.addBuildOption("-cl-mad-enable");
    this.program.addBuildOption("-cl-unsafe-math-optimizations");
    this.program.addBuildOption("-cl-fast-relaxed-math");
    this.program.addBuildOption("-cl-single-precision-constant");
    this.program.build();

    this.pc = new PointerCapturer();
    this.numThreads = numThreads;
  }