예제 #1
0
  /**
   * Stores the template layout parameters, allocates the device buffers for the white/black
   * observations and the score output, and uploads every non-empty template set to the device.
   *
   * <p>Template arrays are indexed by {@code width - minTemplateWidth}. A width whose entry in
   * {@code templateNumIndices} is not positive gets no device buffer (its slot stays
   * {@code null}).
   *
   * @param whiteTemplates flattened white template data, one array per template width
   * @param blackTemplates flattened black template data, one array per template width
   * @param templateNumIndices number of template indices per template width
   * @param templateIndicesOffsets per-width offsets, stored as-is for later use
   * @param minTemplateWidth smallest template width (inclusive)
   * @param maxTemplateWidth largest template width (inclusive)
   * @param maxSequenceLength longest observation sequence the buffers must hold
   * @param totalTemplateNumIndices total number of template indices across all widths
   */
  @SuppressWarnings("unchecked")
  public void startup(
      float[][] whiteTemplates,
      float[][] blackTemplates,
      int[] templateNumIndices,
      int[] templateIndicesOffsets,
      int minTemplateWidth,
      int maxTemplateWidth,
      int maxSequenceLength,
      int totalTemplateNumIndices) {
    this.templateNumIndices = templateNumIndices;
    this.templateIndicesOffsets = templateIndicesOffsets;
    this.maxTemplateWidth = maxTemplateWidth;
    this.minTemplateWidth = minTemplateWidth;

    // Round the sequence length up to a whole multiple of (blockSizeX * rollX).
    int columnsPerBlock = blockSizeX * rollX;
    int extendedMaxSeqLength =
        columnsPerBlock * (int) Math.ceil(((double) maxSequenceLength) / columnsPerBlock);

    // Element counts are computed in long so large inputs cannot silently overflow int.
    long observationElements =
        ((long) extendedMaxSeqLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT;
    long scoreElements = (long) maxSequenceLength * totalTemplateNumIndices;

    // Allocate the device input data and the score output.
    this.d_Ow = context.createFloatBuffer(Usage.Input, observationElements);
    this.d_Ob = context.createFloatBuffer(Usage.Input, observationElements);
    this.d_scores = context.createFloatBuffer(Usage.Output, scoreElements);

    int numTemplateWidths = (maxTemplateWidth - minTemplateWidth) + 1;
    this.d_Tw = new CLBuffer[numTemplateWidths];
    this.d_Tb = new CLBuffer[numTemplateWidths];
    for (int slot = 0; slot < numTemplateWidths; ++slot) {
      if (templateNumIndices[slot] <= 0) {
        continue; // no templates of this width: leave both slots null
      }
      float[] white = whiteTemplates[slot];
      float[] black = blackTemplates[slot];

      // The writes are enqueued with the last argument false; pc.capture(...) presumably
      // keeps each host pointer reachable until the transfer is done (confirm against
      // PointerCapturer).
      d_Tw[slot] = context.createFloatBuffer(Usage.Input, white.length);
      d_Tw[slot].write(queue, pc.capture(Pointer.pointerToFloats(white)), false);

      // Size the black buffer from the black data (it was previously sized from the white
      // template array, which is wrong whenever the two lengths differ).
      d_Tb[slot] = context.createFloatBuffer(Usage.Input, black.length);
      d_Tb[slot].write(queue, pc.capture(Pointer.pointerToFloats(black)), false);
    }
  }
예제 #2
0
  /**
   * Initializes the static OpenCL state used by this class: platforms, devices of the requested
   * type, a context spanning those devices, a command queue, the built program and the
   * {@code addVec} kernel.
   *
   * @param clType the device type to look for (e.g. GPU or CPU)
   * @throws IllegalStateException if no device of the requested type is found on any platform
   * @throws CLBuildException if the kernel source fails to build (logged, then rethrown)
   * @throws Exception for any other failure, e.g. while reading the kernel source
   */
  private static void initCL(CLDevice.Type clType) throws Exception {
    // Fetch the OpenCL platforms, e.g. AMD APP, NVIDIA CUDA.
    platforms = JavaCL.listPlatforms();

    // Collect the OpenCL devices of the requested type (e.g. GPU, CPU) from every platform.
    EnumSet<CLDevice.Type> types = EnumSet.of(clType);
    devices = new ArrayList<CLDevice>();
    for (CLPlatform platform : platforms) {
      devices.addAll(Arrays.asList(platform.listDevices(types, true)));
    }

    // Fail with a clear message instead of handing an empty device array to createContext.
    if (devices.isEmpty()) {
      throw new IllegalStateException("No available OpenCL device of type " + clType);
    }

    // Create the OpenCL context and a command queue with profiling enabled.
    context = JavaCL.createContext(null, devices.toArray(new CLDevice[devices.size()]));
    cmdQ = context.createDefaultQueue(QueueProperties.ProfilingEnable);

    // Read the OpenCL source code.
    String src = readFile(KERNEL_PATH);
    // String src = KERNEL_SRC;

    // Create the OpenCL program from the source code and build it.
    program = context.createProgram(src);

    try {
      program.build();
    } catch (CLBuildException err) {
      Logger.logError(CLAZZ, "Build log for \"" + devices.get(0) + "\":\n" + err.getMessage());
      throw err;
    }

    // Load the OpenCL kernel.
    kernel = program.createKernel("addVec");
  }
예제 #3
0
  /**
   * Runs the {@code addVec} OpenCL kernel on a device of the given type and copies the result
   * into {@code vecC}.
   *
   * <p>The kernel receives the output buffer, the two input buffers and {@code vecC.length} as
   * its arguments and is launched with a global size of {@code vecC.length}. What it computes
   * is defined by the kernel source, which is not part of this method (presumably an
   * element-wise sum of {@code vecA} and {@code vecB}; confirm against the kernel).
   *
   * <p>NOTE(review): {@code initCL} is invoked on every call, so the static OpenCL objects are
   * rebuilt each time.
   *
   * @param clType the device type to run on
   * @param vecC output vector, overwritten with the kernel result
   * @param vecA first input vector
   * @param vecB second input vector
   * @return {@code EXIT_SUCCESS} on success, {@code EXIT_FAILURE} if any exception occurred
   *     (the exception is logged and its stack trace printed)
   */
  public static synchronized boolean addVec(
      CLDevice.Type clType, int[] vecC, int[] vecA, int[] vecB) {

    try {
      if ((Logger.getLogMask() & Level.DEFAULT.DEBUG.getLevel().getValue())
          == Level.DEFAULT.DEBUG.getLevel().getValue()) {
        Logger.logDebug(CLAZZ, "addVec() vecA: " + Convert.toString(vecA));
        Logger.logDebug(CLAZZ, "addVec() vecB: " + Convert.toString(vecB));
      }

      // Initialize the OpenCL objects.
      initCL(clType);

      // Print information about the selected OpenCL device.
      Logger.logInfo(CLAZZ, "max compute units: " + devices.get(0).getMaxComputeUnits());
      Logger.logInfo(CLAZZ, "max work group sizes: " + devices.get(0).getMaxWorkGroupSize());
      Logger.logInfo(
          CLAZZ, "max global mem size (KB): " + devices.get(0).getGlobalMemSize() / 1024);
      Logger.logInfo(CLAZZ, "max local mem size (KB): " + devices.get(0).getLocalMemSize() / 1024);

      // Create and prepare the data. The staging buffer is reused for all three vectors, so
      // it is sized for the longest one. Integer.SIZE is a bit count; the allocation needs
      // bytes, hence the division by Byte.SIZE.
      int stagingLength = Math.max(vecA.length, Math.max(vecB.length, vecC.length));
      IntBuffer tmpBuffer =
          ByteBuffer.allocateDirect(stagingLength * (Integer.SIZE / Byte.SIZE))
              .order(context.getByteOrder())
              .asIntBuffer();

      // Rewind after each fill so the whole buffer is exposed regardless of whether the
      // OpenCL binding looks at the buffer's capacity or at its remaining elements.
      tmpBuffer.put(vecA);
      tmpBuffer.rewind();
      CLBuffer<IntBuffer> aBuffer = context.createBuffer(CLMem.Usage.Input, tmpBuffer, true);

      tmpBuffer.clear();
      tmpBuffer.put(vecB);
      tmpBuffer.rewind();
      CLBuffer<IntBuffer> bBuffer = context.createBuffer(CLMem.Usage.Input, tmpBuffer, true);

      CLBuffer<IntBuffer> cBuffer =
          context.createBuffer(CLMem.Usage.Output, vecC.length, IntBuffer.class);

      // Set the kernel arguments.
      kernel.setArg(0, cBuffer);
      kernel.setArg(1, aBuffer);
      kernel.setArg(2, bBuffer);
      kernel.setArg(3, vecC.length);

      // Run the kernel and wait for it to finish.
      CLEvent event = kernel.enqueueNDRange(cmdQ, new int[] {vecC.length}, new CLEvent[0]);
      event.waitFor();
      cmdQ.finish();

      // Fetch the data from the OpenCL device.
      cBuffer.read(cmdQ, tmpBuffer, true, new CLEvent[0]);
      tmpBuffer.clear();
      tmpBuffer.get(vecC);

      if ((Logger.getLogMask() & Level.DEFAULT.DEBUG.getLevel().getValue())
          == Level.DEFAULT.DEBUG.getLevel().getValue()) {
        Logger.logDebug(CLAZZ, "addVec() vecC: " + Convert.toString(vecC));
      }
    } catch (CLException err) {
      Logger.logError(CLAZZ, "OpenCL error:\n" + err.getMessage() + "():" + err.getCode());
      err.printStackTrace();
      return EXIT_FAILURE;
    } catch (Exception err) {
      Logger.logError(CLAZZ, "Error:\n" + err.getMessage() + "()");
      err.printStackTrace();
      return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
  }
예제 #4
0
  public OpenCLInnerLoop(int numThreads) {
    // choose device
    List<CLDevice> devices = new ArrayList<CLDevice>();
    System.out.println();
    for (CLPlatform platform : JavaCL.listPlatforms()) {
      for (CLDevice device : platform.listAllDevices(true)) {
        System.out.println("Type: " + device.getType());
        System.out.println("Vendor: " + device.getVendor());
        System.out.println("Name: " + device.getName());
        System.out.println("Compute units: " + device.getMaxComputeUnits());
        System.out.println("Global mem: " + device.getGlobalMemSize() / 1e6 + "MB");
        System.out.println("Driver version: " + device.getDriverVersion());
        System.out.println();
        devices.add(device);
      }
    }
    if (context == null) {
      for (CLDevice device : devices) {
        if (device.getVendor().toLowerCase().contains("intel")
            && device.getType().contains(CLDevice.Type.GPU)
            && device.getMaxComputeUnits() >= 140
            && device.getGlobalMemSize() > 512e6) {
          this.context = JavaCL.createContext(null, device);
        }
      }
    }
    //	    if (context == null) {
    //	    	for (CLDevice device : devices) {
    //	    		if (device.getVendor().toLowerCase().contains("nvidia") &&
    // device.getType().contains(CLDevice.Type.GPU) && device.getMaxComputeUnits() >= 8 &&
    // device.getGlobalMemSize() > 1e9 &&
    // !device.getPlatform().getName().toLowerCase().contains("apple")) {
    //	    			this.context = JavaCL.createContext(null, device);
    //	    		}
    //	    	}
    //	    }
    if (context == null) {
      this.context = JavaCL.createBestContext(DeviceFeature.CPU);
    }
    if (context.getDevices()[0].getType().contains(CLDevice.Type.GPU)
        && context.getDevices()[0].getVendor().toLowerCase().contains("nvidia")) {
      this.blockSizeX = NVIDIA_GPU_BLOCK_SIZE_X;
      this.rollX = NVIDIA_GPU_ROLL_X;
      this.blockSizeY = NVIDIA_GPU_BLOCK_SIZE_Y;
    } else if (context.getDevices()[0].getType().contains(CLDevice.Type.GPU)
        && context.getDevices()[0].getVendor().toLowerCase().contains("intel")) {
      this.blockSizeX = INTEL_GPU_BLOCK_SIZE_X;
      this.rollX = INTEL_GPU_ROLL_X;
      this.blockSizeY = INTEL_GPU_BLOCK_SIZE_Y;
    } else if (context.getDevices()[0].getType().contains(CLDevice.Type.CPU)) {
      this.blockSizeX = CPU_BLOCK_SIZE_X;
      this.rollX = CPU_ROLL_X;
      this.blockSizeY = CPU_BLOCK_SIZE_Y;
    }
    System.out.println("Using context:");
    System.out.println(context.toString());
    System.out.println("Block size x: " + blockSizeX);
    System.out.println("Roll x: " + rollX);
    System.out.println("Block size y: " + blockSizeY);

    this.context.setCacheBinaries(false);
    this.queue = context.createDefaultQueue();
    this.program = context.createProgram(kernelSrc());
    this.program.addBuildOption("-cl-fast-relaxed-math");
    this.program.addBuildOption("-cl-mad-enable");
    this.program.addBuildOption("-cl-unsafe-math-optimizations");
    this.program.addBuildOption("-cl-fast-relaxed-math");
    this.program.addBuildOption("-cl-single-precision-constant");
    this.program.build();

    this.pc = new PointerCapturer();
    this.numThreads = numThreads;
  }