예제 #1
0
  /**
   * Initializes the static OpenCL state of this class: platforms, devices, context, command
   * queue, program and the {@code addVec} kernel.
   *
   * <p>The kernel source is read from {@code KERNEL_PATH}, compiled, and the resulting kernel is
   * stored in {@code kernel}.
   *
   * @param clType the device type to collect from every platform (e.g. GPU or CPU)
   * @throws IllegalStateException if no device of the requested type was found on any platform
   * @throws CLBuildException if the kernel source fails to build; the failure is logged first
   * @throws Exception if reading the kernel source or any other OpenCL call fails
   */
  private static void initCL(CLDevice.Type clType) throws Exception {
    // Fetch the OpenCL platforms, e.g. AMD APP, NVIDIA CUDA.
    platforms = JavaCL.listPlatforms();

    // Collect the OpenCL devices of the requested type (e.g. GPU, CPU) from every platform.
    EnumSet<CLDevice.Type> types = EnumSet.of(clType);
    devices = new ArrayList<CLDevice>();
    for (CLPlatform platform : platforms) {
      devices.addAll(Arrays.asList(platform.listDevices(types, true)));
    }

    // Fail early with a readable message instead of letting context creation blow up on an
    // empty device array.
    if (devices.isEmpty()) {
      throw new IllegalStateException("No OpenCL device of type " + clType + " found");
    }

    // Create the OpenCL context and the command queue.
    // NOTE(review): devices from all platforms end up in one context; OpenCL contexts are
    // normally bound to a single platform -- confirm this works on multi-platform machines.
    context = JavaCL.createContext(null, devices.toArray(new CLDevice[devices.size()]));
    cmdQ = context.createDefaultQueue(QueueProperties.ProfilingEnable);

    // Read the OpenCL source code.
    String src = readFile(KERNEL_PATH);

    // Create the OpenCL program from the source code and build it.
    program = context.createProgram(src);
    try {
      program.build();
    } catch (CLBuildException err) {
      // Log the build output before propagating so the compiler diagnostics are not lost.
      Logger.logError(CLAZZ, "Build log for \"" + devices.get(0) + "\"\n" + err.getMessage());
      throw err;
    }

    // Load the OpenCL kernel.
    kernel = program.createKernel("addVec");
  }
예제 #2
0
 /**
  * Uploads the white and black observations, runs one {@code compute_emissions_<width>} kernel
  * for every template width that has at least one template index, and reads the device score
  * buffer back into {@code scores}.
  *
  * <p>Both observation arrays are zero-extended via {@code CudaUtil.extendWithZeros} before the
  * upload; the target length is derived from the sequence length rounded up to a multiple of
  * {@code blockSizeX * rollX}.
  *
  * @param scores destination array that receives the contents of {@code d_scores}
  * @param whiteObservations white observation values to upload into {@code d_Ow}
  * @param blackObservations black observation values to upload into {@code d_Ob}
  * @param sequenceLength sequence length used to size the launch grid and passed to each kernel
  */
 public void compute(
     final float[] scores,
     final float[] whiteObservations,
     final float[] blackObservations,
     final int sequenceLength) {
   // Round the sequence length up to a whole number of (blockSizeX * rollX)-wide chunks.
   final int columnsPerBlock = blockSizeX * rollX;
   final int gridSizeX = (int) Math.ceil(((double) sequenceLength) / columnsPerBlock);
   final int extendedSeqLength = gridSizeX * columnsPerBlock;

   // Both observation buffers are padded to the same length.
   final int paddedLength =
       (extendedSeqLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT;

   // Non-blocking uploads; the barrier below orders them before the kernel launches.
   d_Ow.write(
       queue,
       pc.capture(
           Pointer.pointerToFloats(CudaUtil.extendWithZeros(whiteObservations, paddedLength))),
       false);
   d_Ob.write(
       queue,
       pc.capture(
           Pointer.pointerToFloats(CudaUtil.extendWithZeros(blackObservations, paddedLength))),
       false);
   queue.enqueueBarrier();

   for (int tw = minTemplateWidth; tw <= maxTemplateWidth; ++tw) {
     // Per-width arrays are indexed relative to the smallest template width.
     final int slot = tw - minTemplateWidth;
     if (templateNumIndices[slot] <= 0) {
       continue; // nothing to compute for this width
     }

     final int gridSizeY = (int) Math.ceil(((double) templateNumIndices[slot]) / blockSizeY);
     final int[] globalSize = new int[] {gridSizeX * blockSizeX, gridSizeY * blockSizeY};
     final int[] localSize = new int[] {blockSizeX, blockSizeY};

     CLKernel emissionKernel = program.createKernel("compute_emissions_" + tw);
     emissionKernel.setArgs(
         templateIndicesOffsets[slot] * sequenceLength,
         sequenceLength,
         templateNumIndices[slot],
         d_Tw[slot],
         d_Tb[slot],
         d_Ow,
         d_Ob,
         d_scores);
     emissionKernel.enqueueNDRange(queue, globalSize, localSize);
   }

   // Order all kernel launches before the read-back of the results.
   queue.enqueueBarrier();
   d_scores.read(queue).getFloats(scores);
 }