예제 #1
0
 /**
  * Uploads the white and black observations, launches one emission kernel per template width
  * that has indices, and copies the resulting scores back into {@code scores}.
  *
  * <p>Both observation arrays are passed through {@code CudaUtil.extendWithZeros} with a target
  * size of {@code (paddedLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT}, where
  * {@code paddedLength} is {@code sequenceLength} rounded up to a multiple of
  * {@code blockSizeX * rollX}.
  *
  * @param scores destination array that receives the contents of {@code d_scores}
  * @param whiteObservations observations written to {@code d_Ow} after padding
  * @param blackObservations observations written to {@code d_Ob} after padding
  * @param sequenceLength sequence length used for grid sizing and passed to every kernel
  */
 public void compute(
     final float[] scores,
     final float[] whiteObservations,
     final float[] blackObservations,
     final int sequenceLength) {
   // Round the sequence length up to a whole number of (blockSizeX * rollX)-wide chunks.
   final int columnsPerBlock = blockSizeX * rollX;
   final int gridSizeX = (int) Math.ceil(((double) sequenceLength) / columnsPerBlock);
   final int paddedLength = gridSizeX * columnsPerBlock;

   // Upload both observation arrays; the trailing 'false' is presumably the "blocking" flag,
   // so the barrier below orders the writes before the kernels -- TODO confirm against the API.
   d_Ow.write(
       queue,
       pc.capture(
           Pointer.pointerToFloats(
               CudaUtil.extendWithZeros(
                   whiteObservations,
                   (paddedLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT))),
       false);
   d_Ob.write(
       queue,
       pc.capture(
           Pointer.pointerToFloats(
               CudaUtil.extendWithZeros(
                   blackObservations,
                   (paddedLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT))),
       false);
   queue.enqueueBarrier();

   // Per-width tables are indexed by (width - minTemplateWidth), i.e. by a zero-based slot.
   final int lastSlot = maxTemplateWidth - minTemplateWidth;
   for (int slot = 0; slot <= lastSlot; ++slot) {
     if (!(templateNumIndices[slot] > 0)) {
       continue; // no templates of this width, nothing to launch
     }
     final int width = minTemplateWidth + slot;
     final String kernelName = "compute_emissions_" + width;
     final int gridSizeY = (int) Math.ceil(((double) templateNumIndices[slot]) / blockSizeY);

     final CLKernel emissionKernel = program.createKernel(kernelName);
     emissionKernel.setArgs(
         templateIndicesOffsets[slot] * sequenceLength,
         sequenceLength,
         templateNumIndices[slot],
         d_Tw[slot],
         d_Tb[slot],
         d_Ow,
         d_Ob,
         d_scores);

     final int[] globalWorkSize = {gridSizeX * blockSizeX, gridSizeY * blockSizeY};
     final int[] localWorkSize = {blockSizeX, blockSizeY};
     emissionKernel.enqueueNDRange(queue, globalWorkSize, localWorkSize);
   }

   // Barrier after the kernel launches, then copy the device scores into the caller's array.
   queue.enqueueBarrier();
   d_scores.read(queue).getFloats(scores);
 }
예제 #2
0
  /**
   * Runs the vector-addition kernel on an OpenCL device and copies the result into {@code vecC}.
   *
   * <p>The kernel is launched with {@code vecC.length} work items and receives the three device
   * buffers (result, first operand, second operand) followed by {@code vecC.length}. The host
   * staging buffer is sized for the longest of the three arrays, so shorter operands are
   * zero-padded on upload.
   *
   * @param clType device type handed to {@code initCL}
   * @param vecC destination array; overwritten with the values read back from the device
   * @param vecA first operand
   * @param vecB second operand
   * @return {@code EXIT_SUCCESS} when the call completes without an exception, {@code
   *     EXIT_FAILURE} when a {@code CLException} or any other exception is caught
   */
  public static synchronized boolean addVec(
      CLDevice.Type clType, int[] vecC, int[] vecA, int[] vecB) {

    try {
      if (isDebugLoggingEnabled()) {
        Logger.logDebug(CLAZZ, "addVec() vecA: " + Convert.toString(vecA));
        Logger.logDebug(CLAZZ, "addVec() vecB: " + Convert.toString(vecB));
      }

      // Initialize the OpenCL objects.
      initCL(clType);

      // Log information about the selected OpenCL device.
      Logger.logInfo(CLAZZ, "max compute units: " + devices.get(0).getMaxComputeUnits());
      Logger.logInfo(CLAZZ, "max work group sizes: " + devices.get(0).getMaxWorkGroupSize());
      Logger.logInfo(
          CLAZZ, "max global mem size (KB): " + devices.get(0).getGlobalMemSize() / 1024);
      Logger.logInfo(CLAZZ, "max local mem size (KB): " + devices.get(0).getLocalMemSize() / 1024);

      // Create and prepare the data. allocateDirect() takes a size in BYTES, so the element
      // count is multiplied by the byte width of an int (Integer.SIZE alone is the bit width
      // and over-allocated by a factor of eight). Sizing for the longest array keeps every
      // kernel index below vecC.length inside the operand buffers.
      final int elementCount = Math.max(vecC.length, Math.max(vecA.length, vecB.length));
      IntBuffer tmpBuffer =
          ByteBuffer.allocateDirect(elementCount * (Integer.SIZE / Byte.SIZE))
              .order(context.getByteOrder())
              .asIntBuffer();

      fillDirectBuffer(tmpBuffer, vecA);
      CLBuffer<IntBuffer> aBuffer = context.createBuffer(CLMem.Usage.Input, tmpBuffer, true);

      // The staging buffer is reused; refilling also wipes any leftover values of vecA.
      fillDirectBuffer(tmpBuffer, vecB);
      CLBuffer<IntBuffer> bBuffer = context.createBuffer(CLMem.Usage.Input, tmpBuffer, true);

      CLBuffer<IntBuffer> cBuffer =
          context.createBuffer(CLMem.Usage.Output, vecC.length, IntBuffer.class);

      // Set the kernel arguments.
      kernel.setArg(0, cBuffer);
      kernel.setArg(1, aBuffer);
      kernel.setArg(2, bBuffer);
      kernel.setArg(3, vecC.length);

      // Run the kernel and wait for it to complete.
      CLEvent event = kernel.enqueueNDRange(cmdQ, new int[] {vecC.length}, new CLEvent[0]);
      event.waitFor();
      cmdQ.finish();

      // Fetch the result from the OpenCL device.
      cBuffer.read(cmdQ, tmpBuffer, true, new CLEvent[0]);
      tmpBuffer.clear();
      tmpBuffer.get(vecC);

      if (isDebugLoggingEnabled()) {
        Logger.logDebug(CLAZZ, "addVec() vecC: " + Convert.toString(vecC));
      }
    } catch (CLException err) {
      Logger.logError(CLAZZ, "OpenCL error:\n" + err.getMessage() + "():" + err.getCode());
      err.printStackTrace();
      return EXIT_FAILURE;
    } catch (Exception err) {
      Logger.logError(CLAZZ, "Error:\n" + err.getMessage() + "()");
      err.printStackTrace();
      return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
  }

  /** Returns whether the DEBUG bit is set in the logger's current log mask. */
  private static boolean isDebugLoggingEnabled() {
    return (Logger.getLogMask() & Level.DEFAULT.DEBUG.getLevel().getValue())
        == Level.DEFAULT.DEBUG.getLevel().getValue();
  }

  /**
   * Copies {@code values} to the start of {@code target}, zero-fills the rest of its capacity and
   * rewinds it, so position is 0 and limit equals capacity when the buffer is handed on.
   */
  private static void fillDirectBuffer(IntBuffer target, int[] values) {
    target.clear();
    target.put(values);
    while (target.hasRemaining()) {
      target.put(0);
    }
    target.rewind();
  }