コード例 #1
0
ファイル: OpenCLInnerLoop.java プロジェクト: duncdrum/ocular
  /**
   * Allocates the device buffers used by this inner loop and uploads the template data.
   *
   * <p>The method stores the template bookkeeping arrays and the width bounds on this instance,
   * then creates:
   *
   * <ul>
   *   <li>two input buffers ({@code d_Ow}, {@code d_Ob}) of
   *       {@code (extendedMaxSeqLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT}
   *       floats, where {@code extendedMaxSeqLength} is {@code maxSequenceLength} rounded up to a
   *       multiple of {@code blockSizeX * rollX};
   *   <li>one output buffer ({@code d_scores}) of
   *       {@code maxSequenceLength * totalTemplateNumIndices} floats;
   *   <li>for every template width whose entry in {@code templateNumIndices} is positive, one
   *       white and one black template buffer, each sized to and filled from the corresponding
   *       host array.
   * </ul>
   *
   * <p>Slots of {@code d_Tw}/{@code d_Tb} for widths without any template indices stay
   * {@code null}.
   *
   * @param whiteTemplates white template data, indexed by {@code width - minTemplateWidth}
   * @param blackTemplates black template data, indexed by {@code width - minTemplateWidth}
   * @param templateNumIndices per-width count, indexed by {@code width - minTemplateWidth};
   *     widths with a count of zero or less are skipped
   * @param templateIndicesOffsets per-width offsets, stored for later use by this instance
   * @param minTemplateWidth smallest template width (inclusive)
   * @param maxTemplateWidth largest template width (inclusive)
   * @param maxSequenceLength longest sequence length the buffers must accommodate
   * @param totalTemplateNumIndices multiplier used to size the score buffer
   */
  @SuppressWarnings("unchecked")
  public void startup(
      float[][] whiteTemplates,
      float[][] blackTemplates,
      int[] templateNumIndices,
      int[] templateIndicesOffsets,
      int minTemplateWidth,
      int maxTemplateWidth,
      int maxSequenceLength,
      int totalTemplateNumIndices) {
    this.templateNumIndices = templateNumIndices;
    this.templateIndicesOffsets = templateIndicesOffsets;
    this.maxTemplateWidth = maxTemplateWidth;
    this.minTemplateWidth = minTemplateWidth;

    // Allocate the device input data. The sequence length is rounded up to a whole number of
    // (blockSizeX * rollX)-sized chunks before sizing the observation buffers.
    int chunkSize = blockSizeX * rollX;
    int extendedMaxSeqLength = chunkSize * (int) Math.ceil(((double) maxSequenceLength) / chunkSize);
    int observationLength =
        (extendedMaxSeqLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT;
    this.d_Ow = context.createFloatBuffer(Usage.Input, observationLength);
    this.d_Ob = context.createFloatBuffer(Usage.Input, observationLength);
    this.d_scores =
        context.createFloatBuffer(Usage.Output, maxSequenceLength * totalTemplateNumIndices);

    int numTemplateWidths = (maxTemplateWidth - minTemplateWidth) + 1;
    this.d_Tw = new CLBuffer[numTemplateWidths];
    this.d_Tb = new CLBuffer[numTemplateWidths];
    for (int tw = minTemplateWidth; tw <= maxTemplateWidth; ++tw) {
      int slot = tw - minTemplateWidth;
      if (templateNumIndices[slot] <= 0) {
        continue; // no templates of this width: leave both slots null
      }

      // NOTE(review): the trailing `false` is presumably JavaCL's "blocking" flag, i.e. the
      // upload is asynchronous; pc.capture(...) presumably keeps the host pointer reachable
      // until pc.releaseAll() is called — confirm against the pointer-capture helper.
      float[] white = whiteTemplates[slot];
      d_Tw[slot] = context.createFloatBuffer(Usage.Input, white.length);
      d_Tw[slot].write(queue, pc.capture(Pointer.pointerToFloats(white)), false);

      // Size the black buffer from the black template data that is actually written into it
      // (previously it was sized from the white template array).
      float[] black = blackTemplates[slot];
      d_Tb[slot] = context.createFloatBuffer(Usage.Input, black.length);
      d_Tb[slot].write(queue, pc.capture(Pointer.pointerToFloats(black)), false);
    }
  }
コード例 #2
0
ファイル: OpenCLInnerLoop.java プロジェクト: duncdrum/ocular
 /**
  * Uploads one pair of observation arrays, enqueues one kernel per usable template width, and
  * reads the score buffer back.
  *
  * <p>Both observation arrays are passed through {@code CudaUtil.extendWithZeros} with a target
  * length of {@code (extendedSeqLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT},
  * where {@code extendedSeqLength} is {@code sequenceLength} rounded up to a multiple of
  * {@code blockSizeX * rollX}. For each template width whose {@code templateNumIndices} entry is
  * positive, the kernel named {@code "compute_emissions_" + width} is created and enqueued over
  * a {@code (gridSizeX * blockSizeX) x (gridSizeY * blockSizeY)} range with
  * {@code blockSizeX x blockSizeY} work-groups.
  *
  * @param scores destination passed to {@code getFloats} on the data read from {@code d_scores}
  *     (presumably filled in place — confirm against the BridJ {@code Pointer} API)
  * @param whiteObservations host-side white observation data
  * @param blackObservations host-side black observation data
  * @param sequenceLength sequence length of the observations being scored
  */
 public void compute(
     final float[] scores,
     final float[] whiteObservations,
     final float[] blackObservations,
     final int sequenceLength) {
   final int gridSizeX = (int) Math.ceil(((double) sequenceLength) / (blockSizeX * rollX));
   final int extendedSeqLength = gridSizeX * (blockSizeX * rollX);
   // Both observation uploads are padded to the same length.
   final int paddedLength =
       (extendedSeqLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT;

   d_Ow.write(
       queue,
       pc.capture(
           Pointer.pointerToFloats(CudaUtil.extendWithZeros(whiteObservations, paddedLength))),
       false);
   d_Ob.write(
       queue,
       pc.capture(
           Pointer.pointerToFloats(CudaUtil.extendWithZeros(blackObservations, paddedLength))),
       false);
   // Barrier between the uploads and the kernels that consume them.
   queue.enqueueBarrier();

   for (int tw = minTemplateWidth; tw <= maxTemplateWidth; ++tw) {
     final int slot = tw - minTemplateWidth;
     final int numIndices = templateNumIndices[slot];
     if (numIndices <= 0) {
       continue; // nothing to score for this width
     }

     final int gridSizeY = (int) Math.ceil(((double) numIndices) / blockSizeY);
     final int[] globalSize = new int[] {gridSizeX * blockSizeX, gridSizeY * blockSizeY};
     final int[] localSize = new int[] {blockSizeX, blockSizeY};

     CLKernel kernel = program.createKernel("compute_emissions_" + tw);
     kernel.setArgs(
         templateIndicesOffsets[slot] * sequenceLength,
         sequenceLength,
         numIndices,
         d_Tw[slot],
         d_Tb[slot],
         d_Ow,
         d_Ob,
         d_scores);
     kernel.enqueueNDRange(queue, globalSize, localSize);
   }

   // Barrier between the kernels and the read-back of their output.
   queue.enqueueBarrier();
   d_scores.read(queue).getFloats(scores);
 }
コード例 #3
0
ファイル: OpenCLInnerLoop.java プロジェクト: duncdrum/ocular
 /**
  * Releases every device buffer created for this inner loop, then calls
  * {@code pc.releaseAll()}.
  *
  * <p>The observation and score buffers are released first. Template buffers are released only
  * for widths whose {@code templateNumIndices} entry is positive.
  */
 public void shutdown() {
   d_Ow.release();
   d_Ob.release();
   d_scores.release();

   for (int tw = minTemplateWidth; tw <= maxTemplateWidth; ++tw) {
     final int slot = tw - minTemplateWidth;
     if (templateNumIndices[slot] <= 0) {
       continue; // this width is skipped, mirroring the positive-count check
     }
     d_Tw[slot].release();
     d_Tb[slot].release();
   }

   pc.releaseAll();
 }