@SuppressWarnings("unchecked") public void startup( float[][] whiteTemplates, float[][] blackTemplates, int[] templateNumIndices, int[] templateIndicesOffsets, int minTemplateWidth, int maxTemplateWidth, int maxSequenceLength, int totalTemplateNumIndices) { this.templateNumIndices = templateNumIndices; this.templateIndicesOffsets = templateIndicesOffsets; this.maxTemplateWidth = maxTemplateWidth; this.minTemplateWidth = minTemplateWidth; // Allocate the device input data int extendedMaxSeqLength = (blockSizeX * rollX) * (int) Math.ceil(((double) maxSequenceLength) / (blockSizeX * rollX)); this.d_Ow = context.createFloatBuffer( Usage.Input, (extendedMaxSeqLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT); this.d_Ob = context.createFloatBuffer( Usage.Input, (extendedMaxSeqLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT); this.d_scores = context.createFloatBuffer(Usage.Output, maxSequenceLength * totalTemplateNumIndices); int numTemplateWidths = (maxTemplateWidth - minTemplateWidth) + 1; this.d_Tw = new CLBuffer[numTemplateWidths]; this.d_Tb = new CLBuffer[numTemplateWidths]; for (int tw = minTemplateWidth; tw <= maxTemplateWidth; ++tw) { if (templateNumIndices[tw - minTemplateWidth] > 0) { d_Tw[tw - minTemplateWidth] = context.createFloatBuffer(Usage.Input, whiteTemplates[tw - minTemplateWidth].length); d_Tw[tw - minTemplateWidth].write( queue, pc.capture(Pointer.pointerToFloats(whiteTemplates[tw - minTemplateWidth])), false); d_Tb[tw - minTemplateWidth] = context.createFloatBuffer(Usage.Input, whiteTemplates[tw - minTemplateWidth].length); d_Tb[tw - minTemplateWidth].write( queue, pc.capture(Pointer.pointerToFloats(blackTemplates[tw - minTemplateWidth])), false); } } }
/**
 * Uploads one sequence of observations, runs the emission kernel for every template width that
 * has templates, and copies the device score buffer into {@code scores}.
 *
 * <p>Both observation arrays are passed through {@code CudaUtil.extendWithZeros} with a target
 * length of {@code (extendedSeqLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT},
 * where {@code extendedSeqLength} is {@code sequenceLength} rounded up to a multiple of
 * {@code blockSizeX * rollX}.
 *
 * @param scores destination array for the values read back from the device score buffer
 * @param whiteObservations white observation data for this sequence
 * @param blackObservations black observation data for this sequence
 * @param sequenceLength length of the sequence; passed to each kernel and used to derive the
 *     launch size along X
 */
public void compute(
    final float[] scores,
    final float[] whiteObservations,
    final float[] blackObservations,
    final int sequenceLength) {
  final int columnsPerBlock = blockSizeX * rollX;
  final int gridSizeX = (int) Math.ceil(((double) sequenceLength) / columnsPerBlock);
  final int extendedSeqLength = gridSizeX * columnsPerBlock;
  final int paddedLength =
      (extendedSeqLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT;

  // Upload the white observations, then the black ones.
  final float[] paddedWhite = CudaUtil.extendWithZeros(whiteObservations, paddedLength);
  d_Ow.write(queue, pc.capture(Pointer.pointerToFloats(paddedWhite)), false);

  final float[] paddedBlack = CudaUtil.extendWithZeros(blackObservations, paddedLength);
  d_Ob.write(queue, pc.capture(Pointer.pointerToFloats(paddedBlack)), false);

  queue.enqueueBarrier();

  // One kernel launch per template width that actually has templates.
  for (int tw = minTemplateWidth, i = 0; tw <= maxTemplateWidth; ++tw, ++i) {
    final int numIndices = templateNumIndices[i];
    if (numIndices <= 0) {
      continue;
    }

    final int gridSizeY = (int) Math.ceil(((double) numIndices) / blockSizeY);
    final int scoreOffset = templateIndicesOffsets[i] * sequenceLength;

    // NOTE(review): a kernel object is created on every call for every width; caching them
    // would need state outside this method -- confirm whether that is worthwhile.
    final CLKernel kernel = program.createKernel("compute_emissions_" + tw);
    kernel.setArgs(
        scoreOffset, sequenceLength, numIndices, d_Tw[i], d_Tb[i], d_Ow, d_Ob, d_scores);

    final int[] globalSizes = {gridSizeX * blockSizeX, gridSizeY * blockSizeY};
    final int[] localSizes = {blockSizeX, blockSizeY};
    kernel.enqueueNDRange(queue, globalSizes, localSizes);
  }

  queue.enqueueBarrier();

  // Read the score buffer back and copy it into the caller's array.
  d_scores.read(queue).getFloats(scores);
}
/**
 * Releases the observation and score buffers, every per-width template buffer whose
 * {@code templateNumIndices} entry is greater than zero, and finally calls
 * {@code pc.releaseAll()}.
 */
public void shutdown() {
  d_Ow.release();
  d_Ob.release();
  d_scores.release();

  final int widthCount = maxTemplateWidth - minTemplateWidth + 1;
  for (int i = 0; i < widthCount; ++i) {
    if (templateNumIndices[i] <= 0) {
      // Nothing is released for widths with no template indices.
      continue;
    }
    d_Tw[i].release();
    d_Tb[i].release();
  }

  pc.releaseAll();
}