private static void initCL(CLDevice.Type clType) throws Exception { /** * Hole OpenCL-Plattformen z.B. AMD APP, NVIDIA CUDA ** */ platforms = JavaCL.listPlatforms(); /** * Hole OpenCL-Device des geforderten Typs z.B. GPU, CPU ** */ EnumSet<CLDevice.Type> types = EnumSet.of(clType); devices = new ArrayList<CLDevice>(); CLDevice[] devTmp; for (CLPlatform platform : platforms) { devTmp = platform.listDevices(types, true); devices.addAll(Arrays.asList(devTmp)); } /** * Erstelle OpenCL-Context und CommandQueue ** */ devTmp = new CLDevice[devices.size()]; context = JavaCL.createContext(null, devices.toArray(devTmp)); cmdQ = context.createDefaultQueue(QueueProperties.ProfilingEnable); /** * OpenCL-Quellcode einlesen ** */ String src = readFile(KERNEL_PATH); // String src = KERNEL_SRC; /** * OpenCL-Programm aus Quellcode erstellen ** */ program = context.createProgram(src); try { program.build(); } catch (CLBuildException err) { Logger.logError(CLAZZ, "Build log for \"" + devices.get(0) + "\n" + err.getMessage()); throw err; } /** * OpenCL-Kernel laden ** */ kernel = program.createKernel("addVec"); }
/**
 * Runs the per-template-width emission kernels and copies the resulting scores into
 * {@code scores}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Round {@code sequenceLength} up to a whole number of {@code blockSizeX * rollX}
 *       chunks.</li>
 *   <li>Write the white and black observations, passed through
 *       {@code CudaUtil.extendWithZeros} with the padded length, into {@code d_Ow} and
 *       {@code d_Ob}.</li>
 *   <li>For every template width from {@code minTemplateWidth} to {@code maxTemplateWidth}
 *       that has at least one template index, create the kernel
 *       {@code compute_emissions_<width>} and enqueue it.</li>
 *   <li>Read {@code d_scores} back into {@code scores}.</li>
 * </ol>
 *
 * @param scores destination array that receives the contents of {@code d_scores}
 * @param whiteObservations white observation values to upload
 * @param blackObservations black observation values to upload
 * @param sequenceLength sequence length passed to each kernel and used to size the grid
 */
public void compute(
    final float[] scores,
    final float[] whiteObservations,
    final float[] blackObservations,
    final int sequenceLength) {
  // Grid extent along X: enough blocks of (blockSizeX * rollX) to cover the sequence.
  final int columnsPerBlock = blockSizeX * rollX;
  final int gridSizeX = (int) Math.ceil(((double) sequenceLength) / columnsPerBlock);
  final int extendedSeqLength = gridSizeX * columnsPerBlock;

  // Both observation buffers are extended to the same length before upload.
  final int paddedLength =
      (extendedSeqLength + maxTemplateWidth - 1) * CharacterTemplate.LINE_HEIGHT;

  // The pointers go through pc.capture(...) and the writes pass `false` as the last
  // argument (presumably non-blocking — confirm against the JavaCL buffer API).
  d_Ow.write(
      queue,
      pc.capture(
          Pointer.pointerToFloats(CudaUtil.extendWithZeros(whiteObservations, paddedLength))),
      false);
  d_Ob.write(
      queue,
      pc.capture(
          Pointer.pointerToFloats(CudaUtil.extendWithZeros(blackObservations, paddedLength))),
      false);
  queue.enqueueBarrier();

  for (int tw = minTemplateWidth; tw <= maxTemplateWidth; tw++) {
    // Per-width arrays are indexed relative to the smallest template width.
    final int slot = tw - minTemplateWidth;
    if (templateNumIndices[slot] <= 0) {
      continue; // nothing to compute for this width
    }

    final int gridSizeY = (int) Math.ceil(((double) templateNumIndices[slot]) / blockSizeY);

    final CLKernel widthKernel = program.createKernel("compute_emissions_" + tw);
    widthKernel.setArgs(
        templateIndicesOffsets[slot] * sequenceLength,
        sequenceLength,
        templateNumIndices[slot],
        d_Tw[slot],
        d_Tb[slot],
        d_Ow,
        d_Ob,
        d_scores);

    final int[] globalSize = {gridSizeX * blockSizeX, gridSizeY * blockSizeY};
    final int[] localSize = {blockSizeX, blockSizeY};
    widthKernel.enqueueNDRange(queue, globalSize, localSize);
  }

  queue.enqueueBarrier();
  d_scores.read(queue).getFloats(scores);
}