コード例 #1
0
ファイル: BaseCudaDataBuffer.java プロジェクト: nchinth/nd4j
  /**
   * Copies {@code length()} elements from the given pointer into this buffer's host pointer using
   * a cuBLAS vector copy with unit stride on both sides, and marks the buffer as modified.
   *
   * <p>The double precision copy is used when {@link #dataType()} is {@code DOUBLE}; every other
   * data type goes through the single precision copy.
   *
   * @param pointer the pointer to copy the data from
   */
  @Override
  public void set(Pointer pointer) {
    modified.set(true);

    final boolean doublePrecision = dataType() == DataBuffer.Type.DOUBLE;
    if (!doublePrecision) {
      JCublas2.cublasScopy(
          ContextHolder.getInstance().getHandle(), (int) length(), pointer, 1, getHostPointer(), 1);
      return;
    }

    JCublas2.cublasDcopy(
        ContextHolder.getInstance().getHandle(), (int) length(), pointer, 1, getHostPointer(), 1);
  }
コード例 #2
0
ファイル: BaseCudaDataBuffer.java プロジェクト: nchinth/nd4j
  @Override
  public void copyToHost(int offset, int length) {
    DevicePointerInfo devicePointerInfo =
        pointersToContexts.get(Thread.currentThread().getName(), new Pair<>(offset, length));
    if (devicePointerInfo == null)
      throw new IllegalStateException("No pointer found for offset " + offset);
    // prevent inconsistent pointers
    if (devicePointerInfo.getOffset() != offset)
      throw new IllegalStateException(
          "Device pointer offset didn't match specified offset in pointer map");

    if (devicePointerInfo != null) {
      ContextHolder.syncStream();
      int deviceStride = devicePointerInfo.getStride();
      int deviceOffset = devicePointerInfo.getOffset();
      long deviceLength = devicePointerInfo.getLength();
      if (deviceOffset == 0 && length < length()) {
        /**
         * The way the data works out the stride for retrieving the data should be 1.
         *
         * <p>The device stride should be used for resetting the data.
         *
         * <p>This is for the edge case where the offset is zero and the length of the pointer is <
         * the actual buffer length itself.
         */
        JCublas2.cublasGetVectorAsync(
            length,
            getElementSize(),
            devicePointerInfo.getPointer().withByteOffset(offset * getElementSize()),
            deviceStride,
            getHostPointer(deviceOffset),
            deviceStride,
            ContextHolder.getInstance().getCudaStream());
      } else {
        JCublas2.cublasGetVectorAsync(
            (int) deviceLength,
            getElementSize(),
            devicePointerInfo.getPointer().withByteOffset(offset * getElementSize()),
            deviceStride,
            getHostPointer(deviceOffset),
            deviceStride,
            ContextHolder.getInstance().getCudaStream());
      }

      ContextHolder.syncStream();

    } else throw new IllegalStateException("No offset found to copy");
  }
コード例 #3
0
ファイル: BaseCudaDataBuffer.java プロジェクト: nchinth/nd4j
  /**
   * Copies {@code length} elements from {@code from} into this buffer, starting at element {@code
   * index}.
   *
   * <p>The buffer is marked as modified, the copy is issued asynchronously on the stream of the
   * {@link ContextHolder} and the stream is synchronized before returning.
   *
   * @param index the index of the first element of this buffer to write to
   * @param length the number of elements to copy
   * @param from the pointer to read the elements from
   * @param inc the stride between consecutive elements of {@code from}
   * @throws IllegalArgumentException if {@code index} is negative or not smaller than the length
   *     of this buffer
   */
  protected void set(int index, int length, Pointer from, int inc) {

    modified.set(true);

    int offset = getElementSize() * index;
    // Validate the element index rather than the byte offset: the byte offset is an int product
    // that can wrap around for large indices, and a negative index must never reach the copy.
    if (index < 0 || index >= length())
      throw new IllegalArgumentException(
          "Illegal offset " + offset + " with index of " + index + " and length " + length());

    JCublas2.cublasSetVectorAsync(
        length,
        getElementSize(),
        from,
        inc,
        getHostPointer().withByteOffset(offset),
        1,
        ContextHolder.getInstance().getCudaStream());

    ContextHolder.syncStream();
  }
コード例 #4
0
ファイル: BaseCudaDataBuffer.java プロジェクト: nchinth/nd4j
  /**
   * Releases the device pointer registered for the current thread and the given offset/length.
   *
   * <p>Pointers are registered under an (offset, length) pair, so the same pair is used here to
   * look them up and remove them.
   *
   * @param offset the element offset the device pointer was registered with
   * @param length the element length the device pointer was registered with
   * @return {@code true} if the buffer is persistent (nothing is freed) or if device memory was
   *     released by this call; {@code false} otherwise, including the case where only the
   *     bookkeeping entry of an offset pointer was removed
   */
  @Override
  public boolean freeDevicePointer(int offset, int length) {
    String name = Thread.currentThread().getName();
    DevicePointerInfo devicePointerInfo = pointersToContexts.get(name, new Pair<>(offset, length));

    if (offset != 0) {
      // nothing to free, there was no copy. Only the gpu pointer was reused with a different
      // offset, so dropping the bookkeeping entry is enough.
      pointersToContexts.remove(name, new Pair<>(offset, length));
      return false;
    }

    if (isPersist) {
      return true;
    }

    if (devicePointerInfo != null && !freed.get()) {
      allocated.addAndGet(-devicePointerInfo.getLength());
      log.trace("freeing {} bytes, total: {}", devicePointerInfo.getLength(), allocated.get());
      ContextHolder.getInstance().getMemoryStrategy().free(this, offset, length);
      freed.set(true);
      copied.remove(name);
      pointersToContexts.remove(name, new Pair<>(offset, length));
      return true;
    }

    return false;
  }
コード例 #5
0
ファイル: BaseCudaDataBuffer.java プロジェクト: nchinth/nd4j
  @Override
  public Pointer getDevicePointer(INDArray arr, int stride, int offset, int length) {
    String name = Thread.currentThread().getName();
    DevicePointerInfo devicePointerInfo = pointersToContexts.get(name, new Pair<>(offset, length));
    if (devicePointerInfo == null) {
      int devicePointerLength = getElementSize() * length;
      allocated.addAndGet(devicePointerLength);
      totalAllocated.addAndGet(devicePointerLength);
      log.trace(
          "Allocating {} bytes, total: {}, overall: {}",
          devicePointerLength,
          allocated.get(),
          totalAllocated);
      // check its the same object
      if (arr.data() != this) {
        throw new IllegalArgumentException(
            "Unable to get pointer for array that doesn't have this as the buffer");
      }
      int compareLength = arr instanceof IComplexNDArray ? arr.length() * 2 : arr.length();
      /**
       * Add zero first no matter what. Allocate the whole buffer on the gpu and use offsets for any
       * other pointers that come in. This will allow us to set device pointers with offsets
       *
       * <p>with no extra allocation.
       *
       * <p>Notice here we ignore the length of the actual array.
       *
       * <p>We are going to allocate the whole buffer on the gpu only once.
       */
      if (!pointersToContexts.contains(name, new Pair<>(0, this.length))) {
        devicePointerInfo =
            (DevicePointerInfo)
                ContextHolder.getInstance()
                    .getConf()
                    .getMemoryStrategy()
                    .alloc(this, 1, 0, this.length);

        pointersToContexts.put(name, new Pair<>(0, this.length), devicePointerInfo);
      }

      if (offset > 0) {
        /**
         * Store the length for the offset of the pointer. Return the original pointer with an
         * offset (these pointers can't be reused?)
         *
         * <p>With the device pointer info, we want to store the original pointer. When retrieving
         * the vector from the gpu later, we will use the recorded offset.
         *
         * <p>Due to gpu instability (please correct me if I'm wrong here) we can't seem to reuse
         * the pointers with the offset specified, therefore it is desirable to recreate this
         * pointer later.
         *
         * <p>This will prevent extra allocation as well as inform the length for retrieving data
         * from the gpu for this particular offset and buffer.
         */
        DevicePointerInfo info2 = pointersToContexts.get(name, new Pair<>(0, this.length));
        if (info2 == null)
          throw new IllegalStateException(
              "No pointer found for name "
                  + name
                  + " and offset/length "
                  + offset
                  + " / "
                  + length);
        Pointer zero = info2.getPointer();
        Pointer ret = info2.getPointer().withByteOffset(offset * getElementSize());
        devicePointerInfo = new DevicePointerInfo(zero, length, stride, offset);
        pointersToContexts.put(name, new Pair<>(offset, compareLength), devicePointerInfo);
        return ret;

      } else if (offset == 0 && compareLength < arr.data().length()) {
        DevicePointerInfo info2 = pointersToContexts.get(name, new Pair<>(0, this.length));
        DevicePointerInfo info3 =
            new DevicePointerInfo(
                info2.getPointer(), this.length, BlasBufferUtil.getBlasStride(arr), arr.offset());
        int compareLength2 = arr instanceof IComplexNDArray ? arr.length() * 2 : arr.length();

        /**
         * Need a pointer that points at the buffer but doesnt extend all the way to the end. This
         * is for data like the first row of a matrix that has zero offset but does not extend all
         * the way to the end of the buffer.
         */
        pointersToContexts.put(name, new Pair<>(offset, compareLength2), info3);
        return info3.getPointer();
      }

      freed.set(false);
    }

    /**
     * Return the device pointer with the specified offset. Regardless of whether the device pointer
     * has been allocated, we need to return with it respect to the specified array not the array's
     * underlying buffer.
     */
    if (devicePointerInfo == null && offset == 0 && length < length()) {
      DevicePointerInfo origin =
          pointersToContexts.get(Thread.currentThread().getName(), new Pair<>(0, length()));
      DevicePointerInfo newInfo = new DevicePointerInfo(origin.getPointer(), length, stride, 0);
      return newInfo.getPointer();
    }

    return devicePointerInfo.getPointer().withByteOffset(offset * getElementSize());
  }
コード例 #6
0
ファイル: BaseCudaDataBuffer.java プロジェクト: nchinth/nd4j
  @Override
  public Pointer getDevicePointer(int stride, int offset, int length) {
    String name = Thread.currentThread().getName();
    DevicePointerInfo devicePointerInfo = pointersToContexts.get(name, new Pair<>(offset, length));
    if (devicePointerInfo == null) {
      int devicePointerLength = getElementSize() * length;
      allocated.addAndGet(devicePointerLength);
      totalAllocated.addAndGet(devicePointerLength);
      log.trace(
          "Allocating {} bytes, total: {}, overall: {}",
          devicePointerLength,
          allocated.get(),
          totalAllocated);
      if (devicePointerInfo == null) {
        /**
         * Add zero first no matter what. Allocate the whole buffer on the gpu and use offsets for
         * any other pointers that come in. This will allow us to set device pointers with offsets
         *
         * <p>with no extra allocation.
         *
         * <p>Notice here we ignore the length of the actual array.
         *
         * <p>We are going to allocate the whole buffer on the gpu only once.
         */
        if (!pointersToContexts.contains(name, new Pair<>(0, this.length))) {

          devicePointerInfo =
              (DevicePointerInfo)
                  ContextHolder.getInstance()
                      .getConf()
                      .getMemoryStrategy()
                      .alloc(this, 1, 0, this.length);

          pointersToContexts.put(name, new Pair<>(0, this.length), devicePointerInfo);
        }

        if (offset > 0) {
          /**
           * Store the length for the offset of the pointer. Return the original pointer with an
           * offset (these pointers can't be reused?)
           *
           * <p>With the device pointer info, we want to store the original pointer. When retrieving
           * the vector from the gpu later, we will use the recorded offset.
           *
           * <p>Due to gpu instability (please correct me if I'm wrong here) we can't seem to reuse
           * the pointers with the offset specified, therefore it is desirable to recreate this
           * pointer later.
           *
           * <p>This will prevent extra allocation as well as inform the length for retrieving data
           * from the gpu for this particular offset and buffer.
           */
          Pointer zero = pointersToContexts.get(name, 0).getPointer();
          Pointer ret =
              pointersToContexts
                  .get(name, 0)
                  .getPointer()
                  .withByteOffset(offset * getElementSize());
          devicePointerInfo = new DevicePointerInfo(zero, length, stride, offset);
          pointersToContexts.put(name, new Pair<>(offset, length), devicePointerInfo);
          return ret;
        }
      }

      freed.set(false);
    }

    /**
     * Return the device pointer with the specified offset. Regardless of whether the device pointer
     * has been allocated, we need to return with it respect to the specified array not the array's
     * underlying buffer.
     */
    return devicePointerInfo.getPointer().withByteOffset(offset * getElementSize());
  }