Example #1
  /**
   * Calculates the activation of the hidden layer given the visible layer: sigmoid(v * W + hbias)
   *
   * @param v the visible layer
   * @param training true if called during training (enables drop connect on the weights)
   * @return the approximated activations of the hidden layer
   */
  public INDArray propUp(INDArray v, boolean training) {
    INDArray W = getParam(PretrainParamInitializer.WEIGHT_KEY);
    // Apply drop connect to the weights during training only
    if (training && conf.isUseDropConnect() && conf.getLayer().getDropOut() > 0) {
      W = Dropout.applyDropConnect(this, DefaultParamInitializer.WEIGHT_KEY);
    }
    INDArray hBias = getParam(PretrainParamInitializer.BIAS_KEY);

    // For Gaussian visible units, track the per-column variance of the input
    if (layerConf().getVisibleUnit() == org.deeplearning4j.nn.conf.layers.RBM.VisibleUnit.GAUSSIAN)
      this.sigma = v.var(0).divi(input.rows());

    // Pre-activation: v * W + hBias
    INDArray preSig = v.mmul(W).addiRowVector(hBias);

    switch (layerConf().getHiddenUnit()) {
      case RECTIFIED:
        preSig = max(preSig, 0.0);
        return preSig;
      case GAUSSIAN:
        preSig.addi(Nd4j.randn(preSig.rows(), preSig.columns(), rng));
        return preSig;
      case BINARY:
        return sigmoid(preSig);
      case SOFTMAX:
        return Nd4j.getExecutioner()
            .execAndReturn(Nd4j.getOpFactory().createTransform("softmax", preSig));
      default:
        throw new IllegalStateException(
            "Hidden unit type should be binary, gaussian, rectified linear, or softmax");
    }
  }
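
For reference, here is a minimal plain-Java sketch of the BINARY case above, sigmoid(v * W + hbias), with the ND4J calls replaced by explicit loops. All names are illustrative and not part of the DL4J API.

  // Illustrative only: computes sigmoid(v * W + hBias) for one input row.
  public class PropUpSketch {
    static double[] propUp(double[] v, double[][] W, double[] hBias) {
      double[] h = new double[hBias.length];
      for (int j = 0; j < h.length; j++) {
        double preSig = hBias[j];               // hidden bias
        for (int i = 0; i < v.length; i++) {
          preSig += v[i] * W[i][j];             // v * W
        }
        h[j] = 1.0 / (1.0 + Math.exp(-preSig)); // sigmoid
      }
      return h;
    }

    public static void main(String[] args) {
      double[] v = {1.0, 0.0, 1.0};
      double[][] W = {{0.5, -0.2}, {0.1, 0.4}, {-0.3, 0.8}};
      double[] hBias = {0.0, 0.1};
      System.out.println(java.util.Arrays.toString(propUp(v, W, hBias)));
    }
  }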
Example #2
 /**
  * Computes the hidden layer activations by propagating the (optionally dropped-out) input up
  * through the layer.
  *
  * @param training true if invoked during training (enables dropout on the input)
  * @return the hidden layer activations for the current input
  */
 @Override
 public INDArray activate(boolean training) {
   if (training && conf.getLayer().getDropOut() > 0.0) {
     input = Dropout.applyDropout(input, conf.getLayer().getDropOut(), dropoutMask);
   }
    // Propagate the (possibly dropped-out) input up to the hidden layer
    return propUp(input, training);
 }
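
The Dropout.applyDropout call above zeroes out inputs at random during training. As a rough sketch of the idea, using the inverted-dropout convention (DL4J's actual scaling convention may differ), each element is dropped with probability p and survivors are rescaled:

  // Hypothetical sketch of inverted dropout; not DL4J's implementation.
  static double[] applyDropoutSketch(double[] input, double p, java.util.Random rng) {
    double[] out = new double[input.length];
    for (int i = 0; i < input.length; i++) {
      // drop each element with probability p; rescale survivors by 1/(1-p)
      out[i] = (rng.nextDouble() < p) ? 0.0 : input[i] / (1.0 - p);
    }
    return out;
  }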
Example #3
  /** Returns activations array: {output,rucZs,rucAs} in that order. */
  private INDArray[] activateHelper(boolean training, INDArray prevOutputActivations) {

    INDArray inputWeights =
        getParam(GRUParamInitializer.INPUT_WEIGHT_KEY); // Shape: [n^(L-1),3*n^L], order: [wr,wu,wc]
    INDArray recurrentWeights =
        getParam(GRUParamInitializer.RECURRENT_WEIGHT_KEY); // Shape: [n^L,3*n^L]; order: [wR,wU,wC]
    INDArray biases = getParam(GRUParamInitializer.BIAS_KEY); // Shape: [1,3*n^L]; order: [br,bu,bc]

    boolean is2dInput =
        input.rank() < 3; // Edge case of T=1, may have shape [m,nIn], equiv. to [m,nIn,1]
    int timeSeriesLength = (is2dInput ? 1 : input.size(2));
    int hiddenLayerSize = recurrentWeights.size(0);
    int miniBatchSize = input.size(0);

    int layerSize = hiddenLayerSize;
    INDArray wr = inputWeights.get(NDArrayIndex.all(), interval(0, layerSize));
    INDArray wu = inputWeights.get(NDArrayIndex.all(), interval(layerSize, 2 * layerSize));
    INDArray wc = inputWeights.get(NDArrayIndex.all(), interval(2 * layerSize, 3 * layerSize));
    INDArray wR = recurrentWeights.get(NDArrayIndex.all(), interval(0, layerSize));
    INDArray wU = recurrentWeights.get(NDArrayIndex.all(), interval(layerSize, 2 * layerSize));
    INDArray wC = recurrentWeights.get(NDArrayIndex.all(), interval(2 * layerSize, 3 * layerSize));
    INDArray br = biases.get(NDArrayIndex.point(0), interval(0, layerSize));
    INDArray bu = biases.get(NDArrayIndex.point(0), interval(layerSize, 2 * layerSize));
    INDArray bc = biases.get(NDArrayIndex.point(0), interval(2 * layerSize, 3 * layerSize));
    // INDArray wRAndU = recurrentWeights.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 2*hiddenLayerSize));
    // INDArray wC = recurrentWeights.get(NDArrayIndex.all(), NDArrayIndex.interval(2*hiddenLayerSize, 3*hiddenLayerSize));

    // Apply dropconnect to input (not recurrent) weights only:
    if (conf.isUseDropConnect() && training) {
      if (conf.getLayer().getDropOut() > 0) {
        inputWeights = Dropout.applyDropConnect(this, GRUParamInitializer.INPUT_WEIGHT_KEY);
      }
    }

    // Allocate arrays for activations:
    INDArray outputActivations = Nd4j.zeros(miniBatchSize, hiddenLayerSize, timeSeriesLength);
    INDArray rucZs =
        Nd4j.zeros(
            miniBatchSize,
            3 * hiddenLayerSize,
            timeSeriesLength); // zs for reset gate, update gate, candidate activation
    INDArray rucAs =
        Nd4j.zeros(miniBatchSize, 3 * hiddenLayerSize, timeSeriesLength); // activations for above

    if (prevOutputActivations == null)
      prevOutputActivations = Nd4j.zeros(miniBatchSize, hiddenLayerSize);
    for (int t = 0; t < timeSeriesLength; t++) {
      // Expected shape: [m,nIn]. Also handles the edge case of T=1, where 'time series'
      // data of shape [m,nIn] is equivalent to [m,nIn,1]
      INDArray prevLayerInputSlice =
          (is2dInput ? input : input.tensorAlongDimension(t, 1, 0));
      if (t > 0)
        prevOutputActivations =
            outputActivations.tensorAlongDimension(t - 1, 1, 0); // Shape: [m,nL]

      /* This commented-out (vectorized) implementation should give the same results as the
       * 'naive' implementation that follows; the naive approach is used at present for
       * debugging purposes.
       *
      //Calculate reset gate, update gate and candidate zs
      //First: inputs + biases for all (reset gate, update gate, candidate activation)
      INDArray zs = prevLayerInputSlice.mmul(inputWeights).addiRowVector(biases); //Shape: [m,3n^L]

      //Recurrent weights * prevInput for reset and update gates:
      INDArray zrAndu = zs.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 2*hiddenLayerSize));
      zrAndu.addi(prevOutputActivations.mmul(wRAndU)); //zr and zu now have all components

      INDArray as = zs.dup();
      INDArray arAndu = as.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 2*hiddenLayerSize));
      Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("sigmoid", arAndu)); //Sigmoid for both reset and update gates

      //Recurrent component of candidate z (previously: zc has only input and bias components)
      INDArray ar = as.get(NDArrayIndex.all(), NDArrayIndex.interval(0, hiddenLayerSize));
      INDArray zc = zs.get(NDArrayIndex.all(), NDArrayIndex.interval(2*hiddenLayerSize, 3*hiddenLayerSize));
      zc.addi(ar.mul(prevOutputActivations).mmul(wC));

      INDArray ac = as.get(NDArrayIndex.all(), NDArrayIndex.interval(2*hiddenLayerSize, 3*hiddenLayerSize));
      ac.assign(zc);
      Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf.getLayer().getActivationFunction(), ac));

      //Finally, calculate output activation:
      INDArray au = as.get(NDArrayIndex.all(), NDArrayIndex.interval(hiddenLayerSize, 2*hiddenLayerSize));
      INDArray outputASlice = au.mul(prevOutputActivations).addi(au.rsub(1).muli(ac));
      */

      // Pre-activations (zs) and activations (as) for [reset, update, candidate], this time step
      INDArray zs = Nd4j.zeros(miniBatchSize, 3 * hiddenLayerSize);
      INDArray as = Nd4j.zeros(miniBatchSize, 3 * hiddenLayerSize);

      // Reset gate: zr = x_t * wr + h_(t-1) * wR + br; ar = sigmoid(zr)
      INDArray zr =
          prevLayerInputSlice.mmul(wr).addi(prevOutputActivations.mmul(wR)).addiRowVector(br);
      INDArray ar =
          Nd4j.getExecutioner()
              .execAndReturn(Nd4j.getOpFactory().createTransform("sigmoid", zr.dup()));
      zs.get(NDArrayIndex.all(), NDArrayIndex.interval(0, hiddenLayerSize)).assign(zr);
      as.get(NDArrayIndex.all(), NDArrayIndex.interval(0, hiddenLayerSize)).assign(ar);

      // Update gate: zu = x_t * wu + h_(t-1) * wU + bu; au = sigmoid(zu)
      INDArray zu =
          prevLayerInputSlice.mmul(wu).addi(prevOutputActivations.mmul(wU)).addiRowVector(bu);
      INDArray au =
          Nd4j.getExecutioner()
              .execAndReturn(Nd4j.getOpFactory().createTransform("sigmoid", zu.dup()));
      zs.get(NDArrayIndex.all(), NDArrayIndex.interval(hiddenLayerSize, 2 * hiddenLayerSize))
          .assign(zu);
      as.get(NDArrayIndex.all(), NDArrayIndex.interval(hiddenLayerSize, 2 * hiddenLayerSize))
          .assign(au);

      // Candidate activation: zc = x_t * wc + (ar .* h_(t-1)) * wC + bc;
      // ac = activationFn(zc). The reset gate ar gates the recurrent contribution.
      INDArray zc =
          prevLayerInputSlice
              .mmul(wc)
              .addi(prevOutputActivations.mul(ar).mmul(wC))
              .addiRowVector(bc);
      INDArray ac =
          Nd4j.getExecutioner()
              .execAndReturn(
                  Nd4j.getOpFactory()
                      .createTransform(conf.getLayer().getActivationFunction(), zc.dup()));
      zs.get(NDArrayIndex.all(), NDArrayIndex.interval(2 * hiddenLayerSize, 3 * hiddenLayerSize))
          .assign(zc);
      as.get(NDArrayIndex.all(), NDArrayIndex.interval(2 * hiddenLayerSize, 3 * hiddenLayerSize))
          .assign(ac);

      // Output: h_t = au .* h_(t-1) + (1 - au) .* ac
      INDArray aOut = au.mul(prevOutputActivations).addi(au.rsub(1).mul(ac));

      // Store pre-activations, gate activations, and output for time step t
      rucZs.tensorAlongDimension(t, 1, 0).assign(zs);
      rucAs.tensorAlongDimension(t, 1, 0).assign(as);
      outputActivations.tensorAlongDimension(t, 1, 0).assign(aOut);
    }

    return new INDArray[] {outputActivations, rucZs, rucAs};
  }
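
To make the gate arithmetic in the loop easier to follow, here is a self-contained single-example GRU step using plain arrays. It mirrors the naive implementation above: r = sigmoid(x*wr + h*wR + br), u = sigmoid(x*wu + h*wU + bu), c = tanh(x*wc + (r .* h)*wC + bc), hOut = u .* h + (1 - u) .* c. Here tanh stands in for conf.getLayer().getActivationFunction(); everything below is illustrative, not DL4J API.

  // Illustrative single-example GRU step; mirrors the naive loop above.
  public class GruStepSketch {
    static double[] mmul(double[] x, double[][] W) { // row vector times matrix
      double[] out = new double[W[0].length];
      for (int j = 0; j < out.length; j++)
        for (int i = 0; i < x.length; i++) out[j] += x[i] * W[i][j];
      return out;
    }

    static double sigmoid(double z) {
      return 1.0 / (1.0 + Math.exp(-z));
    }

    static double[] gruStep(double[] x, double[] h,
        double[][] wr, double[][] wu, double[][] wc,   // input weights
        double[][] wR, double[][] wU, double[][] wC,   // recurrent weights
        double[] br, double[] bu, double[] bc) {       // biases
      int n = h.length;
      double[] xr = mmul(x, wr), hr = mmul(h, wR);
      double[] xu = mmul(x, wu), hu = mmul(h, wU);
      double[] r = new double[n], u = new double[n], rh = new double[n];
      for (int j = 0; j < n; j++) {
        r[j] = sigmoid(xr[j] + hr[j] + br[j]); // reset gate
        u[j] = sigmoid(xu[j] + hu[j] + bu[j]); // update gate
        rh[j] = r[j] * h[j];                   // reset-gated previous state
      }
      double[] xc = mmul(x, wc), hc = mmul(rh, wC);
      double[] hOut = new double[n];
      for (int j = 0; j < n; j++) {
        double c = Math.tanh(xc[j] + hc[j] + bc[j]); // candidate activation
        hOut[j] = u[j] * h[j] + (1.0 - u[j]) * c;    // blend old state and candidate
      }
      return hOut;
    }
  }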