/**
 * Calculates the activation of the hidden layer given the visible layer: sigmoid(v * W + hbias)
 *
 * @param v the visible layer
 * @return the approximated activations of the hidden layer
 */
public INDArray propUp(INDArray v, boolean training) {
    INDArray W = getParam(PretrainParamInitializer.WEIGHT_KEY);
    // Apply dropconnect to the weights during training only
    if (training && conf.isUseDropConnect() && conf.getLayer().getDropOut() > 0) {
        W = Dropout.applyDropConnect(this, DefaultParamInitializer.WEIGHT_KEY);
    }
    INDArray hBias = getParam(PretrainParamInitializer.BIAS_KEY);

    if (layerConf().getVisibleUnit() == org.deeplearning4j.nn.conf.layers.RBM.VisibleUnit.GAUSSIAN)
        this.sigma = v.var(0).divi(input.rows());

    INDArray preSig = v.mmul(W).addiRowVector(hBias);

    switch (layerConf().getHiddenUnit()) {
        case RECTIFIED:
            preSig = max(preSig, 0.0);
            return preSig;
        case GAUSSIAN:
            preSig.addi(Nd4j.randn(preSig.rows(), preSig.columns(), rng));
            return preSig;
        case BINARY:
            return sigmoid(preSig);
        case SOFTMAX:
            return Nd4j.getExecutioner()
                .execAndReturn(Nd4j.getOpFactory().createTransform("softmax", preSig));
        default:
            throw new IllegalStateException(
                "Hidden unit type must be binary, gaussian, softmax, or rectified linear");
    }
}
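For reference, here is a minimal standalone sketch of the BINARY branch above, written against plain ND4J. It ties the formula in the javadoc (sigmoid(v * W + hbias)) to code; the class and method names are illustrative only and are not part of the layer API.

import org.nd4j.linalg.api.ndarray.INDArray;
import static org.nd4j.linalg.ops.transforms.Transforms.sigmoid;

public class PropUpSketch {
    /** Hidden activations for binary hidden units: h = sigmoid(v * W + hBias). */
    static INDArray binaryPropUp(INDArray v, INDArray W, INDArray hBias) {
        INDArray preSig = v.mmul(W).addRowVector(hBias); // [m,nVisible] x [nVisible,nHidden], plus bias per row
        return sigmoid(preSig);                          // element-wise logistic sigmoid
    }
}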
/**
 * Computes the hidden layer activations for the current visible input via a propUp,
 * optionally applying dropout to the input during training.
 *
 * @param training true if called during training, false otherwise
 * @return the hidden layer activations for the visible input
 */
@Override
public INDArray activate(boolean training) {
    if (training && conf.getLayer().getDropOut() > 0.0) {
        input = Dropout.applyDropout(input, conf.getLayer().getDropOut(), dropoutMask);
    }
    // Propagate the (possibly dropped-out) visible input up to the hidden layer
    return propUp(input, training);
}
/** Returns activations array: {output,rucZs,rucAs} in that order. */
private INDArray[] activateHelper(boolean training, INDArray prevOutputActivations) {
    INDArray inputWeights =
        getParam(GRUParamInitializer.INPUT_WEIGHT_KEY); // Shape: [n^(L-1),3*n^L]; order: [wr,wu,wc]
    INDArray recurrentWeights =
        getParam(GRUParamInitializer.RECURRENT_WEIGHT_KEY); // Shape: [n^L,3*n^L]; order: [wR,wU,wC]
    INDArray biases = getParam(GRUParamInitializer.BIAS_KEY); // Shape: [1,3*n^L]; order: [br,bu,bc]

    boolean is2dInput = input.rank() < 3; // Edge case of T=1: input may have shape [m,nIn], equivalent to [m,nIn,1]
    int timeSeriesLength = (is2dInput ? 1 : input.size(2));
    int hiddenLayerSize = recurrentWeights.size(0);
    int miniBatchSize = input.size(0);
    int layerSize = hiddenLayerSize;

    // Apply dropconnect to input (not recurrent) weights only, before slicing,
    // so the per-gate views below see the dropped weights:
    if (conf.isUseDropConnect() && training) {
        if (conf.getLayer().getDropOut() > 0) {
            inputWeights = Dropout.applyDropConnect(this, GRUParamInitializer.INPUT_WEIGHT_KEY);
        }
    }

    // Split the packed weight/bias arrays into reset gate, update gate and candidate components
    INDArray wr = inputWeights.get(NDArrayIndex.all(), interval(0, layerSize));
    INDArray wu = inputWeights.get(NDArrayIndex.all(), interval(layerSize, 2 * layerSize));
    INDArray wc = inputWeights.get(NDArrayIndex.all(), interval(2 * layerSize, 3 * layerSize));
    INDArray wR = recurrentWeights.get(NDArrayIndex.all(), interval(0, layerSize));
    INDArray wU = recurrentWeights.get(NDArrayIndex.all(), interval(layerSize, 2 * layerSize));
    INDArray wC = recurrentWeights.get(NDArrayIndex.all(), interval(2 * layerSize, 3 * layerSize));
    INDArray br = biases.get(NDArrayIndex.point(0), interval(0, layerSize));
    INDArray bu = biases.get(NDArrayIndex.point(0), interval(layerSize, 2 * layerSize));
    INDArray bc = biases.get(NDArrayIndex.point(0), interval(2 * layerSize, 3 * layerSize));
    // INDArray wRAndU = recurrentWeights.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 2*hiddenLayerSize));
    // INDArray wC = recurrentWeights.get(NDArrayIndex.all(), NDArrayIndex.interval(2*hiddenLayerSize, 3*hiddenLayerSize));

    // Allocate arrays for activations:
    INDArray outputActivations = Nd4j.zeros(miniBatchSize, hiddenLayerSize, timeSeriesLength);
    INDArray rucZs = Nd4j.zeros(miniBatchSize, 3 * hiddenLayerSize, timeSeriesLength); // zs for reset gate, update gate, candidate activation
    INDArray rucAs = Nd4j.zeros(miniBatchSize, 3 * hiddenLayerSize, timeSeriesLength); // activations for the above

    if (prevOutputActivations == null)
        prevOutputActivations = Nd4j.zeros(miniBatchSize, hiddenLayerSize);

    for (int t = 0; t < timeSeriesLength; t++) {
        // Expected shape: [m,nIn]. Also handles the edge case of T=1, with 'time series'
        // data of shape [m,nIn], equivalent to [m,nIn,1]
        INDArray prevLayerInputSlice = (is2dInput ? input : input.tensorAlongDimension(t, 1, 0));
        if (t > 0)
            prevOutputActivations = outputActivations.tensorAlongDimension(t - 1, 1, 0); // Shape: [m,nL]
        /* This commented-out implementation should be the same as the 'naive' implementation
         * that follows; using the naive approach at present for debugging purposes.
         *
         * //Calculate reset gate, update gate and candidate zs
         * //First: inputs + biases for all (reset gate, update gate, candidate activation)
         * INDArray zs = prevLayerInputSlice.mmul(inputWeights).addiRowVector(biases); //Shape: [m,3n^L]
         * //Recurrent weights * prevInput for reset and update gates:
         * INDArray zrAndu = zs.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 2*hiddenLayerSize));
         * zrAndu.addi(prevOutputActivations.mmul(wRAndU)); //zr and zu now have all components
         * INDArray as = zs.dup();
         * INDArray arAndu = as.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 2*hiddenLayerSize));
         * Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("sigmoid", arAndu)); //Sigmoid for both reset and update gates
         * //Recurrent component of candidate z (previously zc has only input and bias components):
         * INDArray ar = as.get(NDArrayIndex.all(), NDArrayIndex.interval(0, hiddenLayerSize));
         * INDArray zc = zs.get(NDArrayIndex.all(), NDArrayIndex.interval(2*hiddenLayerSize, 3*hiddenLayerSize));
         * zc.addi(ar.mul(prevOutputActivations).mmul(wC));
         * INDArray ac = as.get(NDArrayIndex.all(), NDArrayIndex.interval(2*hiddenLayerSize, 3*hiddenLayerSize));
         * ac.assign(zc);
         * Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf.getLayer().getActivationFunction(), ac));
         * //Finally, calculate output activation:
         * INDArray au = as.get(NDArrayIndex.all(), NDArrayIndex.interval(hiddenLayerSize, 2*hiddenLayerSize));
         * INDArray outputASlice = au.mul(prevOutputActivations).addi(au.rsub(1).muli(ac));
         */

        // Naive implementation: compute each gate separately
        INDArray zs = Nd4j.zeros(miniBatchSize, 3 * hiddenLayerSize);
        INDArray as = Nd4j.zeros(miniBatchSize, 3 * hiddenLayerSize);

        // Reset gate: zr = x_t*wr + h_{t-1}*wR + br, ar = sigmoid(zr)
        INDArray zr = prevLayerInputSlice.mmul(wr).addi(prevOutputActivations.mmul(wR)).addiRowVector(br);
        INDArray ar = Nd4j.getExecutioner()
            .execAndReturn(Nd4j.getOpFactory().createTransform("sigmoid", zr.dup()));
        zs.get(NDArrayIndex.all(), NDArrayIndex.interval(0, hiddenLayerSize)).assign(zr);
        as.get(NDArrayIndex.all(), NDArrayIndex.interval(0, hiddenLayerSize)).assign(ar);

        // Update gate: zu = x_t*wu + h_{t-1}*wU + bu, au = sigmoid(zu)
        INDArray zu = prevLayerInputSlice.mmul(wu).addi(prevOutputActivations.mmul(wU)).addiRowVector(bu);
        INDArray au = Nd4j.getExecutioner()
            .execAndReturn(Nd4j.getOpFactory().createTransform("sigmoid", zu.dup()));
        zs.get(NDArrayIndex.all(), NDArrayIndex.interval(hiddenLayerSize, 2 * hiddenLayerSize)).assign(zu);
        as.get(NDArrayIndex.all(), NDArrayIndex.interval(hiddenLayerSize, 2 * hiddenLayerSize)).assign(au);

        // Candidate activation: zc = x_t*wc + (ar .* h_{t-1})*wC + bc, ac = activationFn(zc)
        INDArray zc = prevLayerInputSlice
            .mmul(wc)
            .addi(prevOutputActivations.mul(ar).mmul(wC))
            .addiRowVector(bc);
        INDArray ac = Nd4j.getExecutioner()
            .execAndReturn(
                Nd4j.getOpFactory()
                    .createTransform(conf.getLayer().getActivationFunction(), zc.dup()));
        zs.get(NDArrayIndex.all(), NDArrayIndex.interval(2 * hiddenLayerSize, 3 * hiddenLayerSize)).assign(zc);
        as.get(NDArrayIndex.all(), NDArrayIndex.interval(2 * hiddenLayerSize, 3 * hiddenLayerSize)).assign(ac);

        // Output activation: h_t = au .* h_{t-1} + (1 - au) .* ac
        INDArray aOut = au.mul(prevOutputActivations).addi(au.rsub(1).mul(ac));

        rucZs.tensorAlongDimension(t, 1, 0).assign(zs);
        rucAs.tensorAlongDimension(t, 1, 0).assign(as);
        outputActivations.tensorAlongDimension(t, 1, 0).assign(aOut);
    }

    return new INDArray[] {outputActivations, rucZs, rucAs};
}
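The per-timestep math implemented by the naive loop above can also be written as a small standalone step function. The sketch below uses plain ND4J; tanh stands in for the configured layer activation function, and all class, method, and parameter names are illustrative rather than part of the layer API.

import org.nd4j.linalg.api.ndarray.INDArray;
import static org.nd4j.linalg.ops.transforms.Transforms.sigmoid;
import static org.nd4j.linalg.ops.transforms.Transforms.tanh;

public class GruStepSketch {
    /**
     * One GRU time step, mirroring the per-timestep math in activateHelper:
     *   r_t = sigmoid(x_t*Wr + h_{t-1}*WR + br)          (reset gate)
     *   u_t = sigmoid(x_t*Wu + h_{t-1}*WU + bu)          (update gate)
     *   c_t = phi(x_t*Wc + (r_t .* h_{t-1})*WC + bc)     (candidate activation, .* element-wise)
     *   h_t = u_t .* h_{t-1} + (1 - u_t) .* c_t
     */
    static INDArray gruStep(INDArray x, INDArray hPrev,
                            INDArray wr, INDArray wu, INDArray wc,
                            INDArray wR, INDArray wU, INDArray wC,
                            INDArray br, INDArray bu, INDArray bc) {
        INDArray r = sigmoid(x.mmul(wr).addi(hPrev.mmul(wR)).addiRowVector(br)); // reset gate
        INDArray u = sigmoid(x.mmul(wu).addi(hPrev.mmul(wU)).addiRowVector(bu)); // update gate
        INDArray c = tanh(x.mmul(wc).addi(hPrev.mul(r).mmul(wC)).addiRowVector(bc)); // candidate
        return u.mul(hPrev).addi(u.rsub(1).mul(c)); // blend previous state and candidate
    }
}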