@Test
public void testGravesLSTMInit() {
    int nIn = 8;
    int nOut = 25;
    int nHiddenUnits = 17;
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .list(2)
            .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn)
                    .nOut(nHiddenUnits)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .activation("tanh")
                    .build())
            .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS)
                    .nIn(nHiddenUnits)
                    .nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .activation("tanh")
                    .build())
            .build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();

    // Ensure that we have the correct number of weights and biases, and that these have the correct shapes
    Layer layer = network.getLayer(0);
    assertTrue(layer instanceof GravesLSTM);

    Map<String, INDArray> paramTable = layer.paramTable();
    assertTrue(paramTable.size() == 3); // 2 sets of weights, 1 set of biases

    // Recurrent weights: [layerSize, 4*layerSize+3]; the extra 3 columns hold the per-unit peephole weights
    INDArray recurrentWeights = paramTable.get(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    assertArrayEquals(recurrentWeights.shape(), new int[] {nHiddenUnits, 4 * nHiddenUnits + 3});

    // Input weights: [nIn, 4*layerSize]
    INDArray inputWeights = paramTable.get(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    assertArrayEquals(inputWeights.shape(), new int[] {nIn, 4 * nHiddenUnits});

    // Biases: [1, 4*layerSize]
    INDArray biases = paramTable.get(GravesLSTMParamInitializer.BIAS_KEY);
    assertArrayEquals(biases.shape(), new int[] {1, 4 * nHiddenUnits});

    // Forget gate biases should be initialized to > 0. See parameter initializer for details
    INDArray forgetGateBiases =
            biases.get(new INDArrayIndex[] {NDArrayIndex.interval(nHiddenUnits, 2 * nHiddenUnits)});
    assertTrue(forgetGateBiases.gt(0).sum(Integer.MAX_VALUE).getDouble(0) == nHiddenUnits);

    int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
    assertTrue(nParams == layer.numParams());
}
@Test
public void testGRUInit() {
    int nIn = 8;
    int nOut = 25;
    int nHiddenUnits = 17;
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .list(2)
            .layer(0, new org.deeplearning4j.nn.conf.layers.GRU.Builder()
                    .nIn(nIn)
                    .nOut(nHiddenUnits)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .build())
            .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS)
                    .nIn(nHiddenUnits)
                    .nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .build())
            .build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();

    // Ensure that we have the correct number of weights and biases, and that these have the correct shapes
    Layer layer = network.getLayer(0);
    assertTrue(layer instanceof GRU);

    Map<String, INDArray> paramTable = layer.paramTable();
    assertTrue(paramTable.size() == 3); // 2 sets of weights, 1 set of biases

    // Recurrent weights: [layerSize, 3*layerSize] (reset gate, update gate, candidate activation)
    INDArray recurrentWeights = paramTable.get(GRUParamInitializer.RECURRENT_WEIGHT_KEY);
    assertArrayEquals(recurrentWeights.shape(), new int[] {nHiddenUnits, 3 * nHiddenUnits});

    // Input weights: [nIn, 3*layerSize]
    INDArray inputWeights = paramTable.get(GRUParamInitializer.INPUT_WEIGHT_KEY);
    assertArrayEquals(inputWeights.shape(), new int[] {nIn, 3 * nHiddenUnits});

    // Biases: [1, 3*layerSize]
    INDArray biases = paramTable.get(GRUParamInitializer.BIAS_KEY);
    assertArrayEquals(biases.shape(), new int[] {1, 3 * nHiddenUnits});

    int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
    assertTrue(nParams == layer.numParams());
}
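// A minimal sketch (not part of the original tests above) verifying by hand the total
// parameter counts implied by the shape assertions in testGravesLSTMInit() and testGRUInit().
// It uses only the nIn/nHiddenUnits values from those tests and plain arithmetic; the
// method name is hypothetical and nothing here calls into DL4J.
@Test
public void testExpectedParamCountsByHand() {
    int nIn = 8;
    int nHiddenUnits = 17;

    // GravesLSTM: recurrent [17, 4*17+3] + input [8, 4*17] + bias [1, 4*17]
    int lstmParams = nHiddenUnits * (4 * nHiddenUnits + 3)   // 17 * 71 = 1207
            + nIn * (4 * nHiddenUnits)                       // 8 * 68  = 544
            + 4 * nHiddenUnits;                              // 68
    assertTrue(lstmParams == 1819);

    // GRU: recurrent [17, 3*17] + input [8, 3*17] + bias [1, 3*17]
    int gruParams = nHiddenUnits * (3 * nHiddenUnits)        // 17 * 51 = 867
            + nIn * (3 * nHiddenUnits)                       // 8 * 51  = 408
            + 3 * nHiddenUnits;                              // 51
    assertTrue(gruParams == 1326);
}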