예제 #1
0
  /**
   * Checks the parameters that a GravesLSTM layer exposes after the network is initialised.
   *
   * <p>Builds a two-layer network (a GravesLSTM layer followed by an OutputLayer), calls {@code
   * init()}, and then asserts on layer 0 that: it is a {@code GravesLSTM}; its parameter table
   * holds exactly three arrays; each array has the expected shape; every forget-gate bias is
   * positive; and the element counts of the three arrays add up to {@code numParams()}.
   */
  @Test
  public void testGravesLSTMInit() {
    int nIn = 8;
    int nOut = 25;
    int nHiddenUnits = 17;
    MultiLayerConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .list(2)
            .layer(
                0,
                new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn)
                    .nOut(nHiddenUnits)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .activation("tanh")
                    .build())
            .layer(
                1,
                new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS)
                    .nIn(nHiddenUnits)
                    .nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .activation("tanh")
                    .build())
            .build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();

    // Ensure that we have the correct number weights and biases, that these have correct shape etc.
    Layer layer = network.getLayer(0);
    assertTrue(layer instanceof GravesLSTM);

    Map<String, INDArray> paramTable = layer.paramTable();
    assertTrue(paramTable.size() == 3); // 2 sets of weights, 1 set of biases

    // assertArrayEquals takes (expected, actual); keeping that order makes a failure message
    // label the two shapes correctly.
    INDArray recurrentWeights = paramTable.get(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    assertArrayEquals(
        new int[] {nHiddenUnits, 4 * nHiddenUnits + 3}, // Should be shape: [layerSize,4*layerSize+3]
        recurrentWeights.shape());
    INDArray inputWeights = paramTable.get(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    assertArrayEquals(
        new int[] {nIn, 4 * nHiddenUnits}, // Should be shape: [nIn,4*layerSize]
        inputWeights.shape());
    INDArray biases = paramTable.get(GravesLSTMParamInitializer.BIAS_KEY);
    assertArrayEquals(
        new int[] {1, 4 * nHiddenUnits}, // Should be shape: [1,4*layerSize]
        biases.shape());

    // Want forget gate biases to be initialized to > 0. See parameter initializer for details.
    // The check slices the interval [nHiddenUnits, 2*nHiddenUnits) out of the bias array, marks
    // the entries greater than zero, and requires the total of those marks to equal nHiddenUnits.
    // NOTE(review): sum(Integer.MAX_VALUE) presumably reduces over all elements - confirm
    // against the ND4J version in use.
    INDArray forgetGateBiases =
        biases.get(new INDArrayIndex[] {NDArrayIndex.interval(nHiddenUnits, 2 * nHiddenUnits)});
    assertTrue(forgetGateBiases.gt(0).sum(Integer.MAX_VALUE).getDouble(0) == nHiddenUnits);

    // The three arrays together must account for every parameter the layer reports.
    int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
    assertTrue(nParams == layer.numParams());
  }
예제 #2
0
  /**
   * Checks the parameters that a GRU layer exposes after the network is initialised.
   *
   * <p>Builds a two-layer network (a GRU layer followed by an OutputLayer), calls {@code init()},
   * and then asserts on layer 0 that: it is a {@code GRU}; its parameter table holds exactly three
   * arrays; each array has the expected shape; and the element counts of the three arrays add up
   * to {@code numParams()}.
   */
  @Test
  public void testGRUInit() {
    int nIn = 8;
    int nOut = 25;
    int nHiddenUnits = 17;
    MultiLayerConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .list(2)
            .layer(
                0,
                new org.deeplearning4j.nn.conf.layers.GRU.Builder()
                    .nIn(nIn)
                    .nOut(nHiddenUnits)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .build())
            .layer(
                1,
                new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS)
                    .nIn(nHiddenUnits)
                    .nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .build())
            .build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();

    // Ensure that we have the correct number weights and biases, that these have correct shape etc.
    Layer layer = network.getLayer(0);
    assertTrue(layer instanceof GRU);

    Map<String, INDArray> paramTable = layer.paramTable();
    assertTrue(paramTable.size() == 3); // 2 sets of weights, 1 set of biases

    // assertArrayEquals takes (expected, actual); keeping that order makes a failure message
    // label the two shapes correctly.
    INDArray recurrentWeights = paramTable.get(GRUParamInitializer.RECURRENT_WEIGHT_KEY);
    assertArrayEquals(
        new int[] {nHiddenUnits, 3 * nHiddenUnits}, // Should be shape: [layerSize,3*layerSize]
        recurrentWeights.shape());
    INDArray inputWeights = paramTable.get(GRUParamInitializer.INPUT_WEIGHT_KEY);
    assertArrayEquals(
        new int[] {nIn, 3 * nHiddenUnits}, // Should be shape: [nIn,3*layerSize]
        inputWeights.shape());
    INDArray biases = paramTable.get(GRUParamInitializer.BIAS_KEY);
    assertArrayEquals(
        new int[] {1, 3 * nHiddenUnits}, // Should be shape: [1,3*layerSize]
        biases.shape());

    // The three arrays together must account for every parameter the layer reports.
    int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
    assertTrue(nParams == layer.numParams());
  }