@Override public void init(Map<String, INDArray> params, NeuralNetConfiguration conf) { Distribution dist = Distributions.createDistribution(conf.getDist()); int nL = conf.getNOut(); // i.e., n neurons in this layer int nLast = conf.getNIn(); // i.e., n neurons in previous layer conf.addVariable(RECURRENT_WEIGHTS); conf.addVariable(INPUT_WEIGHTS); conf.addVariable(BIAS); params.put( RECURRENT_WEIGHTS, WeightInitUtil.initWeights(nL, 4 * nL + 3, conf.getWeightInit(), dist)); params.put( INPUT_WEIGHTS, WeightInitUtil.initWeights(nLast, 4 * nL, conf.getWeightInit(), dist)); INDArray biases = Nd4j.zeros(1, 4 * nL); // Order: input, forget, output, input modulation, i.e., IFOG biases.put( new NDArrayIndex[] {NDArrayIndex.interval(nL, 2 * nL), new NDArrayIndex(0)}, Nd4j.ones(1, nL).muli(5)); /*The above line initializes the forget gate biases to 5. * See Sutskever PhD thesis, pg19: * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning, * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is * not done, it will be harder to learn long range dependencies because the smaller values of the forget * gates will create a vanishing gradients problem." * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf */ params.put(BIAS, biases); params.get(RECURRENT_WEIGHTS).data().persist(); params.get(INPUT_WEIGHTS).data().persist(); params.get(BIAS).data().persist(); }
/**
 * Creates the convolution weight array.
 *
 * <p>The requested shape is {@code nOut}, {@code nIn}, followed by the entries of
 * {@code conf.getKernelSize()}; i.e. a 4d weight matrix of (number of kernels, num input channels,
 * kernel height, kernel width). Inputs to the convolution layer are: (batch size, num input
 * feature maps, image height, image width).
 *
 * @param conf configuration supplying nIn, nOut, kernel size, weight-init scheme and distribution
 * @return the array produced by {@code WeightInitUtil.initWeights} for that shape
 */
protected INDArray createWeightMatrix(NeuralNetConfiguration conf) {
  Distribution dist = Distributions.createDistribution(conf.getDist());

  // Leading dimensions first, then whatever the configured kernel size contributes.
  int[] kernelsAndChannels = {conf.getNOut(), conf.getNIn()};
  int[] weightShape = Ints.concat(kernelsAndChannels, conf.getKernelSize());

  return WeightInitUtil.initWeights(weightShape, conf.getWeightInit(), dist);
}
private static void checkNinNoutForEachLayer( int[] expNin, int[] expNout, MultiLayerConfiguration conf, MultiLayerNetwork network) { // Check configuration for (int i = 0; i < expNin.length; i++) { NeuralNetConfiguration layerConf = conf.getConf(i); assertTrue(layerConf.getNIn() == expNin[i]); assertTrue(layerConf.getNOut() == expNout[i]); } // Check Layer for (int i = 0; i < expNin.length; i++) { Layer layer = network.getLayers()[i]; assertTrue(layer.conf().getNIn() == expNin[i]); assertTrue(layer.conf().getNOut() == expNout[i]); int[] weightShape = layer.getParam(DefaultParamInitializer.WEIGHT_KEY).shape(); assertTrue(weightShape[0] == expNin[i]); assertTrue(weightShape[1] == expNout[i]); } }