protected INDArray createWeightMatrix(
        NeuralNetConfiguration conf, INDArray weightView, boolean initializeParams) {
    /*
     * Create a 4d weight matrix of: (number of kernels, num input channels, kernel height, kernel width)
     * Note c order is used specifically for the CNN weights, as opposed to f order elsewhere
     * Inputs to the convolution layer are: (batch size, num input feature maps, image height, image width)
     */
    org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer();
    if (initializeParams) {
        Distribution dist = Distributions.createDistribution(conf.getLayer().getDist());
        int[] kernel = layerConf.getKernelSize();
        return WeightInitUtil.initWeights(
                new int[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]},
                layerConf.getWeightInit(),
                dist,
                'c',
                weightView);
    } else {
        int[] kernel = layerConf.getKernelSize();
        return WeightInitUtil.reshapeWeights(
                new int[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]},
                weightView,
                'c');
    }
}
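The shape arithmetic above can be checked in isolation. The following is a minimal, self-contained sketch (the layer sizes nIn and nOut and the kernel dimensions are hypothetical values, not taken from any real configuration): it builds the same (nOut, nIn, kH, kW) weight shape and counts the resulting parameters.

import java.util.Arrays;

public class ConvWeightShapeDemo {
    public static void main(String[] args) {
        // Hypothetical layer configuration: 3 input channels, 16 kernels of size 5x5
        int nIn = 3, nOut = 16, kH = 5, kW = 5;

        // Same shape convention as createWeightMatrix:
        // (number of kernels, num input channels, kernel height, kernel width)
        int[] weightShape = new int[] {nOut, nIn, kH, kW};

        // The number of weight parameters is the product of the shape dimensions
        long numWeights = 1;
        for (int d : weightShape) numWeights *= d;

        System.out.println("weight shape  = " + Arrays.toString(weightShape)); // [16, 3, 5, 5]
        System.out.println("weight params = " + numWeights);                   // 16 * 3 * 5 * 5 = 1200
    }
}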
@Override
public void init(Map<String, INDArray> params, NeuralNetConfiguration conf) {
    Distribution dist = Distributions.createDistribution(conf.getDist());
    int nL = conf.getNOut();    // i.e., n neurons in this layer
    int nLast = conf.getNIn();  // i.e., n neurons in previous layer

    conf.addVariable(RECURRENT_WEIGHTS);
    conf.addVariable(INPUT_WEIGHTS);
    conf.addVariable(BIAS);

    params.put(
            RECURRENT_WEIGHTS,
            WeightInitUtil.initWeights(nL, 4 * nL + 3, conf.getWeightInit(), dist));
    params.put(
            INPUT_WEIGHTS,
            WeightInitUtil.initWeights(nLast, 4 * nL, conf.getWeightInit(), dist));

    // Order: input, forget, output, input modulation, i.e., IFOG
    INDArray biases = Nd4j.zeros(1, 4 * nL);
    biases.put(
            new NDArrayIndex[] {NDArrayIndex.interval(nL, 2 * nL), new NDArrayIndex(0)},
            Nd4j.ones(1, nL).muli(5));
    /* The above line initializes the forget gate biases to 5.
     * See Sutskever PhD thesis, pg. 19:
     * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
     * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
     * not done, it will be harder to learn long range dependencies because the smaller values of the forget
     * gates will create a vanishing gradients problem."
     * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
     */
    params.put(BIAS, biases);

    params.get(RECURRENT_WEIGHTS).data().persist();
    params.get(INPUT_WEIGHTS).data().persist();
    params.get(BIAS).data().persist();
}
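The forget-gate bias trick can be illustrated on its own with plain ND4J indexing. The snippet below is a rough sketch (nL is a hypothetical layer size, and the class name is made up for the example): it lays out a 1 x 4*nL bias row in IFOG order and writes 5 only into the forget-gate block, mirroring the put(...) call in the initializer above.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;

public class ForgetGateBiasDemo {
    public static void main(String[] args) {
        int nL = 4; // hypothetical number of LSTM units

        // One bias per gate, concatenated in IFOG order: input, forget, output, input modulation
        INDArray biases = Nd4j.zeros(1, 4 * nL);

        // The forget-gate block occupies columns [nL, 2*nL); set it to 5 as recommended by Sutskever
        biases.put(
                new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
                Nd4j.ones(1, nL).muli(5));

        System.out.println(biases); // [[0,0,0,0, 5,5,5,5, 0,0,0,0, 0,0,0,0]]
    }
}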
protected INDArray createWeightMatrix(NeuralNetConfiguration conf) {
    /*
     * Create a 4d weight matrix of: (number of kernels, num input channels, kernel height, kernel width)
     * Inputs to the convolution layer are: (batch size, num input feature maps, image height, image width)
     */
    Distribution dist = Distributions.createDistribution(conf.getDist());
    return WeightInitUtil.initWeights(
            Ints.concat(new int[] {conf.getNOut(), conf.getNIn()}, conf.getKernelSize()),
            conf.getWeightInit(),
            dist);
}
protected INDArray createWeightMatrix(
        NeuralNetConfiguration conf, INDArray weightParamView, boolean initializeParameters) {
    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    if (initializeParameters) {
        Distribution dist = Distributions.createDistribution(layerConf.getDist());
        return WeightInitUtil.initWeights(
                layerConf.getNIn(),
                layerConf.getNOut(),
                layerConf.getWeightInit(),
                dist,
                weightParamView);
    } else {
        return WeightInitUtil.reshapeWeights(
                new int[] {layerConf.getNIn(), layerConf.getNOut()}, weightParamView);
    }
}
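The flat weightParamView handed in here is expected to hold exactly nIn * nOut entries, and the non-initializing branch only restores the 2d (nIn, nOut) layout over that view. A minimal sketch of that relationship (hypothetical sizes, plain ND4J reshape rather than the WeightInitUtil internals):

import java.util.Arrays;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class FeedForwardWeightViewDemo {
    public static void main(String[] args) {
        int nIn = 4, nOut = 3; // hypothetical layer sizes

        // A flat parameter view holding all nIn * nOut weights of the layer
        INDArray weightParamView = Nd4j.linspace(1, nIn * nOut, nIn * nOut);

        // Restore the 2d (nIn, nOut) weight matrix from the flat view
        INDArray weights = weightParamView.reshape(nIn, nOut);

        System.out.println("view length  = " + weightParamView.length());         // 12
        System.out.println("weight shape = " + Arrays.toString(weights.shape())); // [4, 3]
    }
}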