@Override
public void init(
        Map<String, INDArray> params,
        NeuralNetConfiguration conf,
        INDArray paramsView,
        boolean initializeParameters) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
        throw new IllegalArgumentException(
                "Unsupported layer type: " + conf.getLayer().getClass().getName());
    int length = numParams(conf, true);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();
    int nWeightParams = nIn * nOut;

    // Layout of the flattened view: weights first (nIn * nOut values), then biases (nOut values)
    INDArray weightView =
            paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nWeightParams));
    INDArray biasView =
            paramsView.get(
                    NDArrayIndex.point(0),
                    NDArrayIndex.interval(nWeightParams, nWeightParams + nOut));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParameters));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParameters));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
}
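/*
 * Usage sketch (an assumption, not part of the source): how an init(...) with the signature above
 * is typically driven. The caller allocates one flattened parameter row sized via numParams(conf, true),
 * and init(...) carves the weight and bias views out of it. The class name DefaultParamInitializer,
 * the method name, and the local variable names are illustrative placeholders.
 */
void initUsageSketch(DefaultParamInitializer initializer, NeuralNetConfiguration conf) {
    int nParams = initializer.numParams(conf, true);
    INDArray paramsView = Nd4j.create(1, nParams);             // single flattened row of parameters
    Map<String, INDArray> paramTable = new LinkedHashMap<>();  // filled with views into paramsView
    initializer.init(paramTable, conf, paramsView, true);
    INDArray weights = paramTable.get(WEIGHT_KEY);             // shape [nIn, nOut], backed by paramsView
}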
@Override
public void init(
        Map<String, INDArray> params,
        NeuralNetConfiguration conf,
        INDArray paramsView,
        boolean initializeParams) {
    org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer();
    if (layerConf.getKernelSize().length != 2)
        throw new IllegalArgumentException("Kernel size must be an array of length 2");

    int[] kernel = layerConf.getKernelSize();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();

    // Layout of the flattened view: biases first (nOut values), then the convolution weights
    INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nOut));
    INDArray weightView =
            paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(nOut, numParams(conf, true)));

    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
}
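/*
 * Sketch (an assumption, not taken from the source): the expected length of the flattened view
 * above, assuming the usual convolution weight shape [nOut, nIn, kernelHeight, kernelWidth] plus
 * one bias per output feature map. The method name is hypothetical.
 */
static int expectedConvParamCount(int nIn, int nOut, int[] kernel) {
    int biasCount = nOut;                                   // one bias per output feature map
    int weightCount = nOut * nIn * kernel[0] * kernel[1];   // one kernel[0] x kernel[1] filter per (in, out) channel pair
    return biasCount + weightCount;                         // should equal numParams(conf, true) under this layout
}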
@Override
public void init(Map<String, INDArray> params, NeuralNetConfiguration conf) {
    Distribution dist = Distributions.createDistribution(conf.getDist());

    int nL = conf.getNOut();   // i.e., number of neurons in this layer
    int nLast = conf.getNIn(); // i.e., number of neurons in the previous layer

    conf.addVariable(RECURRENT_WEIGHTS);
    conf.addVariable(INPUT_WEIGHTS);
    conf.addVariable(BIAS);

    // Recurrent weights: [nL, 4*nL + 3]; the 3 extra columns hold the peephole connections
    params.put(
            RECURRENT_WEIGHTS,
            WeightInitUtil.initWeights(nL, 4 * nL + 3, conf.getWeightInit(), dist));
    // Input weights: [nLast, 4*nL], one block of nL columns per gate
    params.put(
            INPUT_WEIGHTS, WeightInitUtil.initWeights(nLast, 4 * nL, conf.getWeightInit(), dist));

    INDArray biases = Nd4j.zeros(1, 4 * nL); // Order: input, forget, output, input modulation, i.e., IFOG
    biases.put(
            new NDArrayIndex[] {NDArrayIndex.interval(nL, 2 * nL), new NDArrayIndex(0)},
            Nd4j.ones(1, nL).muli(5));
    /* The line above initializes the forget gate biases to 5.
     * See Sutskever PhD thesis, pg. 19:
     * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
     * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
     * not done, it will be harder to learn long range dependencies because the smaller values of the forget
     * gates will create a vanishing gradients problem."
     * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
     */
    params.put(BIAS, biases);

    params.get(RECURRENT_WEIGHTS).data().persist();
    params.get(INPUT_WEIGHTS).data().persist();
    params.get(BIAS).data().persist();
}
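/*
 * Sketch (an assumption, mirroring the IFOG ordering noted in the comment above): how the
 * concatenated [1, 4*nL] bias row splits into per-gate segments. The method name is hypothetical;
 * only Nd4j indexing calls already used elsewhere in this section are relied on.
 */
static INDArray forgetGateBiases(INDArray biases, int nL) {
    // Segments: [0, nL) = input gate, [nL, 2*nL) = forget gate, [2*nL, 3*nL) = output gate, [3*nL, 4*nL) = input modulation
    return biases.get(NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)); // the block initialized to 5 above
}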
@Override
public void init(Map<String, INDArray> params, NeuralNetConfiguration conf) {
    if (conf.getKernelSize().length != 2)
        throw new IllegalArgumentException("Kernel size must be an array of length 2");

    params.put(BIAS_KEY, createBias(conf));
    params.put(WEIGHT_KEY, createWeightMatrix(conf));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
}