Code example #1
 protected INDArray createWeightMatrix(
     NeuralNetConfiguration conf, INDArray weightView, boolean initializeParams) {
   /*
   Create a 4d weight matrix of:
     (number of kernels, num input channels, kernel height, kernel width)
   Note c order is used specifically for the CNN weights, as opposed to f order elsewhere
   Inputs to the convolution layer are:
   (batch size, num input feature maps, image height, image width)
   */
   org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf =
       (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer();
   if (initializeParams) {
     Distribution dist = Distributions.createDistribution(conf.getLayer().getDist());
     int[] kernel = layerConf.getKernelSize();
     return WeightInitUtil.initWeights(
         new int[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]},
         layerConf.getWeightInit(),
         dist,
         'c',
         weightView);
   } else {
     int[] kernel = layerConf.getKernelSize();
     return WeightInitUtil.reshapeWeights(
         new int[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]},
         weightView,
         'c');
   }
 }
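For orientation, here is a minimal standalone sketch (the class name and numeric values are assumptions, not taken from the library) of how the flat weight view's length relates to the 4d shape built above.

public class CnnWeightShapeSketch {
  public static void main(String[] args) {
    int nOut = 20;            // assumed: number of kernels
    int nIn = 1;              // assumed: number of input channels
    int[] kernel = {5, 5};    // assumed: kernel height and width
    // Same shape as above: (number of kernels, num input channels, kernel height, kernel width)
    int[] shape = {nOut, nIn, kernel[0], kernel[1]};
    // The flat weightView handed to initWeights must hold exactly this many elements;
    // with 'c' (row-major) order the kernel-width index varies fastest.
    int expectedLength = nOut * nIn * kernel[0] * kernel[1]; // 20 * 1 * 5 * 5 = 500
    System.out.println(java.util.Arrays.toString(shape) + " -> " + expectedLength + " weights");
  }
}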
Code example #2
  @Override
  public void init(Map<String, INDArray> params, NeuralNetConfiguration conf) {
    Distribution dist = Distributions.createDistribution(conf.getDist());

    int nL = conf.getNOut(); // i.e., n neurons in this layer
    int nLast = conf.getNIn(); // i.e., n neurons in previous layer

    conf.addVariable(RECURRENT_WEIGHTS);
    conf.addVariable(INPUT_WEIGHTS);
    conf.addVariable(BIAS);

    params.put(
        RECURRENT_WEIGHTS, WeightInitUtil.initWeights(nL, 4 * nL + 3, conf.getWeightInit(), dist));
    params.put(
        INPUT_WEIGHTS, WeightInitUtil.initWeights(nLast, 4 * nL, conf.getWeightInit(), dist));
    INDArray biases =
        Nd4j.zeros(1, 4 * nL); // Order: input, forget, output, input modulation, i.e., IFOG
    biases.put(
        new NDArrayIndex[] {NDArrayIndex.interval(nL, 2 * nL), new NDArrayIndex(0)},
        Nd4j.ones(1, nL).muli(5));
    /*The above line initializes the forget gate biases to 5.
     * See Sutskever PhD thesis, pg19:
     * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
     *  which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
     *  not done, it will be harder to learn long range dependencies because the smaller values of the forget
     *  gates will create a vanishing gradients problem."
     *  http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
     */
    params.put(BIAS, biases);

    params.get(RECURRENT_WEIGHTS).data().persist();
    params.get(INPUT_WEIGHTS).data().persist();
    params.get(BIAS).data().persist();
  }
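The forget-gate bias trick above can be seen in isolation with the sketch below. It is a hypothetical example (the class name, nL = 3, and the view-plus-assign indexing are my assumptions, not the library's code) that fills columns [nL, 2*nL) of a 1 x 4*nL bias row with 5, matching the IFOG layout described in the comment.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;

public class ForgetGateBiasSketch {
  public static void main(String[] args) {
    int nL = 3; // assumed: number of LSTM units
    // Bias row in IFOG order: input, forget, output, input modulation
    INDArray biases = Nd4j.zeros(1, 4 * nL);
    // Columns [nL, 2*nL) are the forget-gate biases; set them to 5
    biases.get(NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)).assign(5);
    System.out.println(biases); // expected: [0,0,0, 5,5,5, 0,0,0, 0,0,0]
  }
}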
Code example #3
 protected INDArray createWeightMatrix(NeuralNetConfiguration conf) {
   /*
   Create a 4d weight matrix of:
     (number of kernels, num input channels, kernel height, kernel width)
   Inputs to the convolution layer are:
     (batch size, num input feature maps, image height, image width)
   */
   Distribution dist = Distributions.createDistribution(conf.getDist());
   return WeightInitUtil.initWeights(
       Ints.concat(new int[] {conf.getNOut(), conf.getNIn()}, conf.getKernelSize()),
       conf.getWeightInit(),
       dist);
 }
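As a small illustration (assumed values and class name, not part of the source), the Guava call used above simply concatenates {nOut, nIn} with the kernel size into the 4d shape array:

import com.google.common.primitives.Ints;
import java.util.Arrays;

public class ShapeConcatSketch {
  public static void main(String[] args) {
    int nOut = 6;                // assumed: number of kernels
    int nIn = 3;                 // assumed: number of input channels
    int[] kernelSize = {5, 5};   // assumed: kernel height and width
    // Same construction as the example above: prepend (nOut, nIn) to the kernel dimensions
    int[] shape = Ints.concat(new int[] {nOut, nIn}, kernelSize);
    System.out.println(Arrays.toString(shape)); // [6, 3, 5, 5]
  }
}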
Code example #4
  protected INDArray createWeightMatrix(
      NeuralNetConfiguration conf, INDArray weightParamView, boolean initializeParameters) {
    /*
    Create a 2d weight matrix of (number of inputs, number of outputs)
    for a fully connected (feed-forward) layer.
    */
    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
        (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();

    if (initializeParameters) {
      Distribution dist = Distributions.createDistribution(layerConf.getDist());
      INDArray ret =
          WeightInitUtil.initWeights(
              layerConf.getNIn(),
              layerConf.getNOut(),
              layerConf.getWeightInit(),
              dist,
              weightParamView);
      return ret;
    } else {
      return WeightInitUtil.reshapeWeights(
          new int[] {layerConf.getNIn(), layerConf.getNOut()}, weightParamView);
    }
  }
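Finally, a rough sketch (assumed layer sizes and class name) of the size relationship the reshape branch in this example relies on: a fully connected layer's weight matrix is (nIn, nOut), so the flat parameter view must contain exactly nIn * nOut values.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class DenseWeightShapeSketch {
  public static void main(String[] args) {
    int nIn = 4;   // assumed: size of the previous layer
    int nOut = 3;  // assumed: size of this layer
    // The flat parameter view must hold nIn * nOut values
    // before it can be reshaped to the 2d weight matrix (nIn, nOut).
    INDArray flatView = Nd4j.zeros(1, nIn * nOut);
    System.out.println("flat length = " + flatView.length());        // 12
    System.out.println("target shape = [" + nIn + ", " + nOut + "]");
  }
}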