@Override
  public void init(
      Map<String, INDArray> params,
      NeuralNetConfiguration conf,
      INDArray paramsView,
      boolean initializeParameters) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
      throw new IllegalArgumentException(
          "Unsupported layer type (expected FeedForwardLayer): "
              + conf.getLayer().getClass().getName());

    int length = numParams(conf, true);
    if (paramsView.length() != length)
      throw new IllegalStateException(
          "Expected params view of length " + length + ", got length " + paramsView.length());

    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
        (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();

    int nWeightParams = nIn * nOut;
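    // Flattened layout: weights occupy entries [0, nWeightParams), biases the
    // final nOut entries [nWeightParams, nWeightParams + nOut).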
    INDArray weightView =
        paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nWeightParams));
    INDArray biasView =
        paramsView.get(
            NDArrayIndex.point(0), NDArrayIndex.interval(nWeightParams, nWeightParams + nOut));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParameters));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParameters));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
  }
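
  /*
   * A minimal sketch (not part of the initializer above) of the flattened
   * layout it assumes: for a hypothetical layer with nIn = 3 and nOut = 2,
   * numParams(conf, true) must be nIn * nOut + nOut = 8, with the 6 weights
   * first and the 2 biases last. The method name demoFeedForwardLayout and the
   * concrete sizes are illustrative assumptions only.
   */
  private static void demoFeedForwardLayout() {
    int nIn = 3, nOut = 2;
    INDArray paramsView = Nd4j.linspace(1, nIn * nOut + nOut, nIn * nOut + nOut); // shape [1, 8]
    INDArray weightView =
        paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nIn * nOut)); // entries 0..5
    INDArray biasView =
        paramsView.get(
            NDArrayIndex.point(0),
            NDArrayIndex.interval(nIn * nOut, nIn * nOut + nOut)); // entries 6..7
    System.out.println("weights: " + weightView + "  biases: " + biasView);
  }
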
  @Override
  public void init(
      Map<String, INDArray> params,
      NeuralNetConfiguration conf,
      INDArray paramsView,
      boolean initializeParams) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer))
      throw new IllegalArgumentException(
          "Unsupported layer type (expected ConvolutionLayer): "
              + conf.getLayer().getClass().getName());

    org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf =
        (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer();

    if (layerConf.getKernelSize().length != 2)
      throw new IllegalArgumentException(
          "Kernel size must have exactly 2 elements, got " + layerConf.getKernelSize().length);

    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();

    // Note: bias-first layout, the reverse of the feed-forward initializer above.
    INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nOut));
    INDArray weightView =
        paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(nOut, numParams(conf, true)));

    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
  }
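
  /*
   * A minimal sketch of the convolution layout above, assuming
   * numParams(conf, true) == nOut + nIn * nOut * kH * kW for a kH x kW kernel.
   * The names and sizes (demoConvolutionLayout, nIn = 1, nOut = 2, 3 x 3
   * kernel) are illustrative assumptions, not part of the original code.
   */
  private static void demoConvolutionLayout() {
    int nIn = 1, nOut = 2, kH = 3, kW = 3;
    int length = nOut + nIn * nOut * kH * kW; // 2 biases + 18 weights = 20
    INDArray paramsView = Nd4j.zeros(1, length);
    INDArray biasView =
        paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nOut)); // first nOut entries
    INDArray weightView =
        paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(nOut, length)); // the rest
    System.out.println("bias length: " + biasView.length() + "  weight length: " + weightView.length());
  }
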
  @Override
  public void init(Map<String, INDArray> params, NeuralNetConfiguration conf) {
    Distribution dist = Distributions.createDistribution(conf.getDist());

    int nL = conf.getNOut(); // i.e., n neurons in this layer
    int nLast = conf.getNIn(); // i.e., n neurons in previous layer

    conf.addVariable(RECURRENT_WEIGHTS);
    conf.addVariable(INPUT_WEIGHTS);
    conf.addVariable(BIAS);

    // Recurrent weights are [nL, 4*nL + 3]: four gate blocks plus 3 extra
    // columns holding the peephole connections of the Graves-style LSTM.
    params.put(
        RECURRENT_WEIGHTS, WeightInitUtil.initWeights(nL, 4 * nL + 3, conf.getWeightInit(), dist));
    params.put(
        INPUT_WEIGHTS, WeightInitUtil.initWeights(nLast, 4 * nL, conf.getWeightInit(), dist));
    INDArray biases =
        Nd4j.zeros(1, 4 * nL); // Order: input, forget, output, input modulation, i.e., IFOG
    biases.put(
        new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
        Nd4j.ones(1, nL).muli(5));
    /*The above line initializes the forget gate biases to 5.
     * See Sutskever's PhD thesis, p. 19:
     * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
     *  which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
     *  not done, it will be harder to learn long range dependencies because the smaller values of the forget
     *  gates will create a vanishing gradients problem."
     *  http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
     */
    params.put(BIAS, biases);

    params.get(RECURRENT_WEIGHTS).data().persist();
    params.get(INPUT_WEIGHTS).data().persist();
    params.get(BIAS).data().persist();
  }
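
  /*
   * A minimal sketch of the IFOG bias layout above, assuming nL = 2: the bias
   * row has 4 * nL = 8 entries, and only the forget-gate block (columns
   * nL..2*nL - 1, i.e. indices 2 and 3) starts at 5; every other gate starts
   * at 0. The method name demoForgetGateBias is an illustrative assumption.
   */
  private static void demoForgetGateBias() {
    int nL = 2;
    INDArray biases = Nd4j.zeros(1, 4 * nL);
    biases.put(
        new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
        Nd4j.ones(1, nL).muli(5));
    System.out.println(biases); // values: 0, 0, 5, 5, 0, 0, 0, 0
  }
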
  @Override
  public void init(Map<String, INDArray> params, NeuralNetConfiguration conf) {
    if (conf.getKernelSize().length != 2)
      throw new IllegalArgumentException(
          "Kernel size must have exactly 2 elements, got " + conf.getKernelSize().length);

    params.put(BIAS_KEY, createBias(conf));
    params.put(WEIGHT_KEY, createWeightMatrix(conf));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
  }