@Override
  public void init(
      Map<String, INDArray> params,
      NeuralNetConfiguration conf,
      INDArray paramsView,
      boolean initializeParams) {
    org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf =
        (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer();
    if (layerConf.getKernelSize().length != 2)
      throw new IllegalArgumentException("Kernel size must have exactly 2 elements (height, width)");

    int[] kernel = layerConf.getKernelSize();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();

    INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nOut));
    INDArray weightView =
        paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(nOut, numParams(conf, true)));

    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
  }
 protected INDArray createWeightMatrix(
     NeuralNetConfiguration conf, INDArray weightView, boolean initializeParams) {
   /*
   Create a 4d weight matrix of:
     (number of kernels, num input channels, kernel height, kernel width)
   Note c order is used specifically for the CNN weights, as opposed to f order elsewhere
   Inputs to the convolution layer are:
   (batch size, num input feature maps, image height, image width)
   */
   org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf =
       (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer();
   if (initializeParams) {
     Distribution dist = Distributions.createDistribution(conf.getLayer().getDist());
     int[] kernel = layerConf.getKernelSize();
     return WeightInitUtil.initWeights(
         new int[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]},
         layerConf.getWeightInit(),
         dist,
         'c',
         weightView);
   } else {
     int[] kernel = layerConf.getKernelSize();
     return WeightInitUtil.reshapeWeights(
         new int[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]},
         weightView,
         'c');
   }
 }
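To make the bias-first layout above concrete, here is a small self-contained sketch (plain Java, not DL4J API; the layer sizes are arbitrary examples) that computes where a given convolution weight lands in the flattened parameter view: the bias occupies the first nOut entries, and the remaining nIn*nOut*kH*kW entries are viewed in 'c' (row-major) order as a (nOut, nIn, kH, kW) tensor.

public class ConvParamLayoutSketch {
  public static void main(String[] args) {
    int nIn = 3, nOut = 8, kH = 5, kW = 5; // example layer sizes (assumptions)

    int nBiasParams = nOut;
    int nWeightParams = nIn * nOut * kH * kW;
    int numParams = nBiasParams + nWeightParams; // matches nIn*nOut*kH*kW + nOut above

    // Flat index of weight element (o, i, h, w) in the 'c'-order view, offset by the bias block:
    int o = 2, i = 1, h = 4, w = 0;
    int flatIndex = nBiasParams + ((o * nIn + i) * kH + h) * kW + w;

    System.out.println("total params = " + numParams);
    System.out.println("flat index of weight[" + o + "][" + i + "][" + h + "][" + w + "] = " + flatIndex);
  }
}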
  @Override
  public void init(
      Map<String, INDArray> params,
      NeuralNetConfiguration conf,
      INDArray paramsView,
      boolean initializeParameters) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
      throw new IllegalArgumentException(
          "unsupported layer type: " + conf.getLayer().getClass().getName());

    int length = numParams(conf, true);
    if (paramsView.length() != length)
      throw new IllegalStateException(
          "Expected params view of length " + length + ", got length " + paramsView.length());

    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
        (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();

    int nWeightParams = nIn * nOut;
    INDArray weightView =
        paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nWeightParams));
    INDArray biasView =
        paramsView.get(
            NDArrayIndex.point(0), NDArrayIndex.interval(nWeightParams, nWeightParams + nOut));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParameters));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParameters));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
  }
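For contrast with the convolution case, a minimal sketch (plain Java, not DL4J API; sizes are example values) of the dense-layer layout used here: the nIn*nOut weights come first, viewed in 'f' (column-major) order as an nIn x nOut matrix, and the nOut bias entries follow.

public class DenseParamLayoutSketch {
  public static void main(String[] args) {
    int nIn = 4, nOut = 3; // example layer sizes (assumptions)

    int nWeightParams = nIn * nOut;
    // Flat index of W(row, col) in 'f' (column-major) order: col * nIn + row
    int row = 2, col = 1;
    int weightFlatIndex = col * nIn + row;
    // Bias entry j sits after all the weights:
    int j = 2;
    int biasFlatIndex = nWeightParams + j;

    System.out.println("W(" + row + "," + col + ") flat index = " + weightFlatIndex);
    System.out.println("b(" + j + ") flat index = " + biasFlatIndex);
  }
}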
  @Override
  public void init(Map<String, INDArray> params, NeuralNetConfiguration conf) {
    if (conf.getKernelSize().length != 2)
      throw new IllegalArgumentException("Kernel size must have exactly 2 elements (height, width)");

    params.put(BIAS_KEY, createBias(conf));
    params.put(WEIGHT_KEY, createWeightMatrix(conf));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
  }
 protected INDArray createWeightMatrix(NeuralNetConfiguration conf) {
    /*
    Create a 4d weight matrix of:
      (number of kernels, num input channels, kernel height, kernel width)
    Inputs to the convolution layer are:
      (batch size, num input feature maps, image height, image width)
    */
   Distribution dist = Distributions.createDistribution(conf.getDist());
   return WeightInitUtil.initWeights(
       Ints.concat(new int[] {conf.getNOut(), conf.getNIn()}, conf.getKernelSize()),
       conf.getWeightInit(),
       dist);
 }
  @Override
  public void init(Map<String, INDArray> params, NeuralNetConfiguration conf) {
    Distribution dist = Distributions.createDistribution(conf.getDist());

    int nL = conf.getNOut(); // i.e., n neurons in this layer
    int nLast = conf.getNIn(); // i.e., n neurons in previous layer

    conf.addVariable(RECURRENT_WEIGHTS);
    conf.addVariable(INPUT_WEIGHTS);
    conf.addVariable(BIAS);

    params.put(
        RECURRENT_WEIGHTS, WeightInitUtil.initWeights(nL, 4 * nL + 3, conf.getWeightInit(), dist));
    params.put(
        INPUT_WEIGHTS, WeightInitUtil.initWeights(nLast, 4 * nL, conf.getWeightInit(), dist));
    INDArray biases =
        Nd4j.zeros(1, 4 * nL); // Order: input, forget, output, input modulation, i.e., IFOG
    biases.put(
        new NDArrayIndex[] {NDArrayIndex.interval(nL, 2 * nL), new NDArrayIndex(0)},
        Nd4j.ones(1, nL).muli(5));
    /*The above line initializes the forget gate biases to 5.
     * See Sutskever PhD thesis, pg19:
     * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
     *  which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
     *  not done, it will be harder to learn long range dependencies because the smaller values of the forget
     *  gates will create a vanishing gradients problem."
     *  http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
     */
    params.put(BIAS, biases);

    params.get(RECURRENT_WEIGHTS).data().persist();
    params.get(INPUT_WEIGHTS).data().persist();
    params.get(BIAS).data().persist();
  }
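A standalone sketch (plain Java arrays, not DL4J API; nL is an example value) of the bias layout this initializer produces: the 4*nL vector splits into contiguous gate segments in IFOG order, and only the forget-gate slice [nL, 2*nL) is set to 5 so that forget activations start near 1, as the Sutskever quote above explains.

public class LstmBiasInitSketch {
  public static void main(String[] args) {
    int nL = 4; // example number of LSTM units (assumption)
    double[] biases = new double[4 * nL]; // starts at zero, like Nd4j.zeros(1, 4 * nL)
    java.util.Arrays.fill(biases, nL, 2 * nL, 5.0); // forget-gate segment -> 5

    // Prints zeros except indices [4, 8), which hold 5.0; segments are I | F | O | G
    System.out.println(java.util.Arrays.toString(biases));
  }
}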
 // 1 bias per feature map
 protected INDArray createBias(
     NeuralNetConfiguration conf, INDArray biasView, boolean initializeParams) {
   // the bias is a 1D tensor -- one bias per output feature map
   org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf =
       (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer();
   if (initializeParams) biasView.assign(layerConf.getBiasInit());
   return biasView;
 }
 @Override
 public int numParams(NeuralNetConfiguration conf, boolean backprop) {
   org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
       (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
   int nIn = layerConf.getNIn();
   int nOut = layerConf.getNOut();
   return nIn * nOut + nOut; // weights + bias
 }
 protected INDArray createBias(
     NeuralNetConfiguration conf, INDArray biasParamView, boolean initializeParameters) {
   org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
       (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
   if (initializeParameters) {
     INDArray ret = Nd4j.valueArrayOf(layerConf.getNOut(), layerConf.getBiasInit());
     biasParamView.assign(ret);
   }
   return biasParamView;
 }
  @Override
  public int numParams(NeuralNetConfiguration conf, boolean backprop) {
    org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf =
        (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer();

    int[] kernel = layerConf.getKernelSize();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();
    return nIn * nOut * kernel[0] * kernel[1] + nOut;
  }
  /**
   * Converts a JSON model configuration file into a configuration object.
   *
   * <p>If the file holds a MultiLayerConfiguration, its file name MUST contain '_multi';
   * otherwise it will be processed as a regular NeuralNetConfiguration.
   *
   * <p>Takes in a JSON file path, checks the file path for the MultiLayer indicator, reads the
   * JSON file to a string, and creates the neural net configuration from that string.
   */
  @Override
  public <E> E value(String value) throws Exception {
    boolean isMultiLayer = value.contains("_multi");
    String json = FileUtils.readFileToString(new File(value));

    if (isMultiLayer) {
      return (E) MultiLayerConfiguration.fromJson(json);
    } else {
      return (E) NeuralNetConfiguration.fromJson(json);
    }
  }
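A standalone sketch of the same dispatch rule on hypothetical file names (no files are read and no DL4J parsing is performed), just to show which parser would be chosen for each path.

public class ConfigDispatchSketch {
  public static void main(String[] args) {
    // Hypothetical file names; only the presence of "_multi" matters for the dispatch.
    String[] paths = {"iris_mlp_multi.json", "autoencoder.json"};
    for (String path : paths) {
      if (path.contains("_multi")) {
        System.out.println(path + " -> MultiLayerConfiguration.fromJson(json)");
      } else {
        System.out.println(path + " -> NeuralNetConfiguration.fromJson(json)");
      }
    }
  }
}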
  private static void checkNinNoutForEachLayer(
      int[] expNin, int[] expNout, MultiLayerConfiguration conf, MultiLayerNetwork network) {

    // Check configuration
    for (int i = 0; i < expNin.length; i++) {
      NeuralNetConfiguration layerConf = conf.getConf(i);
      assertTrue(layerConf.getNIn() == expNin[i]);
      assertTrue(layerConf.getNOut() == expNout[i]);
    }

    // Check Layer
    for (int i = 0; i < expNin.length; i++) {
      Layer layer = network.getLayers()[i];
      assertTrue(layer.conf().getNIn() == expNin[i]);
      assertTrue(layer.conf().getNOut() == expNout[i]);
      int[] weightShape = layer.getParam(DefaultParamInitializer.WEIGHT_KEY).shape();
      assertTrue(weightShape[0] == expNin[i]);
      assertTrue(weightShape[1] == expNout[i]);
    }
  }
 /**
  * Apply l1/l2 regularization (and mini-batch scaling) to the gradient of the given parameter.
  *
  * @param layer the layer whose parameter gradient is being adjusted
  * @param gradient the gradient array, modified in place
  * @param param the parameter key (e.g., weight or bias key) the gradient belongs to
  */
 public void postApply(Layer layer, INDArray gradient, String param) {
   NeuralNetConfiguration conf = layer.conf();
   INDArray params = layer.getParam(param);
   if (conf.isUseRegularization()
       && conf.getLayer().getL2() > 0
       && !(param.equals(DefaultParamInitializer.BIAS_KEY)))
     gradient.addi(
         params.mul(
             conf.getLayer()
                 .getL2())); // dC/dw = dC0/dw + lambda/n * w where C0 is pre-l2 cost function
   if (conf.isUseRegularization()
       && conf.getLayer().getL1() > 0
       && !(param.equals(DefaultParamInitializer.BIAS_KEY)))
     gradient.addi(Transforms.sign(params).muli(conf.getLayer().getL1()));
   if (conf.isMiniBatch()) gradient.divi(layer.getInputMiniBatchSize());
   if (conf.isConstrainGradientToUnitNorm()) gradient.divi(gradient.norm2(Integer.MAX_VALUE));
 }
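The same adjustment on plain arrays, as a rough sketch with made-up coefficients and values: add l2 * w and l1 * sign(w) to a non-bias gradient, then divide by the mini-batch size, mirroring the order of operations in postApply above.

public class RegularizationSketch {
  public static void main(String[] args) {
    double l1 = 1e-4, l2 = 1e-3; // example regularization coefficients (assumptions)
    int miniBatchSize = 32;
    double[] w = {0.5, -0.2, 0.0};
    double[] grad = {0.10, -0.05, 0.02};

    for (int i = 0; i < grad.length; i++) {
      grad[i] += l2 * w[i];              // dC/dw = dC0/dw + lambda * w (L2 term)
      grad[i] += l1 * Math.signum(w[i]); // L1 term: lambda1 * sign(w)
      grad[i] /= miniBatchSize;          // average over the mini-batch
    }
    System.out.println(java.util.Arrays.toString(grad));
  }
}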
  public ModelAndGradient() {
    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .lossFunction(LossFunctions.LossFunction.MCXENT)
            .optimizationAlgo(OptimizationAlgorithm.ITERATION_GRADIENT_DESCENT)
            .activationFunction("softmax")
            .iterations(10)
            .weightInit(WeightInit.XAVIER)
            .learningRate(1e-1)
            .nIn(4)
            .nOut(3)
            .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer())
            .build();

    OutputLayer l =
        LayerFactories.getFactory(conf.getLayer())
            .create(conf, Arrays.<IterationListener>asList(new ScoreIterationListener(1)));
    this.model = l;
    l.setInput(Nd4j.ones(4));
    l.setLabels(Nd4j.ones(3));
    this.gradient = l.gradient();
  }
  @Test
  public void testModelSerde() throws Exception {
    ObjectMapper mapper = getMapper();
    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .momentum(0.9f)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1000)
            .constrainGradientToUnitNorm(true)
            .learningRate(1e-1f)
            .layer(
                new org.deeplearning4j.nn.conf.layers.AutoEncoder.Builder()
                    .nIn(4)
                    .nOut(3)
                    .corruptionLevel(0.6)
                    .sparsity(0.5)
                    .lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY)
                    .build())
            .build();

    DataSet d2 = new IrisDataSetIterator(150, 150).next();

    INDArray input = d2.getFeatureMatrix();
    AutoEncoder da =
        LayerFactories.getFactory(conf.getLayer())
            .create(
                conf,
                Arrays.<IterationListener>asList(
                    new ScoreIterationListener(1), new HistogramIterationListener(1)),
                0);
    da.setInput(input);
    ModelAndGradient g = new ModelAndGradient(da);
    String json = mapper.writeValueAsString(g);
    ModelAndGradient read = mapper.readValue(json, ModelAndGradient.class);
    assertEquals(g, read);
  }
  protected INDArray createWeightMatrix(
      NeuralNetConfiguration conf, INDArray weightParamView, boolean initializeParameters) {
    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
        (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();

    if (initializeParameters) {
      Distribution dist = Distributions.createDistribution(layerConf.getDist());
      INDArray ret =
          WeightInitUtil.initWeights(
              layerConf.getNIn(),
              layerConf.getNOut(),
              layerConf.getWeightInit(),
              dist,
              weightParamView);
      return ret;
    } else {
      return WeightInitUtil.reshapeWeights(
          new int[] {layerConf.getNIn(), layerConf.getNOut()}, weightParamView);
    }
  }
  @Override
  public Map<String, INDArray> getGradientsFromFlattened(
      NeuralNetConfiguration conf, INDArray gradientView) {

    org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf =
        (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer();

    int[] kernel = layerConf.getKernelSize();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();

    INDArray biasGradientView =
        gradientView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nOut));
    INDArray weightGradientView =
        gradientView
            .get(NDArrayIndex.point(0), NDArrayIndex.interval(nOut, numParams(conf, true)))
            .reshape('c', nOut, nIn, kernel[0], kernel[1]);

    Map<String, INDArray> out = new LinkedHashMap<>();
    out.put(BIAS_KEY, biasGradientView);
    out.put(WEIGHT_KEY, weightGradientView);
    return out;
  }
  @Override
  public Map<String, INDArray> getGradientsFromFlattened(
      NeuralNetConfiguration conf, INDArray gradientView) {
    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
        (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();
    int nWeightParams = nIn * nOut;

    INDArray weightGradientView =
        gradientView
            .get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nWeightParams))
            .reshape('f', nIn, nOut);
    INDArray biasView =
        gradientView.get(
            NDArrayIndex.point(0),
            NDArrayIndex.interval(nWeightParams, nWeightParams + nOut)); // Already a row vector

    Map<String, INDArray> out = new LinkedHashMap<>();
    out.put(WEIGHT_KEY, weightGradientView);
    out.put(BIAS_KEY, biasView);

    return out;
  }
  /**
   * Check backprop gradients for a MultiLayerNetwork.
   *
   * @param mln MultiLayerNetwork to test. This must be initialized.
   * @param epsilon Usually on the order of 1e-4 or so.
   * @param maxRelError Maximum relative error. Usually < 1e-5 or so, though this may need to be
   *     larger for deep networks or those with nonlinear activations
   * @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be
   *     non-zero due to precision issues. For example, 0.0 vs. 1e-18: relative error is 1.0, but
   *     not really a failure
   * @param print Whether to print full pass/failure details for each parameter gradient
   * @param exitOnFirstError If true: return upon first failure. If false: continue checking even if
   *     one parameter gradient has failed. Typically use false for debugging, true for unit tests.
   * @param input Input array to use for forward pass. May be mini-batch data.
   * @param labels Labels/targets to use to calculate backprop gradient. May be mini-batch data.
   * @return true if the gradient check passes for all parameters, false otherwise.
   */
  public static boolean checkGradients(
      MultiLayerNetwork mln,
      double epsilon,
      double maxRelError,
      double minAbsoluteError,
      boolean print,
      boolean exitOnFirstError,
      INDArray input,
      INDArray labels) {
    // Basic sanity checks on input:
    if (epsilon <= 0.0 || epsilon > 0.1)
      throw new IllegalArgumentException(
          "Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
    if (maxRelError <= 0.0 || maxRelError > 0.25)
      throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
    if (!(mln.getOutputLayer() instanceof BaseOutputLayer))
      throw new IllegalArgumentException("Cannot check backprop gradients without OutputLayer");

    // Check network configuration:

    int layerCount = 0;
    for (NeuralNetConfiguration n : mln.getLayerWiseConfigurations().getConfs()) {
      org.deeplearning4j.nn.conf.Updater u = n.getLayer().getUpdater();
      if (u == org.deeplearning4j.nn.conf.Updater.SGD) {
        // Must have LR of 1.0
        double lr = n.getLayer().getLearningRate();
        if (lr != 1.0) {
          throw new IllegalStateException(
              "When using SGD updater, must also use lr=1.0 for layer "
                  + layerCount
                  + "; got "
                  + u
                  + " with lr="
                  + lr);
        }
      } else if (u != org.deeplearning4j.nn.conf.Updater.NONE) {
        throw new IllegalStateException(
            "Must have Updater.NONE (or SGD + lr=1.0) for layer " + layerCount + "; got " + u);
      }
      layerCount++;
    }

    mln.setInput(input);
    mln.setLabels(labels);
    mln.computeGradientAndScore();
    Pair<Gradient, Double> gradAndScore = mln.gradientAndScore();

    Updater updater = UpdaterCreator.getUpdater(mln);
    updater.update(mln, gradAndScore.getFirst(), 0, mln.batchSize());

    INDArray gradientToCheck =
        gradAndScore
            .getFirst()
            .gradient()
            .dup(); // need dup: gradients are a *view* of the full gradient array (which will
                    // change every time backprop is done)
    INDArray originalParams =
        mln.params().dup(); // need dup: params are a *view* of full parameters

    int nParams = originalParams.length();

    int totalNFailures = 0;
    double maxError = 0.0;
    for (int i = 0; i < nParams; i++) {
      // (w+epsilon): Do forward pass and score
      INDArray params = originalParams.dup();
      params.putScalar(i, params.getDouble(i) + epsilon);
      mln.setParameters(params);
      mln.computeGradientAndScore();
      double scorePlus = mln.score();

      // (w-epsilon): Do forward pass and score
      params.putScalar(i, params.getDouble(i) - 2 * epsilon); // +eps - 2*eps = -eps
      mln.setParameters(params);
      mln.computeGradientAndScore();
      double scoreMinus = mln.score();

      // Calculate numerical parameter gradient:
      double scoreDelta = scorePlus - scoreMinus;

      double numericalGradient = scoreDelta / (2 * epsilon);
      if (Double.isNaN(numericalGradient))
        throw new IllegalStateException(
            "Numerical gradient was NaN for parameter " + i + " of " + nParams);

      double backpropGradient = gradientToCheck.getDouble(i);
      // http://cs231n.github.io/neural-networks-3/#gradcheck
      // use mean centered
      double relError =
          Math.abs(backpropGradient - numericalGradient)
              / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
      if (backpropGradient == 0.0 && numericalGradient == 0.0)
        relError = 0.0; // Edge case: both are exactly zero (e.g., RNNs with a time series length of 1)

      if (relError > maxError) maxError = relError;
      if (relError > maxRelError || Double.isNaN(relError)) {
        double absError = Math.abs(backpropGradient - numericalGradient);
        if (absError < minAbsoluteError) {
          log.info(
              "Param "
                  + i
                  + " passed: grad= "
                  + backpropGradient
                  + ", numericalGrad= "
                  + numericalGradient
                  + ", relError= "
                  + relError
                  + "; absolute error = "
                  + absError
                  + " < minAbsoluteError = "
                  + minAbsoluteError);
        } else {
          if (print)
            log.info(
                "Param "
                    + i
                    + " FAILED: grad= "
                    + backpropGradient
                    + ", numericalGrad= "
                    + numericalGradient
                    + ", relError= "
                    + relError
                    + ", scorePlus="
                    + scorePlus
                    + ", scoreMinus= "
                    + scoreMinus);
          if (exitOnFirstError) return false;
          totalNFailures++;
        }
      } else if (print) {
        log.info(
            "Param "
                + i
                + " passed: grad= "
                + backpropGradient
                + ", numericalGrad= "
                + numericalGradient
                + ", relError= "
                + relError);
      }
    }

    if (print) {
      int nPass = nParams - totalNFailures;
      log.info(
          "GradientCheckUtil.checkGradients(): "
              + nParams
              + " params checked, "
              + nPass
              + " passed, "
              + totalNFailures
              + " failed. Largest relative error = "
              + maxError);
    }

    return totalNFailures == 0;
  }
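The core of the loop above, reduced to a self-contained sketch (no DL4J) on a one-dimensional example function: perturb the parameter by +/- epsilon, take the central difference, and compare it to the analytic gradient using the symmetric relative error |g1 - g2| / (|g1| + |g2|). The function and thresholds are arbitrary examples.

public class GradientCheckSketch {
  static double f(double x) { return x * x * x; }         // example "score" function
  static double analyticGrad(double x) { return 3 * x * x; }

  public static void main(String[] args) {
    double x = 1.7, epsilon = 1e-4, maxRelError = 1e-6;

    // Central-difference numerical gradient: (f(x+eps) - f(x-eps)) / (2*eps)
    double numericalGrad = (f(x + epsilon) - f(x - epsilon)) / (2 * epsilon);
    double g = analyticGrad(x);
    double relError = Math.abs(g - numericalGrad) / (Math.abs(g) + Math.abs(numericalGrad));

    System.out.println("numerical=" + numericalGrad + ", analytic=" + g + ", relError=" + relError);
    System.out.println(relError < maxRelError ? "PASSED" : "FAILED");
  }
}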
 // 1 bias per feature map
 protected INDArray createBias(NeuralNetConfiguration conf) {
   // the bias is a 1D tensor -- one bias per output feature map
   return Nd4j.zeros(conf.getNOut());
 }