Ejemplo n.º 1
0
  @Test
  public void testAdamUpdater() {
    INDArray m, v;
    double lr = 0.01;
    int iteration = 0;
    double beta1 = 0.8;
    double beta2 = 0.888;

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .iterations(iteration)
            .adamMeanDecay(beta1)
            .adamVarDecay(beta2)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADAM)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, iteration, 1);

    double beta1t = FastMath.pow(beta1, iteration);
    double beta2t = FastMath.pow(beta2, iteration);
    double alphat = lr * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
    if (Double.isNaN(alphat) || alphat == 0.0) alphat = epsilon;

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      val = entry.getValue();
      m = Nd4j.zeros(val.shape());
      v = Nd4j.zeros(val.shape());

      m.muli(beta1).addi(val.mul(1.0 - beta1));
      v.muli(beta2).addi(val.mul(val).mul(1.0 - beta2));
      gradExpected = m.mul(alphat).divi(Transforms.sqrt(v).addi(epsilon));
      if (!gradExpected.equals(gradient.getGradientFor(entry.getKey()))) {
        System.out.println(Arrays.toString(gradExpected.dup().data().asFloat()));
        System.out.println(
            Arrays.toString(gradient.getGradientFor(entry.getKey()).dup().data().asFloat()));
      }
      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }

    assertEquals(beta1, layer.conf().getLayer().getAdamMeanDecay(), 1e-4);
    assertEquals(beta2, layer.conf().getLayer().getAdamVarDecay(), 1e-4);
  }
Ejemplo n.º 2
0
  @Test
  public void testRMSPropUpdater() {
    double lr = 0.01;
    double rmsDecay = 0.25;
    Map<String, INDArray> lastG = new HashMap<>();

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .rmsDecay(rmsDecay)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      key = entry.getKey();
      val = entry.getValue();
      INDArray lastGTmp = lastG.get(key);

      if (lastGTmp == null) lastGTmp = Nd4j.zeros(val.shape());

      lastGTmp.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay));
      gradExpected = val.mul(lr).div(Transforms.sqrt(lastGTmp.add(Nd4j.EPS_THRESHOLD)));

      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
      lastG.put(key, lastGTmp);
    }
    assertEquals(rmsDecay, layer.conf().getLayer().getRmsDecay(), 1e-4);
  }
Ejemplo n.º 3
0
  @Test
  public void testNestorovsUpdater() {
    double lr = 1e-2;
    double mu = 0.6;
    INDArray v, vPrev;

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .momentum(mu)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      val = entry.getValue();
      v = Nd4j.zeros(val.shape());
      vPrev = v;
      v = vPrev.mul(mu).subi(val.mul(lr));
      gradExpected = vPrev.muli(mu).addi(v.mul(-mu - 1));

      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }

    assertEquals(mu, layer.conf().getLayer().getMomentum(), 1e-4);
  }
Ejemplo n.º 4
0
  @Test
  public void testAdaGradUpdater() {
    double lr = 1e-2;

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      val = entry.getValue();
      gradExpected = Transforms.sqrt(val.mul(val).add(epsilon)).rdiv(lr).mul(val);
      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
  }
Ejemplo n.º 5
0
  @Test
  public void testAdaDeltaUpdate() {
    INDArray dxSquared;
    Map<String, INDArray> msg = new HashMap<>();
    Map<String, INDArray> msdx = new HashMap<>();

    double rho = 0.85;

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .rho(rho)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (int i = 0; i < 2; i++) {
      updater.update(layer, gradient, i, 1);

      // calculations for one iteration / update

      for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        key = entry.getKey();
        val = entry.getValue();
        INDArray msgTmp = msg.get(key);
        INDArray msdxTmp = msdx.get(key);

        if (msgTmp == null) {
          msgTmp = Nd4j.zeros(val.shape());
          msdxTmp = Nd4j.zeros(val.shape());
        }

        msgTmp.muli(rho);
        msgTmp.addi(1 - rho).muli(val.mul(val));

        gradExpected =
            Transforms.sqrt(msdxTmp.add(Nd4j.EPS_THRESHOLD))
                .divi(Transforms.sqrt(msgTmp.add(Nd4j.EPS_THRESHOLD)))
                .muli(val);
        gradientDup.setGradientFor(key, gradExpected);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));

        msdxTmp.muli(rho);
        dxSquared = gradExpected.mul(gradExpected);
        msdxTmp.addi(dxSquared.muli(1 - rho));

        msg.put(key, msgTmp);
        msdx.put(key, msdxTmp);
      }
      assertEquals(rho, layer.conf().getLayer().getRho(), 1e-4);
    }
  }