  // Note: precision is off on this test, but the numbers are close;
  // an investigation in a future release should determine how to resolve this.
  @Test
  public void testCalculateDeltaContained() {
    Layer layer = getContainedConfig();
    INDArray input = getContainedData();
    INDArray col = getContainedCol();
    INDArray epsilon = Nd4j.ones(1, 2, 4, 4);

    INDArray expectedOutput =
        Nd4j.create(
            new double[] {
              0.02036651, 0.02036651, 0.02036651, 0.02036651, 0.00039383,
              0.00039383, 0.00039383, 0.00039383, 0.02036651, 0.02036651,
              0.02036651, 0.02036651, 0.00039383, 0.00039383, 0.00039383,
              0.00039383, 0.02036651, 0.02036651, 0.02036651, 0.02036651,
              0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.02036651,
              0.02036651, 0.02036651, 0.02036651, 0.00039383, 0.00039383,
              0.00039383, 0.00039383
            },
            new int[] {1, 2, 4, 4});

    layer.setInput(input);
    org.deeplearning4j.nn.layers.convolution.ConvolutionLayer layer2 =
        (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) layer;
    layer2.setCol(col);
    INDArray delta = layer2.calculateDelta(epsilon);

    assertArrayEquals(expectedOutput.shape(), delta.shape());
    assertEquals(expectedOutput, delta);
  }
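  // Hedged sketch (an assumption, not part of the original test class): the precision notes above
  // suggest an element-wise tolerance check rather than strict equality; a helper like the one
  // below could do that, using only idioms already present in these tests.
  private static void assertApproxEquals(INDArray expected, INDArray actual, double eps) {
    assertArrayEquals(expected.shape(), actual.shape());
    // maximum absolute element-wise difference must stay below the tolerance
    double maxAbsDiff = Transforms.abs(expected.sub(actual)).max(Integer.MAX_VALUE).getDouble(0);
    assertTrue("max abs diff " + maxAbsDiff + " exceeds tolerance " + eps, maxAbsDiff < eps);
  }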
  // TODO remove/move, technically this is testing Nd4j functionality
  @Test
  public void testCreateFeatureMapMethod() {
    Layer layer = getContainedConfig();
    INDArray input = getContainedData();
    int inputWidth = input.shape()[0];
    int featureMapWidth =
        (inputWidth + layer.conf().getPadding()[0] * 2 - layer.conf().getKernelSize()[0])
                / layer.conf().getStride()[0]
            + 1;

    INDArray expectedOutput =
        Nd4j.create(
            new double[] {
              1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3,
              3, 1, 1, 1, 1, 3, 3, 3, 3, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4,
              4, 4, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4
            },
            new int[] {1, 1, 2, 2, 4, 4});

    layer.setInput(input);
    org.deeplearning4j.nn.layers.convolution.ConvolutionLayer layer2 =
        (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) layer;
    INDArray featureMaps = layer2.createFeatureMapColumn();

    assertEquals(featureMapWidth, featureMaps.shape()[4]);
    assertArrayEquals(expectedOutput.shape(), featureMaps.shape());
    assertEquals(expectedOutput, featureMaps);
  }
  @Test
  public void testAdamUpdater() {
    INDArray m, v;
    double lr = 0.01;
    int iteration = 0;
    double beta1 = 0.8;
    double beta2 = 0.888;

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .iterations(iteration)
            .adamMeanDecay(beta1)
            .adamVarDecay(beta2)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADAM)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, iteration, 1);

    double beta1t = FastMath.pow(beta1, iteration);
    double beta2t = FastMath.pow(beta2, iteration);
    double alphat = lr * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
    if (Double.isNaN(alphat) || alphat == 0.0) alphat = epsilon;

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      val = entry.getValue();
      m = Nd4j.zeros(val.shape());
      v = Nd4j.zeros(val.shape());

      m.muli(beta1).addi(val.mul(1.0 - beta1));
      v.muli(beta2).addi(val.mul(val).mul(1.0 - beta2));
      gradExpected = m.mul(alphat).divi(Transforms.sqrt(v).addi(epsilon));
      if (!gradExpected.equals(gradient.getGradientFor(entry.getKey()))) {
        System.out.println(Arrays.toString(gradExpected.dup().data().asFloat()));
        System.out.println(
            Arrays.toString(gradient.getGradientFor(entry.getKey()).dup().data().asFloat()));
      }
      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }

    assertEquals(beta1, layer.conf().getLayer().getAdamMeanDecay(), 1e-4);
    assertEquals(beta2, layer.conf().getLayer().getAdamVarDecay(), 1e-4);
  }
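  // Hedged sketch (an assumption, not dl4j API): the Adam expectation computed in the loop above,
  // written as a standalone helper. m and v are the running first and second moment estimates,
  // updated in place; t is the iteration used for bias correction.
  private static INDArray adamExpectedUpdate(
      INDArray grad, INDArray m, INDArray v, double lr, double beta1, double beta2, double eps, int t) {
    // m_t = beta1 * m_{t-1} + (1 - beta1) * g
    m.muli(beta1).addi(grad.mul(1.0 - beta1));
    // v_t = beta2 * v_{t-1} + (1 - beta2) * g^2
    v.muli(beta2).addi(grad.mul(grad).mul(1.0 - beta2));
    // alpha_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t), with the same degenerate-case fallback as above
    double alphaT = lr * FastMath.sqrt(1 - FastMath.pow(beta2, t)) / (1 - FastMath.pow(beta1, t));
    if (Double.isNaN(alphaT) || alphaT == 0.0) alphaT = eps;
    // update = alpha_t * m_t / (sqrt(v_t) + eps)
    return m.mul(alphaT).divi(Transforms.sqrt(v).addi(eps));
  }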
  @Test
  public void testSGDUpdater() {
    double lr = 0.05;

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      val = entry.getValue();
      gradExpected = val.mul(lr);
      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
  }
  @Test
  public void testSubSampleLayerNoneBackprop() throws Exception {
    Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding);

    Pair<Gradient, INDArray> out = layer.backpropGradient(epsilon);
    assertEquals(epsilon.shape().length, out.getSecond().shape().length);
    assertEquals(nExamples, out.getSecond().size(1)); // depth retained
  }
  @Test
  public void testCNNInputSetupMNIST() throws Exception {
    INDArray input = getMnistData();
    Layer layer = getMNISTConfig();
    layer.activate(input);

    assertEquals(input, layer.input());
    assertArrayEquals(input.shape(), layer.input().shape());
  }
  @Test
  public void testGravesLSTMInit() {
    int nIn = 8;
    int nOut = 25;
    int nHiddenUnits = 17;
    MultiLayerConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .list(2)
            .layer(
                0,
                new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn)
                    .nOut(nHiddenUnits)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .activation("tanh")
                    .build())
            .layer(
                1,
                new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS)
                    .nIn(nHiddenUnits)
                    .nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .activation("tanh")
                    .build())
            .build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();

    // Ensure that we have the correct number of weights and biases, and that these have the correct shape, etc.
    Layer layer = network.getLayer(0);
    assertTrue(layer instanceof GravesLSTM);

    Map<String, INDArray> paramTable = layer.paramTable();
    assertTrue(paramTable.size() == 3); // 2 sets of weights, 1 set of biases

    INDArray recurrentWeights = paramTable.get(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    assertArrayEquals(
        recurrentWeights.shape(),
        new int[] {
          nHiddenUnits, 4 * nHiddenUnits + 3
        }); // Should be shape: [layerSize,4*layerSize+3]
    INDArray inputWeights = paramTable.get(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    assertArrayEquals(
        inputWeights.shape(),
        new int[] {nIn, 4 * nHiddenUnits}); // Should be shape: [nIn,4*layerSize]
    INDArray biases = paramTable.get(GravesLSTMParamInitializer.BIAS_KEY);
    assertArrayEquals(
        biases.shape(), new int[] {1, 4 * nHiddenUnits}); // Should be shape: [1,4*layerSize]

    // Want forget gate biases to be initialized to > 0. See parameter initializer for details
    INDArray forgetGateBiases =
        biases.get(new INDArrayIndex[] {NDArrayIndex.interval(nHiddenUnits, 2 * nHiddenUnits)});
    assertTrue(forgetGateBiases.gt(0).sum(Integer.MAX_VALUE).getDouble(0) == nHiddenUnits);

    int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
    assertTrue(nParams == layer.numParams());
  }
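  // Worked example of the parameter count asserted above, with nIn = 8 and nHiddenUnits = 17:
  // recurrent weights 17 x (4*17 + 3) = 17 x 71 = 1207, input weights 8 x (4*17) = 8 x 68 = 544,
  // biases 1 x 68 = 68; total 1207 + 544 + 68 = 1819 parameters.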
  @Test
  public void testFeatureMapShape() throws Exception {
    INDArray input = getMnistData();

    Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding);
    INDArray convActivations = layer.activate(input);

    assertEquals(featureMapWidth, convActivations.size(2));
    assertEquals(depth, convActivations.size(0));
  }
  // Note: precision is off on this test, but the numbers are close;
  // an investigation in a future release should determine how to resolve this.
  @Test
  public void testBackpropResultsContained() {
    Layer layer = getContainedConfig();
    INDArray input = getContainedData();
    INDArray col = getContainedCol();
    INDArray epsilon = Nd4j.ones(1, 2, 4, 4);

    INDArray expectedBiasGradient =
        Nd4j.create(new double[] {0.16608272, 0.16608272}, new int[] {1, 2});
    INDArray expectedWeightGradient =
        Nd4j.create(
            new double[] {
              0.17238397,
              0.17238397,
              0.33846668,
              0.33846668,
              0.17238397,
              0.17238397,
              0.33846668,
              0.33846668
            },
            new int[] {2, 1, 2, 2});
    INDArray expectedEpsilon =
        Nd4j.create(
            new double[] {
              0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.00039383,
              0.00039383, 0., 0., 0.00039383, 0.00039383,
              0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.,
              0., 0.02036651, 0.02036651, 0.02036651, 0.02036651,
              0.02036651, 0.02036651, 0., 0., 0.02036651,
              0.02036651, 0.02036651, 0.02036651, 0.02036651, 0.02036651,
              0., 0., 0.00039383, 0.00039383, 0.00039383,
              0.00039383, 0.00039383, 0.00039383, 0., 0.,
              0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.00039383,
              0.00039383, 0., 0., 0., 0.,
              0., 0., 0., 0., 0.,
              0., 0., 0., 0., 0.,
              0., 0., 0., 0.
            },
            new int[] {1, 1, 8, 8});

    layer.setInput(input);
    org.deeplearning4j.nn.layers.convolution.ConvolutionLayer layer2 =
        (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) layer;
    layer2.setCol(col);
    Pair<Gradient, INDArray> pair = layer2.backpropGradient(epsilon);

    assertArrayEquals(expectedEpsilon.shape(), pair.getSecond().shape());
    assertArrayEquals(expectedWeightGradient.shape(), pair.getFirst().getGradientFor("W").shape());
    assertArrayEquals(expectedBiasGradient.shape(), pair.getFirst().getGradientFor("b").shape());
    assertEquals(expectedEpsilon, pair.getSecond());
    assertEquals(expectedWeightGradient, pair.getFirst().getGradientFor("W"));
    assertEquals(expectedBiasGradient, pair.getFirst().getGradientFor("b"));
  }
  @Test
  public void testCNNInputSetup() throws Exception {
    INDArray input = getMnistData();
    int[] stride = new int[] {3, 3};
    int[] padding = new int[] {1, 1};

    Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding);
    layer.activate(input);

    assertEquals(input, layer.input());
    assertArrayEquals(input.shape(), layer.input().shape());
  }
  @Test
  public void testGRUInit() {
    int nIn = 8;
    int nOut = 25;
    int nHiddenUnits = 17;
    MultiLayerConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .list(2)
            .layer(
                0,
                new org.deeplearning4j.nn.conf.layers.GRU.Builder()
                    .nIn(nIn)
                    .nOut(nHiddenUnits)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .build())
            .layer(
                1,
                new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS)
                    .nIn(nHiddenUnits)
                    .nOut(nOut)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .build())
            .build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();

    // Ensure that we have the correct number of weights and biases, and that these have the correct shape, etc.
    Layer layer = network.getLayer(0);
    assertTrue(layer instanceof GRU);

    Map<String, INDArray> paramTable = layer.paramTable();
    assertTrue(paramTable.size() == 3); // 2 sets of weights, 1 set of biases

    INDArray recurrentWeights = paramTable.get(GRUParamInitializer.RECURRENT_WEIGHT_KEY);
    assertArrayEquals(
        recurrentWeights.shape(),
        new int[] {nHiddenUnits, 3 * nHiddenUnits}); // Should be shape: [layerSize,3*layerSize]
    INDArray inputWeights = paramTable.get(GRUParamInitializer.INPUT_WEIGHT_KEY);
    assertArrayEquals(
        inputWeights.shape(),
        new int[] {nIn, 3 * nHiddenUnits}); // Should be shape: [nIn,3*layerSize]
    INDArray biases = paramTable.get(GRUParamInitializer.BIAS_KEY);
    assertArrayEquals(
        biases.shape(), new int[] {1, 3 * nHiddenUnits}); // Should be shape: [1,3*layerSize]

    int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
    assertTrue(nParams == layer.numParams());
  }
  public Layer getContainedConfig() {
    int[] kernelSize = new int[] {2, 2};
    int[] stride = new int[] {2, 2};
    int[] padding = new int[] {0, 0};
    int nChannelsIn = 1;
    int depth = 2;

    INDArray W =
        Nd4j.create(new double[] {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}, new int[] {2, 1, 2, 2});
    INDArray b = Nd4j.create(new double[] {1, 1});
    Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding);
    layer.setParam("W", W);
    layer.setParam("b", b);

    return layer;
  }
  @Test
  public void testRMSPropUpdater() {
    double lr = 0.01;
    double rmsDecay = 0.25;
    Map<String, INDArray> lastG = new HashMap<>();

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .rmsDecay(rmsDecay)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      key = entry.getKey();
      val = entry.getValue();
      INDArray lastGTmp = lastG.get(key);

      if (lastGTmp == null) lastGTmp = Nd4j.zeros(val.shape());

      lastGTmp.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay));
      gradExpected = val.mul(lr).div(Transforms.sqrt(lastGTmp.add(Nd4j.EPS_THRESHOLD)));

      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
      lastG.put(key, lastGTmp);
    }
    assertEquals(rmsDecay, layer.conf().getLayer().getRmsDecay(), 1e-4);
  }
  /**
   * Apply the regularization
   *
   * @param layer the layer whose parameter is being updated
   * @param gradient the gradient for that parameter (modified in place)
   * @param param the parameter name, e.g. the weight or bias key
   */
  public void postApply(Layer layer, INDArray gradient, String param) {
    NeuralNetConfiguration conf = layer.conf();
    INDArray params = layer.getParam(param);
    if (conf.isUseRegularization()
        && conf.getLayer().getL2() > 0
        && !(param.equals(DefaultParamInitializer.BIAS_KEY)))
      gradient.addi(
          params.mul(
              conf.getLayer()
                  .getL2())); // dC/dw = dC0/dw + lambda/n * w where C0 is the pre-L2 cost function
    if (conf.isUseRegularization()
        && conf.getLayer().getL1() > 0
        && !(param.equals(DefaultParamInitializer.BIAS_KEY)))
      gradient.addi(
          Transforms.sign(params)
              .muli(conf.getLayer().getL1())); // dC/dw = dC0/dw + lambda * sign(w) for L1
    if (conf.isMiniBatch()) gradient.divi(layer.getInputMiniBatchSize());
    if (conf.isConstrainGradientToUnitNorm()) gradient.divi(gradient.norm2(Integer.MAX_VALUE));
  }
  @Test
  public void testFeatureMapShapeMNIST() throws Exception {
    int inputWidth = 28;
    int[] stride = new int[] {2, 2};
    int[] padding = new int[] {0, 0};
    int[] kernelSize = new int[] {9, 9};
    int nChannelsIn = 1;
    int depth = 20;
    int featureMapWidth = (inputWidth + padding[1] * 2 - kernelSize[1]) / stride[1] + 1;

    INDArray input = getMnistData();

    Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding);
    INDArray convActivations = layer.activate(input);

    assertEquals(featureMapWidth, convActivations.size(2));
    assertEquals(depth, convActivations.size(1));
  }
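  // Worked example of the featureMapWidth computation above for 28 x 28 MNIST input, a 9 x 9
  // kernel, stride 2 and no padding: (28 + 0*2 - 9) / 2 + 1 = 19 / 2 + 1 = 9 + 1 = 10.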
  @Test
  public void testNestorovsUpdater() {
    double lr = 1e-2;
    double mu = 0.6;
    INDArray v, vPrev;

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .momentum(mu)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      val = entry.getValue();
      v = Nd4j.zeros(val.shape());
      vPrev = v;
      v = vPrev.mul(mu).subi(val.mul(lr));
      gradExpected = vPrev.muli(mu).addi(v.mul(-mu - 1));

      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }

    assertEquals(mu, layer.conf().getLayer().getMomentum(), 1e-4);
  }
  @Override
  public GradientUpdater init(String variable, INDArray gradient, Layer layer) {
    AdaGrad adaGrad = (AdaGrad) updaterForVariable.get(variable);
    if (adaGrad == null) {
      adaGrad = new AdaGrad(layer.conf().getLr());
      updaterForVariable.put(variable, adaGrad);
    }

    return adaGrad;
  }
  public Layer getContainedConfig() {
    //        int inputWidth = input.shape()[0];
    //        int inputHeight = input.shape()[1];

    int[] kernelSize = new int[] {2, 2};
    int[] stride = new int[] {2, 2};
    int[] padding = new int[] {0, 0};
    int nChannelsIn = 1;
    int depth = 2;
    //        int featureMapWidth = (inputWidth + padding[0] * 2 - kernelSize[0]) / stride[0] + 1;
    //        int featureMapHeight = (inputHeight + padding[1] * 2 - kernelSize[1]) / stride[0] + 1;
    INDArray W =
        Nd4j.create(new double[] {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}, new int[] {2, 1, 2, 2});
    INDArray b = Nd4j.create(new double[] {1, 1});
    Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding);
    layer.setParam("W", W);
    layer.setParam("b", b);

    return layer;
  }
  private static void checkNinNoutForEachLayer(
      int[] expNin, int[] expNout, MultiLayerConfiguration conf, MultiLayerNetwork network) {

    // Check configuration
    for (int i = 0; i < expNin.length; i++) {
      NeuralNetConfiguration layerConf = conf.getConf(i);
      assertTrue(layerConf.getNIn() == expNin[i]);
      assertTrue(layerConf.getNOut() == expNout[i]);
    }

    // Check Layer
    for (int i = 0; i < expNin.length; i++) {
      Layer layer = network.getLayers()[i];
      assertTrue(layer.conf().getNIn() == expNin[i]);
      assertTrue(layer.conf().getNOut() == expNout[i]);
      int[] weightShape = layer.getParam(DefaultParamInitializer.WEIGHT_KEY).shape();
      assertTrue(weightShape[0] == expNin[i]);
      assertTrue(weightShape[1] == expNout[i]);
    }
  }
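  // Hedged usage sketch (the sizes are illustrative assumptions, not taken from a real test): for a
  // two-layer network with layer sizes 4 -> 3 -> 2, the helper above would be called as
  //   checkNinNoutForEachLayer(new int[] {4, 3}, new int[] {3, 2}, conf, network);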
  @Test
  public void testActivateResults() {
    Layer layer = getContainedConfig();
    INDArray input = getContainedData();
    INDArray expectedOutput =
        Nd4j.create(
            new double[] {
              0.98201379, 0.98201379, 0.98201379, 0.98201379, 0.99966465,
              0.99966465, 0.99966465, 0.99966465, 0.98201379, 0.98201379,
              0.98201379, 0.98201379, 0.99966465, 0.99966465, 0.99966465,
              0.99966465, 0.98201379, 0.98201379, 0.98201379, 0.98201379,
              0.99966465, 0.99966465, 0.99966465, 0.99966465, 0.98201379,
              0.98201379, 0.98201379, 0.98201379, 0.99966465, 0.99966465,
              0.99966465, 0.99966465
            },
            new int[] {1, 2, 4, 4});

    INDArray convActivations = layer.activate(input);

    assertEquals(expectedOutput, convActivations);
    assertArrayEquals(expectedOutput.shape(), convActivations.shape());
  }
  @Test
  public void testAdaGradUpdater() {
    double lr = 1e-2;

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      val = entry.getValue();
      gradExpected = Transforms.sqrt(val.mul(val).add(epsilon)).rdiv(lr).mul(val);
      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
  }
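  // Hedged sketch (an assumption, not dl4j API): the AdaGrad expectation from the loop above. With
  // an all-zero history, the accumulated squared gradient after one step is just g^2, so the
  // expected update is lr * g / sqrt(g^2 + eps); rdiv(lr) computes lr / sqrt(...).
  private static INDArray adaGradExpectedFirstStep(INDArray grad, double lr, double eps) {
    return Transforms.sqrt(grad.mul(grad).add(eps)).rdiv(lr).mul(grad);
  }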
  public static void main(String[] args) {

    final int numRows = 2;
    final int numColumns = 2;
    int nChannels = 1;
    int outputNum = 3;
    int numSamples = 150;
    int batchSize = 110;
    int iterations = 10;
    int splitTrainNum = 100;
    int seed = 123;
    int listenerFreq = 1;

    /** Set a neural network configuration with multiple layers */
    log.info("Load data....");
    DataSetIterator irisIter = new IrisDataSetIterator(batchSize, numSamples);
    DataSet iris = irisIter.next();
    iris.normalizeZeroMeanZeroUnitVariance();

    SplitTestAndTrain trainTest = iris.splitTestAndTrain(splitTrainNum, new Random(seed));

    MultiLayerConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .seed(seed)
            .iterations(iterations)
            .batchSize(batchSize)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .constrainGradientToUnitNorm(true)
            .l2(2e-4)
            .regularization(true)
            .useDropConnect(true)
            .list(2)
            .layer(
                0,
                new ConvolutionLayer.Builder(new int[] {1, 1})
                    .nIn(nChannels)
                    .nOut(6)
                    .dropOut(0.5)
                    .activation("relu")
                    .weightInit(WeightInit.XAVIER)
                    .build())
            .layer(
                1,
                new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .nIn(6)
                    .nOut(outputNum)
                    .weightInit(WeightInit.XAVIER)
                    .activation("softmax")
                    .build())
            .inputPreProcessor(0, new FeedForwardToCnnPreProcessor(numRows, numColumns, nChannels))
            .inputPreProcessor(1, new CnnToFeedForwardPreProcessor())
            .backprop(true)
            .pretrain(false)
            .build();

    log.info("Build model....");
    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq)));

    log.info("Train model....");
    model.fit(trainTest.getTrain());

    log.info("Evaluate weights....");
    for (org.deeplearning4j.nn.api.Layer layer : model.getLayers()) {
      INDArray w = layer.getParam(DefaultParamInitializer.WEIGHT_KEY);
      log.info("Weights: " + w);
    }

    log.info("Evaluate model....");
    Evaluation eval = new Evaluation(outputNum);
    INDArray output = model.output(trainTest.getTest().getFeatureMatrix());
    eval.eval(trainTest.getTest().getLabels(), output);
    log.info(eval.stats());

    log.info("****************Example finished********************");
  }
  @Test
  public void testAdaDeltaUpdate() {
    INDArray dxSquared;
    Map<String, INDArray> msg = new HashMap<>();
    Map<String, INDArray> msdx = new HashMap<>();

    double rho = 0.85;

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .rho(rho)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (int i = 0; i < 2; i++) {
      updater.update(layer, gradient, i, 1);

      // calculations for one iteration / update

      for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        key = entry.getKey();
        val = entry.getValue();
        INDArray msgTmp = msg.get(key);
        INDArray msdxTmp = msdx.get(key);

        if (msgTmp == null) {
          msgTmp = Nd4j.zeros(val.shape());
          msdxTmp = Nd4j.zeros(val.shape());
        }

        msgTmp.muli(rho);
        msgTmp.addi(1 - rho).muli(val.mul(val));

        gradExpected =
            Transforms.sqrt(msdxTmp.add(Nd4j.EPS_THRESHOLD))
                .divi(Transforms.sqrt(msgTmp.add(Nd4j.EPS_THRESHOLD)))
                .muli(val);
        gradientDup.setGradientFor(key, gradExpected);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));

        msdxTmp.muli(rho);
        dxSquared = gradExpected.mul(gradExpected);
        msdxTmp.addi(dxSquared.muli(1 - rho));

        msg.put(key, msgTmp);
        msdx.put(key, msdxTmp);
      }
      assertEquals(rho, layer.conf().getLayer().getRho(), 1e-4);
    }
  }
  /** Apply gradient normalization: scale based on L2, clipping etc. */
  public void preApply(Layer layer, Gradient gradient, int iteration) {
    GradientNormalization normalization = layer.conf().getLayer().getGradientNormalization();
    if (normalization == null || normalization == GradientNormalization.None) return; // no op

    final double threshold = layer.conf().getLayer().getGradientNormalizationThreshold();

    switch (normalization) {
      case RenormalizeL2PerLayer:
        double sumSquares = 0.0;
        for (INDArray g : gradient.gradientForVariable().values()) {
          double l2 = g.norm2Number().doubleValue();
          // l2 norm: sqrt(sum_i g_i^2)
          sumSquares += l2 * l2;
        }
        double layerL2 = FastMath.sqrt(sumSquares);
        for (INDArray g : gradient.gradientForVariable().values()) {
          g.divi(layerL2);
        }
        break;
      case RenormalizeL2PerParamType:
        for (INDArray g : gradient.gradientForVariable().values()) {
          double l2 =
              Nd4j.getExecutioner().execAndReturn(new Norm2(g)).getFinalResult().doubleValue();
          g.divi(l2);
        }
        break;
      case ClipElementWiseAbsoluteValue:
        Condition absValueCondition = new AbsValueGreaterThan(threshold);
        Function<Number, Number> clipFn =
            new Function<Number, Number>() {
              @Override
              public Number apply(Number number) {
                return (number.doubleValue() > threshold ? threshold : -threshold);
              }
            };

        for (INDArray g : gradient.gradientForVariable().values()) {
          BooleanIndexing.applyWhere(g, absValueCondition, clipFn);
        }
        break;
      case ClipL2PerLayer:
        double sumSquares2 = 0.0;
        for (INDArray g : gradient.gradientForVariable().values()) {
          double l2 =
              Nd4j.getExecutioner().execAndReturn(new Norm2(g)).getFinalResult().doubleValue();
          // l2 norm: sqrt(sum_i g_i^2)
          sumSquares2 += l2 * l2;
        }
        double layerL22 = FastMath.sqrt(sumSquares2);
        if (layerL22 > threshold) {
          double scalingFactor = threshold / layerL22; // g = g / l2 * threshold
          for (INDArray g : gradient.gradientForVariable().values()) {
            g.muli(scalingFactor);
          }
        }
        break;
      case ClipL2PerParamType:
        for (INDArray g : gradient.gradientForVariable().values()) {
          double l2 = g.norm2Number().doubleValue();
          if (l2 > threshold) {
            double scalingFactor = l2 / threshold;
            g.divi(scalingFactor);
          }
        }
        break;
      default:
        throw new RuntimeException(
            "Unknown (or not implemented) gradient normalization strategy: " + normalization);
    }
  }
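  // Worked example for the RenormalizeL2PerLayer branch above: with two parameter gradients whose
  // L2 norms are 3 and 4, the layer-wide norm is sqrt(3*3 + 4*4) = 5 and every gradient element is
  // divided by 5. ClipL2PerLayer would instead rescale by threshold / 5, and only if 5 > threshold.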
  public static void main(String... args) throws Exception {
    int numFeatures = 40;
    int iterations = 5;
    int seed = 123;
    int listenerFreq = iterations / 5;
    Nd4j.getRandom().setSeed(seed);

    log.info("Load dat....");
    INDArray input =
        Nd4j.create(
            2,
            numFeatures); // have to be at least two or else output layer gradient is a scalar and
    // cause exception
    INDArray labels = Nd4j.create(2, 2);

    INDArray row0 = Nd4j.create(1, numFeatures);
    row0.assign(0.1);
    input.putRow(0, row0);
    labels.put(0, 1, 1); // one-hot label: set the second column for the first example

    INDArray row1 = Nd4j.create(1, numFeatures);
    row1.assign(0.2);

    input.putRow(1, row1);
    labels.put(1, 0, 1); // one-hot label: set the first column for the second example

    DataSet trainingSet = new DataSet(input, labels);

    log.info("Build model....");
    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .layer(new RBM())
            .nIn(trainingSet.numInputs())
            .nOut(trainingSet.numOutcomes())
            .seed(seed)
            .weightInit(WeightInit.SIZE)
            .constrainGradientToUnitNorm(true)
            .iterations(iterations)
            .activationFunction("tanh")
            .visibleUnit(RBM.VisibleUnit.GAUSSIAN)
            .hiddenUnit(RBM.HiddenUnit.RECTIFIED)
            .lossFunction(LossFunctions.LossFunction.RMSE_XENT)
            .learningRate(1e-1f)
            .optimizationAlgo(OptimizationAlgorithm.ITERATION_GRADIENT_DESCENT)
            .build();
    Layer model = LayerFactories.getFactory(conf).create(conf);
    model.setIterationListeners(
        Collections.singletonList((IterationListener) new ScoreIterationListener(listenerFreq)));

    log.info("Evaluate weights....");
    INDArray w = model.getParam(DefaultParamInitializer.WEIGHT_KEY);
    log.info("Weights: " + w);

    log.info("Train model....");
    model.fit(trainingSet.getFeatureMatrix());

    log.info("Visualize training results....");
    // Work in progress to get NeuralNetPlotter functioning
    NeuralNetPlotter plotter = new NeuralNetPlotter();
    plotter.plotNetworkGradient(model, model.gradient(), 10);
  }
  public static void testBackward() {
    for (TestCase testCase : allTestCases) {
      try (BufferedWriter writer =
          new BufferedWriter(new FileWriter(new File("dl4jPerformance.csv"), true))) {
        ConvolutionLayer convolutionLayerBuilder =
            new ConvolutionLayer.Builder(testCase.kW, testCase.kH)
                .nIn(testCase.nInputPlane)
                .stride(testCase.dW, testCase.dH)
                .padding(testCase.padW, testCase.padH)
                .nOut(testCase.nOutputPlane)
                .build();

        MultiLayerConfiguration.Builder builder =
            new NeuralNetConfiguration.Builder().list().layer(0, convolutionLayerBuilder);

        MultiLayerConfiguration conf = builder.build();
        MultiLayerNetwork model = new MultiLayerNetwork(conf);
        model.init();
        INDArray input =
            Nd4j.rand(
                seed, batchSize, testCase.nInputPlane, testCase.inputWidth, testCase.inputHeight);
        model.setInput(input);
        model.getLayer(0).setInput(input);
        model.feedForward();
        org.deeplearning4j.nn.api.Layer convolutionLayer = model.getLayer(0);

        INDArray output = convolutionLayer.activate();
        INDArray epsilon =
            Nd4j.rand(seed, output.size(0), output.size(1), output.size(2), output.size(3));
        Method initGradientView = model.getClass().getDeclaredMethod("initGradientsView");
        initGradientView.setAccessible(true);
        initGradientView.invoke(model);

        double start = System.nanoTime();
        for (int i = 0; i < backwardIterations; i++) {
          convolutionLayer.backpropGradient(epsilon);
        }
        double end = System.nanoTime();
        double timeMillis = (end - start) / 1e6 / backwardIterations;

        writer.write(
            "Convolution("
                + testCase.nInputPlane
                + " "
                + testCase.nOutputPlane
                + " "
                + testCase.kW
                + " "
                + testCase.kH
                + " "
                + testCase.dW
                + " "
                + testCase.dH
                + " "
                + testCase.padW
                + " "
                + testCase.padH
                + " "
                + testCase.inputWidth
                + " "
                + testCase.inputHeight
                + ") "
                + " backward, "
                + timeMillis
                + "\n");
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }
  }