// Note: precision is slightly off in this test, but the numbers are close.
// A future release should investigate how to resolve the discrepancy.
@Test
public void testCalculateDeltaContained() {
  Layer layer = getContainedConfig();
  INDArray input = getContainedData();
  INDArray col = getContainedCol();
  INDArray epsilon = Nd4j.ones(1, 2, 4, 4);

  INDArray expectedOutput =
      Nd4j.create(
          new double[] {
            0.02036651, 0.02036651, 0.02036651, 0.02036651, 0.00039383, 0.00039383, 0.00039383, 0.00039383,
            0.02036651, 0.02036651, 0.02036651, 0.02036651, 0.00039383, 0.00039383, 0.00039383, 0.00039383,
            0.02036651, 0.02036651, 0.02036651, 0.02036651, 0.00039383, 0.00039383, 0.00039383, 0.00039383,
            0.02036651, 0.02036651, 0.02036651, 0.02036651, 0.00039383, 0.00039383, 0.00039383, 0.00039383
          },
          new int[] {1, 2, 4, 4});

  layer.setInput(input);
  org.deeplearning4j.nn.layers.convolution.ConvolutionLayer layer2 =
      (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) layer;
  layer2.setCol(col);
  INDArray delta = layer2.calculateDelta(epsilon);

  assertArrayEquals(expectedOutput.shape(), delta.shape());
  assertEquals(expectedOutput, delta);
}
// TODO remove/move, technically this is testing Nd4j functionality
@Test
public void testCreateFeatureMapMethod() {
  Layer layer = getContainedConfig();
  INDArray input = getContainedData();
  int inputWidth = input.shape()[0];
  int featureMapWidth =
      (inputWidth + layer.conf().getPadding()[0] * 2 - layer.conf().getKernelSize()[0])
              / layer.conf().getStride()[0]
          + 1;

  INDArray expectedOutput =
      Nd4j.create(
          new double[] {
            1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3,
            1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3,
            2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4,
            2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4
          },
          new int[] {1, 1, 2, 2, 4, 4});

  layer.setInput(input);
  org.deeplearning4j.nn.layers.convolution.ConvolutionLayer layer2 =
      (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) layer;
  INDArray featureMaps = layer2.createFeatureMapColumn();

  assertEquals(featureMapWidth, featureMaps.shape()[4]);
  assertEquals(expectedOutput.shape(), featureMaps.shape());
  assertEquals(expectedOutput, featureMaps);
}
@Test public void testAdamUpdater() { INDArray m, v; double lr = 0.01; int iteration = 0; double beta1 = 0.8; double beta2 = 0.888; NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .learningRate(lr) .iterations(iteration) .adamMeanDecay(beta1) .adamVarDecay(beta2) .layer( new DenseLayer.Builder() .nIn(nIn) .nOut(nOut) .updater(org.deeplearning4j.nn.conf.Updater.ADAM) .build()) .build(); int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true); INDArray params = Nd4j.create(1, numParams); Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true); Updater updater = UpdaterCreator.getUpdater(layer); int updaterStateSize = updater.stateSizeForLayer(layer); INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); updater.update(layer, gradient, iteration, 1); double beta1t = FastMath.pow(beta1, iteration); double beta2t = FastMath.pow(beta2, iteration); double alphat = lr * FastMath.sqrt(1 - beta2t) / (1 - beta1t); if (Double.isNaN(alphat) || alphat == 0.0) alphat = epsilon; Gradient gradientDup = new DefaultGradient(); gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient); gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient); for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) { val = entry.getValue(); m = Nd4j.zeros(val.shape()); v = Nd4j.zeros(val.shape()); m.muli(beta1).addi(val.mul(1.0 - beta1)); v.muli(beta2).addi(val.mul(val).mul(1.0 - beta2)); gradExpected = m.mul(alphat).divi(Transforms.sqrt(v).addi(epsilon)); if (!gradExpected.equals(gradient.getGradientFor(entry.getKey()))) { System.out.println(Arrays.toString(gradExpected.dup().data().asFloat())); System.out.println( Arrays.toString(gradient.getGradientFor(entry.getKey()).dup().data().asFloat())); } assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); } assertEquals(beta1, layer.conf().getLayer().getAdamMeanDecay(), 1e-4); assertEquals(beta2, layer.conf().getLayer().getAdamVarDecay(), 1e-4); }
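// For reference, the expected values in testAdamUpdater follow the standard Adam rule.
// A minimal scalar sketch of the update the test reproduces; adamUpdate is a hypothetical
// helper, not part of the test class:
static double adamUpdate(double grad, double m, double v, double lr,
                         double beta1, double beta2, int t, double eps) {
  m = beta1 * m + (1 - beta1) * grad;         // first-moment (mean) estimate
  v = beta2 * v + (1 - beta2) * grad * grad;  // second-moment (uncentered variance) estimate
  double alphaT = lr * Math.sqrt(1 - Math.pow(beta2, t)) / (1 - Math.pow(beta1, t));
  // For t == 0 this is 0/0 = NaN; the test falls back to epsilon in that case.
  return alphaT * m / (Math.sqrt(v) + eps);   // value applied to the parameters
}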
@Test public void testSGDUpdater() { double lr = 0.05; NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .learningRate(lr) .layer( new DenseLayer.Builder() .nIn(nIn) .nOut(nOut) .updater(org.deeplearning4j.nn.conf.Updater.SGD) .build()) .build(); int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true); INDArray params = Nd4j.create(1, numParams); Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true); Updater updater = UpdaterCreator.getUpdater(layer); updater.update(layer, gradient, -1, 1); Gradient gradientDup = new DefaultGradient(); gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup()); gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup()); for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) { val = entry.getValue(); gradExpected = val.mul(lr); assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); } assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4); }
@Test
public void testSubSampleLayerNoneBackprop() throws Exception {
  Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding);

  Pair<Gradient, INDArray> out = layer.backpropGradient(epsilon);
  assertEquals(epsilon.shape().length, out.getSecond().shape().length);
  assertEquals(nExamples, out.getSecond().size(1)); // depth retained
}
@Test public void testCNNInputSetupMNIST() throws Exception { INDArray input = getMnistData(); Layer layer = getMNISTConfig(); layer.activate(input); assertEquals(input, layer.input()); assertArrayEquals(input.shape(), layer.input().shape()); }
@Test
public void testGravesLSTMInit() {
  int nIn = 8;
  int nOut = 25;
  int nHiddenUnits = 17;
  MultiLayerConfiguration conf =
      new NeuralNetConfiguration.Builder()
          .list(2)
          .layer(
              0,
              new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                  .nIn(nIn)
                  .nOut(nHiddenUnits)
                  .weightInit(WeightInit.DISTRIBUTION)
                  .activation("tanh")
                  .build())
          .layer(
              1,
              new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS)
                  .nIn(nHiddenUnits)
                  .nOut(nOut)
                  .weightInit(WeightInit.DISTRIBUTION)
                  .activation("tanh")
                  .build())
          .build();
  MultiLayerNetwork network = new MultiLayerNetwork(conf);
  network.init();

  // Ensure that we have the correct number of weights and biases, and that these have the correct shapes.
  Layer layer = network.getLayer(0);
  assertTrue(layer instanceof GravesLSTM);

  Map<String, INDArray> paramTable = layer.paramTable();
  assertTrue(paramTable.size() == 3); // 2 sets of weights, 1 set of biases

  INDArray recurrentWeights = paramTable.get(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
  assertArrayEquals(
      recurrentWeights.shape(),
      new int[] {nHiddenUnits, 4 * nHiddenUnits + 3}); // Should be shape: [layerSize,4*layerSize+3]
  INDArray inputWeights = paramTable.get(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
  assertArrayEquals(
      inputWeights.shape(),
      new int[] {nIn, 4 * nHiddenUnits}); // Should be shape: [nIn,4*layerSize]
  INDArray biases = paramTable.get(GravesLSTMParamInitializer.BIAS_KEY);
  assertArrayEquals(
      biases.shape(), new int[] {1, 4 * nHiddenUnits}); // Should be shape: [1,4*layerSize]

  // Want forget gate biases to be initialized to > 0. See parameter initializer for details.
  INDArray forgetGateBiases =
      biases.get(new INDArrayIndex[] {NDArrayIndex.interval(nHiddenUnits, 2 * nHiddenUnits)});
  assertTrue(forgetGateBiases.gt(0).sum(Integer.MAX_VALUE).getDouble(0) == nHiddenUnits);

  int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
  assertTrue(nParams == layer.numParams());
}
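// As a quick sanity check of the final numParams assertion above, the counts for this
// configuration (nIn = 8, nHiddenUnits = 17) work out as follows; this is a hypothetical
// calculation mirroring the asserted shapes, not part of the test class:
int recurrentParams = 17 * (4 * 17 + 3);                       // [layerSize, 4*layerSize+3] -> 1207
int inputParams = 8 * (4 * 17);                                // [nIn, 4*layerSize]         -> 544
int biasParams = 4 * 17;                                       // [1, 4*layerSize]           -> 68
int totalParams = recurrentParams + inputParams + biasParams;  // 1819 == layer.numParams()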
@Test public void testFeatureMapShape() throws Exception { INDArray input = getMnistData(); Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); INDArray convActivations = layer.activate(input); assertEquals(featureMapWidth, convActivations.size(2)); assertEquals(depth, convActivations.size(0)); }
// Note: precision is slightly off in this test, but the numbers are close.
// A future release should investigate how to resolve the discrepancy.
@Test
public void testBackpropResultsContained() {
  Layer layer = getContainedConfig();
  INDArray input = getContainedData();
  INDArray col = getContainedCol();
  INDArray epsilon = Nd4j.ones(1, 2, 4, 4);

  INDArray expectedBiasGradient =
      Nd4j.create(new double[] {0.16608272, 0.16608272}, new int[] {1, 2});
  INDArray expectedWeightGradient =
      Nd4j.create(
          new double[] {
            0.17238397, 0.17238397, 0.33846668, 0.33846668,
            0.17238397, 0.17238397, 0.33846668, 0.33846668
          },
          new int[] {2, 1, 2, 2});
  INDArray expectedEpsilon =
      Nd4j.create(
          new double[] {
            0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.00039383, 0., 0.,
            0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.00039383, 0., 0.,
            0.02036651, 0.02036651, 0.02036651, 0.02036651, 0.02036651, 0.02036651, 0., 0.,
            0.02036651, 0.02036651, 0.02036651, 0.02036651, 0.02036651, 0.02036651, 0., 0.,
            0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.00039383, 0., 0.,
            0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.00039383, 0.00039383, 0., 0.,
            0., 0., 0., 0., 0., 0., 0., 0.,
            0., 0., 0., 0., 0., 0., 0., 0.
          },
          new int[] {1, 1, 8, 8});

  layer.setInput(input);
  org.deeplearning4j.nn.layers.convolution.ConvolutionLayer layer2 =
      (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) layer;
  layer2.setCol(col);
  Pair<Gradient, INDArray> pair = layer2.backpropGradient(epsilon);

  assertArrayEquals(expectedEpsilon.shape(), pair.getSecond().shape());
  assertArrayEquals(expectedWeightGradient.shape(), pair.getFirst().getGradientFor("W").shape());
  assertArrayEquals(expectedBiasGradient.shape(), pair.getFirst().getGradientFor("b").shape());
  assertEquals(expectedEpsilon, pair.getSecond());
  assertEquals(expectedWeightGradient, pair.getFirst().getGradientFor("W"));
  assertEquals(expectedBiasGradient, pair.getFirst().getGradientFor("b"));
}
@Test public void testCNNInputSetup() throws Exception { INDArray input = getMnistData(); int[] stride = new int[] {3, 3}; int[] padding = new int[] {1, 1}; Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); layer.activate(input); assertEquals(input, layer.input()); assertEquals(input.shape(), layer.input().shape()); }
@Test
public void testGRUInit() {
  int nIn = 8;
  int nOut = 25;
  int nHiddenUnits = 17;
  MultiLayerConfiguration conf =
      new NeuralNetConfiguration.Builder()
          .list(2)
          .layer(
              0,
              new org.deeplearning4j.nn.conf.layers.GRU.Builder()
                  .nIn(nIn)
                  .nOut(nHiddenUnits)
                  .weightInit(WeightInit.DISTRIBUTION)
                  .build())
          .layer(
              1,
              new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS)
                  .nIn(nHiddenUnits)
                  .nOut(nOut)
                  .weightInit(WeightInit.DISTRIBUTION)
                  .build())
          .build();
  MultiLayerNetwork network = new MultiLayerNetwork(conf);
  network.init();

  // Ensure that we have the correct number of weights and biases, and that these have the correct shapes.
  Layer layer = network.getLayer(0);
  assertTrue(layer instanceof GRU);

  Map<String, INDArray> paramTable = layer.paramTable();
  assertTrue(paramTable.size() == 3); // 2 sets of weights, 1 set of biases

  INDArray recurrentWeights = paramTable.get(GRUParamInitializer.RECURRENT_WEIGHT_KEY);
  assertArrayEquals(
      recurrentWeights.shape(),
      new int[] {nHiddenUnits, 3 * nHiddenUnits}); // Should be shape: [layerSize,3*layerSize]
  INDArray inputWeights = paramTable.get(GRUParamInitializer.INPUT_WEIGHT_KEY);
  assertArrayEquals(
      inputWeights.shape(),
      new int[] {nIn, 3 * nHiddenUnits}); // Should be shape: [nIn,3*layerSize]
  INDArray biases = paramTable.get(GRUParamInitializer.BIAS_KEY);
  assertArrayEquals(
      biases.shape(), new int[] {1, 3 * nHiddenUnits}); // Should be shape: [1,3*layerSize]

  int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
  assertTrue(nParams == layer.numParams());
}
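// Likewise, a quick sanity check of the GRU parameter count asserted above
// (nIn = 8, nHiddenUnits = 17); hypothetical calculation, not part of the test class:
int recurrentParams = 17 * (3 * 17);                           // [layerSize, 3*layerSize] -> 867
int inputParams = 8 * (3 * 17);                                // [nIn, 3*layerSize]       -> 408
int biasParams = 3 * 17;                                       // [1, 3*layerSize]         -> 51
int totalParams = recurrentParams + inputParams + biasParams;  // 1326 == layer.numParams()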
public Layer getContainedConfig() { int[] kernelSize = new int[] {2, 2}; int[] stride = new int[] {2, 2}; int[] padding = new int[] {0, 0}; int nChannelsIn = 1; int depth = 2; INDArray W = Nd4j.create(new double[] {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}, new int[] {2, 1, 2, 2}); INDArray b = Nd4j.create(new double[] {1, 1}); Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); layer.setParam("W", W); layer.setParam("b", b); return layer; }
@Test public void testRMSPropUpdater() { double lr = 0.01; double rmsDecay = 0.25; Map<String, INDArray> lastG = new HashMap<>(); NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .learningRate(lr) .rmsDecay(rmsDecay) .layer( new DenseLayer.Builder() .nIn(nIn) .nOut(nOut) .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP) .build()) .build(); int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true); INDArray params = Nd4j.create(1, numParams); Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true); Updater updater = UpdaterCreator.getUpdater(layer); int updaterStateSize = updater.stateSizeForLayer(layer); INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); updater.update(layer, gradient, -1, 1); Gradient gradientDup = new DefaultGradient(); gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup()); gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup()); for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) { key = entry.getKey(); val = entry.getValue(); INDArray lastGTmp = lastG.get(key); if (lastGTmp == null) lastGTmp = Nd4j.zeros(val.shape()); lastGTmp.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay)); gradExpected = val.mul(lr).div(Transforms.sqrt(lastGTmp.add(Nd4j.EPS_THRESHOLD))); assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); lastG.put(key, lastGTmp); } assertEquals(rmsDecay, layer.conf().getLayer().getRmsDecay(), 1e-4); }
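// For reference, a minimal scalar sketch of the RMSProp rule the loop in testRMSPropUpdater
// reproduces; rmsPropUpdate is a hypothetical helper, not part of the test class:
static double rmsPropUpdate(double grad, double lastG, double lr, double rmsDecay, double eps) {
  lastG = rmsDecay * lastG + (1 - rmsDecay) * grad * grad; // running average of squared gradients
  return lr * grad / Math.sqrt(lastG + eps);               // scaled update applied to the parameters
}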
/**
 * Apply the regularization.
 *
 * @param layer the layer whose parameter is being regularized
 * @param gradient the gradient for the given parameter
 * @param param the parameter key (e.g. weight or bias key)
 */
public void postApply(Layer layer, INDArray gradient, String param) {
  NeuralNetConfiguration conf = layer.conf();
  INDArray params = layer.getParam(param);
  if (conf.isUseRegularization()
      && conf.getLayer().getL2() > 0
      && !(param.equals(DefaultParamInitializer.BIAS_KEY)))
    // dC/dw = dC0/dw + lambda/n * w, where C0 is the pre-L2 cost function
    gradient.addi(params.mul(conf.getLayer().getL2()));
  if (conf.isUseRegularization()
      && conf.getLayer().getL1() > 0
      && !(param.equals(DefaultParamInitializer.BIAS_KEY)))
    gradient.addi(Transforms.sign(params).muli(conf.getLayer().getL1()));
  if (conf.isMiniBatch()) gradient.divi(layer.getInputMiniBatchSize());
  if (conf.isConstrainGradientToUnitNorm()) gradient.divi(gradient.norm2(Integer.MAX_VALUE));
}
@Test public void testFeatureMapShapeMNIST() throws Exception { int inputWidth = 28; int[] stride = new int[] {2, 2}; int[] padding = new int[] {0, 0}; int[] kernelSize = new int[] {9, 9}; int nChannelsIn = 1; int depth = 20; int featureMapWidth = (inputWidth + padding[1] * 2 - kernelSize[1]) / stride[1] + 1; INDArray input = getMnistData(); Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); INDArray convActivations = layer.activate(input); assertEquals(featureMapWidth, convActivations.size(2)); assertEquals(depth, convActivations.size(1)); }
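// For reference: with the MNIST numbers used in testFeatureMapShapeMNIST,
// (28 + 0 * 2 - 9) / 2 + 1 == 10, so each of the 20 activation maps should be 10x10.
// A minimal sketch of that formula; featureMapSize is a hypothetical helper,
// not part of the test class:
static int featureMapSize(int inputSize, int kernelSize, int stride, int padding) {
  return (inputSize + 2 * padding - kernelSize) / stride + 1;
}
// featureMapSize(28, 9, 2, 0) == 10 for the MNIST configuration above.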
@Test public void testNestorovsUpdater() { double lr = 1e-2; double mu = 0.6; INDArray v, vPrev; NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .learningRate(lr) .momentum(mu) .layer( new DenseLayer.Builder() .nIn(nIn) .nOut(nOut) .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS) .build()) .build(); int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true); INDArray params = Nd4j.create(1, numParams); Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true); Updater updater = UpdaterCreator.getUpdater(layer); int updaterStateSize = updater.stateSizeForLayer(layer); INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); updater.update(layer, gradient, -1, 1); Gradient gradientDup = new DefaultGradient(); gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup()); gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup()); for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) { val = entry.getValue(); v = Nd4j.zeros(val.shape()); vPrev = v; v = vPrev.mul(mu).subi(val.mul(lr)); gradExpected = vPrev.muli(mu).addi(v.mul(-mu - 1)); assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); } assertEquals(mu, layer.conf().getLayer().getMomentum(), 1e-4); }
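// For reference, a minimal scalar sketch of the Nesterov momentum rule the loop in
// testNestorovsUpdater reproduces (vPrev is zero on the first update, as in the test);
// nesterovUpdate is a hypothetical helper, not part of the test class:
static double nesterovUpdate(double grad, double vPrev, double lr, double mu) {
  double v = mu * vPrev - lr * grad; // velocity update
  return mu * vPrev - (1 + mu) * v;  // value applied to the parameters
}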
@Override public GradientUpdater init(String variable, INDArray gradient, Layer layer) { AdaGrad adaGrad = (AdaGrad) updaterForVariable.get(variable); if (adaGrad == null) { adaGrad = new AdaGrad(layer.conf().getLr()); updaterForVariable.put(variable, adaGrad); } return adaGrad; }
public Layer getContainedConfig() {
  // int inputWidth = input.shape()[0];
  // int inputHeight = input.shape()[1];
  int[] kernelSize = new int[] {2, 2};
  int[] stride = new int[] {2, 2};
  int[] padding = new int[] {0, 0};
  int nChannelsIn = 1;
  int depth = 2;
  // int featureMapWidth = (inputWidth + padding[0] * 2 - kernelSize[0]) / stride[0] + 1;
  // int featureMapHeight = (inputHeight + padding[1] * 2 - kernelSize[1]) / stride[0] + 1;
  INDArray W =
      Nd4j.create(new double[] {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}, new int[] {2, 1, 2, 2});
  INDArray b = Nd4j.create(new double[] {1, 1});
  Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding);
  layer.setParam("W", W);
  layer.setParam("b", b);
  return layer;
}
private static void checkNinNoutForEachLayer(
    int[] expNin, int[] expNout, MultiLayerConfiguration conf, MultiLayerNetwork network) {
  // Check configuration
  for (int i = 0; i < expNin.length; i++) {
    NeuralNetConfiguration layerConf = conf.getConf(i);
    assertTrue(layerConf.getNIn() == expNin[i]);
    assertTrue(layerConf.getNOut() == expNout[i]);
  }

  // Check Layer
  for (int i = 0; i < expNin.length; i++) {
    Layer layer = network.getLayers()[i];
    assertTrue(layer.conf().getNIn() == expNin[i]);
    assertTrue(layer.conf().getNOut() == expNout[i]);
    int[] weightShape = layer.getParam(DefaultParamInitializer.WEIGHT_KEY).shape();
    assertTrue(weightShape[0] == expNin[i]);
    assertTrue(weightShape[1] == expNout[i]);
  }
}
@Test public void testActivateResults() { Layer layer = getContainedConfig(); INDArray input = getContainedData(); INDArray expectedOutput = Nd4j.create( new double[] { 0.98201379, 0.98201379, 0.98201379, 0.98201379, 0.99966465, 0.99966465, 0.99966465, 0.99966465, 0.98201379, 0.98201379, 0.98201379, 0.98201379, 0.99966465, 0.99966465, 0.99966465, 0.99966465, 0.98201379, 0.98201379, 0.98201379, 0.98201379, 0.99966465, 0.99966465, 0.99966465, 0.99966465, 0.98201379, 0.98201379, 0.98201379, 0.98201379, 0.99966465, 0.99966465, 0.99966465, 0.99966465 }, new int[] {1, 2, 4, 4}); INDArray convActivations = layer.activate(input); assertEquals(expectedOutput, convActivations); assertEquals(expectedOutput.shape(), convActivations.shape()); }
@Test public void testAdaGradUpdater() { double lr = 1e-2; NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder() .learningRate(lr) .layer( new DenseLayer.Builder() .nIn(nIn) .nOut(nOut) .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD) .build()) .build(); int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true); INDArray params = Nd4j.create(1, numParams); Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true); Updater updater = UpdaterCreator.getUpdater(layer); int updaterStateSize = updater.stateSizeForLayer(layer); INDArray updaterState = Nd4j.create(1, updaterStateSize); updater.setStateViewArray(layer, updaterState, true); updater.update(layer, gradient, -1, 1); Gradient gradientDup = new DefaultGradient(); gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient); gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient); for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) { val = entry.getValue(); gradExpected = Transforms.sqrt(val.mul(val).add(epsilon)).rdiv(lr).mul(val); assertEquals(gradExpected, gradient.getGradientFor(entry.getKey())); } assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4); }
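// For reference, the AdaGrad expectation in testAdaGradUpdater corresponds to a first step,
// where the accumulated squared-gradient history equals the current squared gradient.
// A minimal scalar sketch; adaGradFirstStep is a hypothetical helper, not part of the test class:
static double adaGradFirstStep(double grad, double lr, double eps) {
  double history = grad * grad;                // accumulated squared gradient after one step
  return lr * grad / Math.sqrt(history + eps); // per-parameter scaled update
}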
public static void main(String[] args) { final int numRows = 2; final int numColumns = 2; int nChannels = 1; int outputNum = 3; int numSamples = 150; int batchSize = 110; int iterations = 10; int splitTrainNum = 100; int seed = 123; int listenerFreq = 1; /** Set a neural network configuration with multiple layers */ log.info("Load data...."); DataSetIterator irisIter = new IrisDataSetIterator(batchSize, numSamples); DataSet iris = irisIter.next(); iris.normalizeZeroMeanZeroUnitVariance(); SplitTestAndTrain trainTest = iris.splitTestAndTrain(splitTrainNum, new Random(seed)); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .iterations(iterations) .batchSize(batchSize) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .constrainGradientToUnitNorm(true) .l2(2e-4) .regularization(true) .useDropConnect(true) .list(2) .layer( 0, new ConvolutionLayer.Builder(new int[] {1, 1}) .nIn(nChannels) .nOut(6) .dropOut(0.5) .activation("relu") .weightInit(WeightInit.XAVIER) .build()) .layer( 1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .nIn(6) .nOut(outputNum) .weightInit(WeightInit.XAVIER) .activation("softmax") .build()) .inputPreProcessor(0, new FeedForwardToCnnPreProcessor(numRows, numColumns, nChannels)) .inputPreProcessor(1, new CnnToFeedForwardPreProcessor()) .backprop(true) .pretrain(false) .build(); log.info("Build model...."); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); model.setListeners(Arrays.asList((IterationListener) new ScoreIterationListener(listenerFreq))); log.info("Train model...."); model.fit(trainTest.getTrain()); log.info("Evaluate weights...."); for (org.deeplearning4j.nn.api.Layer layer : model.getLayers()) { INDArray w = layer.getParam(DefaultParamInitializer.WEIGHT_KEY); log.info("Weights: " + w); } log.info("Evaluate model...."); Evaluation eval = new Evaluation(outputNum); INDArray output = model.output(trainTest.getTest().getFeatureMatrix()); eval.eval(trainTest.getTest().getLabels(), output); log.info(eval.stats()); log.info("****************Example finished********************"); }
@Test
public void testAdaDeltaUpdate() {
  INDArray dxSquared;
  Map<String, INDArray> msg = new HashMap<>();
  Map<String, INDArray> msdx = new HashMap<>();

  double rho = 0.85;
  NeuralNetConfiguration conf =
      new NeuralNetConfiguration.Builder()
          .rho(rho)
          .layer(
              new DenseLayer.Builder()
                  .nIn(nIn)
                  .nOut(nOut)
                  .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA)
                  .build())
          .build();

  int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
  INDArray params = Nd4j.create(1, numParams);
  Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
  Updater updater = UpdaterCreator.getUpdater(layer);
  int updaterStateSize = updater.stateSizeForLayer(layer);
  INDArray updaterState = Nd4j.create(1, updaterStateSize);
  updater.setStateViewArray(layer, updaterState, true);

  Gradient gradientDup = new DefaultGradient();
  gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
  gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

  for (int i = 0; i < 2; i++) {
    updater.update(layer, gradient, i, 1);

    // calculations for one iteration / update
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      key = entry.getKey();
      val = entry.getValue();
      INDArray msgTmp = msg.get(key);
      INDArray msdxTmp = msdx.get(key);

      if (msgTmp == null) {
        msgTmp = Nd4j.zeros(val.shape());
        msdxTmp = Nd4j.zeros(val.shape());
      }

      msgTmp.muli(rho);
      msgTmp.addi(1 - rho).muli(val.mul(val));

      gradExpected =
          Transforms.sqrt(msdxTmp.add(Nd4j.EPS_THRESHOLD))
              .divi(Transforms.sqrt(msgTmp.add(Nd4j.EPS_THRESHOLD)))
              .muli(val);
      gradientDup.setGradientFor(key, gradExpected);
      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));

      msdxTmp.muli(rho);
      dxSquared = gradExpected.mul(gradExpected);
      msdxTmp.addi(dxSquared.muli(1 - rho));

      msg.put(key, msgTmp);
      msdx.put(key, msdxTmp);
    }
    assertEquals(rho, layer.conf().getLayer().getRho(), 1e-4);
  }
}
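// For comparison with the expectations computed in testAdaDeltaUpdate, a minimal scalar sketch
// of the standard AdaDelta rule (msg = running average of squared gradients, msdx = running
// average of squared updates); adaDeltaStep is a hypothetical helper, not part of the test class:
static double[] adaDeltaStep(double grad, double msg, double msdx, double rho, double eps) {
  msg = rho * msg + (1 - rho) * grad * grad;                   // accumulate squared gradients
  double update = Math.sqrt(msdx + eps) / Math.sqrt(msg + eps) * grad; // scaled update
  msdx = rho * msdx + (1 - rho) * update * update;             // accumulate squared updates
  return new double[] {update, msg, msdx};
}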
/** Apply gradient normalization: scale based on L2, clipping, etc. */
public void preApply(Layer layer, Gradient gradient, int iteration) {
  GradientNormalization normalization = layer.conf().getLayer().getGradientNormalization();
  if (normalization == null || normalization == GradientNormalization.None) return; // no op

  final double threshold = layer.conf().getLayer().getGradientNormalizationThreshold();

  switch (normalization) {
    case RenormalizeL2PerLayer:
      double sumSquares = 0.0;
      for (INDArray g : gradient.gradientForVariable().values()) {
        double l2 = g.norm2Number().doubleValue(); // l2 norm: sqrt(sum_i g_i^2)
        sumSquares += l2 * l2;
      }
      double layerL2 = FastMath.sqrt(sumSquares);
      for (INDArray g : gradient.gradientForVariable().values()) {
        g.divi(layerL2);
      }
      break;
    case RenormalizeL2PerParamType:
      for (INDArray g : gradient.gradientForVariable().values()) {
        double l2 =
            Nd4j.getExecutioner().execAndReturn(new Norm2(g)).getFinalResult().doubleValue();
        g.divi(l2);
      }
      break;
    case ClipElementWiseAbsoluteValue:
      Condition absValueCondition = new AbsValueGreaterThan(threshold);
      Function<Number, Number> clipFn =
          new Function<Number, Number>() {
            @Override
            public Number apply(Number number) {
              return (number.doubleValue() > threshold ? threshold : -threshold);
            }
          };

      for (INDArray g : gradient.gradientForVariable().values()) {
        BooleanIndexing.applyWhere(g, absValueCondition, clipFn);
      }
      break;
    case ClipL2PerLayer:
      double sumSquares2 = 0.0;
      for (INDArray g : gradient.gradientForVariable().values()) {
        double l2 =
            Nd4j.getExecutioner().execAndReturn(new Norm2(g)).getFinalResult().doubleValue();
        // l2 norm: sqrt(sum_i g_i^2)
        sumSquares2 += l2 * l2;
      }
      double layerL22 = FastMath.sqrt(sumSquares2);
      if (layerL22 > threshold) {
        double scalingFactor = threshold / layerL22; // g = g / l2 * threshold
        for (INDArray g : gradient.gradientForVariable().values()) {
          g.muli(scalingFactor);
        }
      }
      break;
    case ClipL2PerParamType:
      for (INDArray g : gradient.gradientForVariable().values()) {
        double l2 = g.norm2Number().doubleValue();
        if (l2 > threshold) {
          double scalingFactor = l2 / threshold;
          g.divi(scalingFactor);
        }
      }
      break;
    default:
      throw new RuntimeException(
          "Unknown (or not implemented) gradient normalization strategy: " + normalization);
  }
}
public static void main(String... args) throws Exception {
  int numFeatures = 40;
  int iterations = 5;
  int seed = 123;
  int listenerFreq = iterations / 5;

  Nd4j.getRandom().setSeed(seed);

  log.info("Load data....");
  // Inputs have to contain at least two examples, or else the output layer gradient is a scalar
  // and causes an exception
  INDArray input = Nd4j.create(2, numFeatures);
  INDArray labels = Nd4j.create(2, 2);
  INDArray row0 = Nd4j.create(1, numFeatures);
  row0.assign(0.1);
  input.putRow(0, row0);
  labels.put(0, 1, 1); // set the second column of row 0
  INDArray row1 = Nd4j.create(1, numFeatures);
  row1.assign(0.2);
  input.putRow(1, row1);
  labels.put(1, 0, 1); // set the first column of row 1
  DataSet trainingSet = new DataSet(input, labels);

  log.info("Build model....");
  NeuralNetConfiguration conf =
      new NeuralNetConfiguration.Builder()
          .layer(new RBM())
          .nIn(trainingSet.numInputs())
          .nOut(trainingSet.numOutcomes())
          .seed(seed)
          .weightInit(WeightInit.SIZE)
          .constrainGradientToUnitNorm(true)
          .iterations(iterations)
          .activationFunction("tanh")
          .visibleUnit(RBM.VisibleUnit.GAUSSIAN)
          .hiddenUnit(RBM.HiddenUnit.RECTIFIED)
          .lossFunction(LossFunctions.LossFunction.RMSE_XENT)
          .learningRate(1e-1f)
          .optimizationAlgo(OptimizationAlgorithm.ITERATION_GRADIENT_DESCENT)
          .build();
  Layer model = LayerFactories.getFactory(conf).create(conf);
  model.setIterationListeners(
      Collections.singletonList((IterationListener) new ScoreIterationListener(listenerFreq)));

  log.info("Evaluate weights....");
  INDArray w = model.getParam(DefaultParamInitializer.WEIGHT_KEY);
  log.info("Weights: " + w);

  log.info("Train model....");
  model.fit(trainingSet.getFeatureMatrix());

  log.info("Visualize training results....");
  // Work in progress to get NeuralNetPlotter functioning
  NeuralNetPlotter plotter = new NeuralNetPlotter();
  plotter.plotNetworkGradient(model, model.gradient(), 10);
}
public static void testBackward() { for (TestCase testCase : allTestCases) { try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File("dl4jPerformance.csv"), true))) { ConvolutionLayer convolutionLayerBuilder = new ConvolutionLayer.Builder(testCase.kW, testCase.kH) .nIn(testCase.nInputPlane) .stride(testCase.dW, testCase.dH) .padding(testCase.padW, testCase.padH) .nOut(testCase.nOutputPlane) .build(); MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().list().layer(0, convolutionLayerBuilder); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork model = new MultiLayerNetwork(conf); model.init(); INDArray input = Nd4j.rand( seed, batchSize, testCase.nInputPlane, testCase.inputWidth, testCase.inputHeight); model.setInput(input); model.getLayer(0).setInput(input); model.feedForward(); org.deeplearning4j.nn.api.Layer convolutionLayer = model.getLayer(0); INDArray output = convolutionLayer.activate(); INDArray epsilon = Nd4j.rand(seed, output.size(0), output.size(1), output.size(2), output.size(3)); Method initGradientView = model.getClass().getDeclaredMethod("initGradientsView"); initGradientView.setAccessible(true); initGradientView.invoke(model); double start = System.nanoTime(); for (int i = 0; i < backwardIterations; i++) { convolutionLayer.backpropGradient(epsilon); } double end = System.nanoTime(); double timeMillis = (end - start) / 1e6 / backwardIterations; writer.write( "Convolution(" + testCase.nInputPlane + " " + testCase.nOutputPlane + " " + testCase.kW + " " + testCase.kH + " " + testCase.dW + " " + testCase.dH + " " + testCase.padW + " " + testCase.padH + " " + testCase.inputWidth + " " + testCase.inputHeight + ") " + " backward, " + timeMillis + "\n"); } catch (Exception ex) { ex.printStackTrace(); } } }