@Test
public void testSGDUpdater() {
    double lr = 0.05;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .layer(new DenseLayer.Builder()
                    .nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        gradExpected = val.mul(lr);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
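// For reference, the check above encodes the plain SGD scaling g' = lr * g. A minimal
// standalone sketch of that expected value, assuming the same ND4J imports as the tests;
// the helper name is illustrative and not part of the test class:
private static INDArray expectedSgd(INDArray grad, double lr) {
    // SGD keeps no state; it simply scales the raw gradient by the learning rate
    return grad.mul(lr);
}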
@Test
public void testNoOpUpdater() {
    Random r = new Random(12345L);
    double lr = 0.5;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .layer(new DenseLayer.Builder()
                    .nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.NONE)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);

    for (int i = 0; i < weightGradient.length(); i++) weightGradient.putScalar(i, r.nextDouble());
    for (int i = 0; i < biasGradient.length(); i++) biasGradient.putScalar(i, r.nextDouble());

    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradient);

    updater.update(layer, gradient, -1, 1);

    INDArray weightGradActual = gradient.getGradientFor(DefaultParamInitializer.WEIGHT_KEY);
    INDArray biasGradActual = gradient.getGradientFor(DefaultParamInitializer.BIAS_KEY);

    assertEquals(weightGradient, weightGradActual);
    assertEquals(biasGradient, biasGradActual);
}
@Test
public void testAdamUpdater() {
    INDArray m, v;
    double lr = 0.01;
    int iteration = 0;
    double beta1 = 0.8;
    double beta2 = 0.888;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .iterations(iteration)
            .adamMeanDecay(beta1)
            .adamVarDecay(beta2)
            .layer(new DenseLayer.Builder()
                    .nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADAM)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, iteration, 1);

    double beta1t = FastMath.pow(beta1, iteration);
    double beta2t = FastMath.pow(beta2, iteration);
    double alphat = lr * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
    if (Double.isNaN(alphat) || alphat == 0.0) alphat = epsilon;

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        m = Nd4j.zeros(val.shape());
        v = Nd4j.zeros(val.shape());

        m.muli(beta1).addi(val.mul(1.0 - beta1));
        v.muli(beta2).addi(val.mul(val).mul(1.0 - beta2));
        gradExpected = m.mul(alphat).divi(Transforms.sqrt(v).addi(epsilon));

        if (!gradExpected.equals(gradient.getGradientFor(entry.getKey()))) {
            System.out.println(Arrays.toString(gradExpected.dup().data().asFloat()));
            System.out.println(
                    Arrays.toString(gradient.getGradientFor(entry.getKey()).dup().data().asFloat()));
        }
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }

    assertEquals(beta1, layer.conf().getLayer().getAdamMeanDecay(), 1e-4);
    assertEquals(beta2, layer.conf().getLayer().getAdamVarDecay(), 1e-4);
}
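// For reference, the loop above mirrors one Adam step from zero-initialised state:
// m = beta1*m + (1-beta1)*g, v = beta2*v + (1-beta2)*g^2, update = alpha_t * m / (sqrt(v) + eps),
// where alpha_t folds in the bias corrections. A minimal standalone sketch of that single step
// (illustrative helper name and signature, same ND4J/FastMath imports as the tests; t >= 1):
private static INDArray expectedAdamStep(INDArray grad, double lr, double beta1, double beta2,
                                         double eps, int t) {
    INDArray m = grad.mul(1.0 - beta1);              // beta1 * 0 + (1 - beta1) * g
    INDArray v = grad.mul(grad).muli(1.0 - beta2);   // beta2 * 0 + (1 - beta2) * g^2
    double alphaT = lr * FastMath.sqrt(1 - FastMath.pow(beta2, t)) / (1 - FastMath.pow(beta1, t));
    return m.mul(alphaT).divi(Transforms.sqrt(v).addi(eps));
}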
@Test
public void testRMSPropUpdater() {
    double lr = 0.01;
    double rmsDecay = 0.25;
    Map<String, INDArray> lastG = new HashMap<>();

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .rmsDecay(rmsDecay)
            .layer(new DenseLayer.Builder()
                    .nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        key = entry.getKey();
        val = entry.getValue();
        INDArray lastGTmp = lastG.get(key);

        if (lastGTmp == null) lastGTmp = Nd4j.zeros(val.shape());

        lastGTmp.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay));
        gradExpected = val.mul(lr).div(Transforms.sqrt(lastGTmp.add(Nd4j.EPS_THRESHOLD)));

        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
        lastG.put(key, lastGTmp);
    }
    assertEquals(rmsDecay, layer.conf().getLayer().getRmsDecay(), 1e-4);
}
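// For reference, the expected RMSProp step above is: E[g^2] = decay*E[g^2] + (1-decay)*g^2,
// update = lr * g / sqrt(E[g^2] + eps). A minimal standalone sketch of the first step with a
// zero-initialised cache (illustrative helper name, same ND4J imports as the tests):
private static INDArray expectedRmsPropFirstStep(INDArray grad, double lr, double decay, double eps) {
    INDArray cache = grad.mul(grad).muli(1 - decay);          // decay * 0 + (1 - decay) * g^2
    return grad.mul(lr).div(Transforms.sqrt(cache.add(eps))); // lr * g / sqrt(cache + eps)
}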
@Test
public void testNesterovsUpdater() {
    double lr = 1e-2;
    double mu = 0.6;
    INDArray v, vPrev;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .momentum(mu)
            .layer(new DenseLayer.Builder()
                    .nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        v = Nd4j.zeros(val.shape());
        vPrev = v;
        v = vPrev.mul(mu).subi(val.mul(lr));
        gradExpected = vPrev.muli(mu).addi(v.mul(-mu - 1));

        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(mu, layer.conf().getLayer().getMomentum(), 1e-4);
}
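// For reference, the expected Nesterov momentum step above is: v' = mu*v - lr*g, with the
// applied update mu*v + (-mu - 1)*v' (the "lookahead" form the check computes). A minimal
// standalone sketch of the first step with zero-initialised velocity (illustrative helper name):
private static INDArray expectedNesterovFirstStep(INDArray grad, double lr, double mu) {
    INDArray vPrev = Nd4j.zeros(grad.shape());     // velocity before this step
    INDArray v = vPrev.mul(mu).subi(grad.mul(lr)); // v' = mu*v - lr*g
    return vPrev.mul(mu).addi(v.mul(-mu - 1));     // mu*v - (1 + mu)*v'
}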
private static Layer getCNNConfig(int nIn, int nOut, int[] kernelSize, int[] stride, int[] padding) {
    ConvolutionLayer layer = new ConvolutionLayer.Builder(kernelSize, stride, padding)
            .nIn(nIn).nOut(nOut)
            .build();

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .activationFunction("sigmoid")
            .iterations(1)
            .layer(layer)
            .build();

    return LayerFactories.getFactory(conf).create(conf);
}
@Test
public void testAdaGradUpdater() {
    double lr = 1e-2;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .layer(new DenseLayer.Builder()
                    .nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        gradExpected = Transforms.sqrt(val.mul(val).add(epsilon)).rdiv(lr).mul(val);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
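// For reference, the expected AdaGrad step above is: cache += g^2, update = lr * g / sqrt(cache + eps).
// With a zero-initialised cache, the first step reduces to lr * g / sqrt(g^2 + eps), which is what
// the rdiv/mul chain computes. A minimal standalone sketch (illustrative helper name, same ND4J imports):
private static INDArray expectedAdaGradFirstStep(INDArray grad, double lr, double eps) {
    INDArray cache = grad.mul(grad);                          // 0 + g^2 after the first step
    return grad.mul(lr).div(Transforms.sqrt(cache.add(eps))); // lr * g / sqrt(cache + eps)
}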
public ModelAndGradient() {
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .lossFunction(LossFunctions.LossFunction.MCXENT)
            .optimizationAlgo(OptimizationAlgorithm.ITERATION_GRADIENT_DESCENT)
            .activationFunction("softmax")
            .iterations(10)
            .weightInit(WeightInit.XAVIER)
            .learningRate(1e-1)
            .nIn(4).nOut(3)
            .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer())
            .build();

    OutputLayer l = LayerFactories.getFactory(conf.getLayer())
            .create(conf, Arrays.<IterationListener>asList(new ScoreIterationListener(1)));

    this.model = l;
    l.setInput(Nd4j.ones(4));
    l.setLabels(Nd4j.ones(3));
    this.gradient = l.gradient();
}
@Test
public void testModelSerde() throws Exception {
    ObjectMapper mapper = getMapper();

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .momentum(0.9f)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1000)
            .constrainGradientToUnitNorm(true)
            .learningRate(1e-1f)
            .layer(new org.deeplearning4j.nn.conf.layers.AutoEncoder.Builder()
                    .nIn(4).nOut(3)
                    .corruptionLevel(0.6)
                    .sparsity(0.5)
                    .lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY)
                    .build())
            .build();

    DataSet d2 = new IrisDataSetIterator(150, 150).next();
    INDArray input = d2.getFeatureMatrix();

    AutoEncoder da = LayerFactories.getFactory(conf.getLayer())
            .create(conf,
                    Arrays.<IterationListener>asList(
                            new ScoreIterationListener(1), new HistogramIterationListener(1)),
                    0);
    da.setInput(input);

    ModelAndGradient g = new ModelAndGradient(da);
    String json = mapper.writeValueAsString(g);
    ModelAndGradient read = mapper.readValue(json, ModelAndGradient.class);

    assertEquals(g, read);
}
@Test
public void testAdaDeltaUpdate() {
    INDArray dxSquared;
    Map<String, INDArray> msg = new HashMap<>();
    Map<String, INDArray> msdx = new HashMap<>();
    double rho = 0.85;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .rho(rho)
            .layer(new DenseLayer.Builder()
                    .nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (int i = 0; i < 2; i++) {
        updater.update(layer, gradient, i, 1);

        // calculations for one iteration / update
        for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
            key = entry.getKey();
            val = entry.getValue();
            INDArray msgTmp = msg.get(key);
            INDArray msdxTmp = msdx.get(key);

            if (msgTmp == null) {
                msgTmp = Nd4j.zeros(val.shape());
                msdxTmp = Nd4j.zeros(val.shape());
            }

            // decaying average of squared gradients: E[g^2] = rho*E[g^2] + (1-rho)*g^2
            msgTmp.muli(rho);
            msgTmp.addi(val.mul(val).muli(1 - rho));

            gradExpected = Transforms.sqrt(msdxTmp.add(Nd4j.EPS_THRESHOLD))
                    .divi(Transforms.sqrt(msgTmp.add(Nd4j.EPS_THRESHOLD)))
                    .muli(val);
            gradientDup.setGradientFor(key, gradExpected);
            assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));

            // decaying average of squared updates: E[dx^2] = rho*E[dx^2] + (1-rho)*dx^2
            msdxTmp.muli(rho);
            dxSquared = gradExpected.mul(gradExpected);
            msdxTmp.addi(dxSquared.muli(1 - rho));

            msg.put(key, msgTmp);
            msdx.put(key, msdxTmp);
        }
        assertEquals(rho, layer.conf().getLayer().getRho(), 1e-4);
    }
}
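// For reference, the AdaDelta rule checked above keeps two decaying averages,
// E[g^2] = rho*E[g^2] + (1-rho)*g^2 and E[dx^2] = rho*E[dx^2] + (1-rho)*dx^2, and applies
// dx = sqrt(E[dx^2] + eps) / sqrt(E[g^2] + eps) * g. A minimal standalone sketch of one step
// given the current averages (illustrative helper name; msg/msdx are updated in place):
private static INDArray expectedAdaDeltaStep(INDArray grad, INDArray msg, INDArray msdx,
                                             double rho, double eps) {
    msg.muli(rho).addi(grad.mul(grad).muli(1 - rho));   // E[g^2] update (in place)
    INDArray dx = Transforms.sqrt(msdx.add(eps))
            .divi(Transforms.sqrt(msg.add(eps)))
            .muli(grad);                                 // the applied update
    msdx.muli(rho).addi(dx.mul(dx).muli(1 - rho));       // E[dx^2] update (in place)
    return dx;
}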
public static void main(String... args) throws Exception {
    int numFeatures = 40;
    int iterations = 5;
    int seed = 123;
    int listenerFreq = iterations / 5;
    Nd4j.getRandom().setSeed(seed);

    log.info("Load data....");
    // Need at least two examples, otherwise the output layer gradient is a scalar and causes an exception
    INDArray input = Nd4j.create(2, numFeatures);
    INDArray labels = Nd4j.create(2, 2);

    INDArray row0 = Nd4j.create(1, numFeatures);
    row0.assign(0.1);
    input.putRow(0, row0);
    labels.put(0, 1, 1); // set the second label column for example 0

    INDArray row1 = Nd4j.create(1, numFeatures);
    row1.assign(0.2);
    input.putRow(1, row1);
    labels.put(1, 0, 1); // set the first label column for example 1

    DataSet trainingSet = new DataSet(input, labels);

    log.info("Build model....");
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new RBM())
            .nIn(trainingSet.numInputs())
            .nOut(trainingSet.numOutcomes())
            .seed(seed)
            .weightInit(WeightInit.SIZE)
            .constrainGradientToUnitNorm(true)
            .iterations(iterations)
            .activationFunction("tanh")
            .visibleUnit(RBM.VisibleUnit.GAUSSIAN)
            .hiddenUnit(RBM.HiddenUnit.RECTIFIED)
            .lossFunction(LossFunctions.LossFunction.RMSE_XENT)
            .learningRate(1e-1f)
            .optimizationAlgo(OptimizationAlgorithm.ITERATION_GRADIENT_DESCENT)
            .build();

    Layer model = LayerFactories.getFactory(conf).create(conf);
    model.setIterationListeners(
            Collections.singletonList((IterationListener) new ScoreIterationListener(listenerFreq)));

    log.info("Evaluate weights....");
    INDArray w = model.getParam(DefaultParamInitializer.WEIGHT_KEY);
    log.info("Weights: " + w);

    log.info("Train model....");
    model.fit(trainingSet.getFeatureMatrix());

    log.info("Visualize training results....");
    // Work in progress to get NeuralNetPlotter functioning
    NeuralNetPlotter plotter = new NeuralNetPlotter();
    plotter.plotNetworkGradient(model, model.gradient(), 10);
}
@Test
public void testDbn() throws Exception {
    Nd4j.MAX_SLICES_TO_PRINT = -1;
    Nd4j.MAX_ELEMENTS_PER_SLICE = -1;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .iterations(100)
            .layer(new org.deeplearning4j.nn.conf.layers.RBM())
            .weightInit(WeightInit.DISTRIBUTION)
            .dist(new UniformDistribution(0, 1))
            .activationFunction("tanh")
            .momentum(0.9)
            .optimizationAlgo(OptimizationAlgorithm.LBFGS)
            .constrainGradientToUnitNorm(true)
            .k(1)
            .regularization(true)
            .l2(2e-4)
            .visibleUnit(org.deeplearning4j.nn.conf.layers.RBM.VisibleUnit.GAUSSIAN)
            .hiddenUnit(org.deeplearning4j.nn.conf.layers.RBM.HiddenUnit.RECTIFIED)
            .lossFunction(LossFunctions.LossFunction.RMSE_XENT)
            .nIn(4).nOut(3)
            .list(2)
            .hiddenLayerSizes(3)
            .override(1, new ClassifierOverride(1))
            .build();

    NeuralNetConfiguration conf2 = new NeuralNetConfiguration.Builder()
            .layer(new org.deeplearning4j.nn.conf.layers.RBM())
            .nIn(784).nOut(600)
            .applySparsity(true)
            .sparsity(0.1)
            .build();

    Layer l = LayerFactories.getFactory(conf2)
            .create(conf2, Arrays.<IterationListener>asList(new ScoreIterationListener(2)), 0);

    MultiLayerNetwork d = new MultiLayerNetwork(conf);

    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    DataSet next = iter.next();
    Nd4j.writeTxt(next.getFeatureMatrix(), "iris.txt", "\t");
    next.normalizeZeroMeanZeroUnitVariance();

    SplitTestAndTrain testAndTrain = next.splitTestAndTrain(110);
    DataSet train = testAndTrain.getTrain();
    d.fit(train);

    DataSet test = testAndTrain.getTest();
    Evaluation eval = new Evaluation();
    INDArray output = d.output(test.getFeatureMatrix());
    eval.eval(test.getLabels(), output);
    log.info("Score " + eval.stats());
}