/**
 * Calculate the squared error of the neural network.
 *
 * @param x The input to the neural network.
 * @param y The expected output.
 * @return The squared error.
 */
public double squaredError(Matrix x, Matrix y) {
  Matrix y_ = predict(x);
  double sumSquareWeights = 0;
  for (Layer layer : layers) sumSquareWeights += layer.weightMatrix.power(2).sum();
  double j =
      0.5 * y_.subtract(y).power(2).sum() / layers.get(0).getLayerSize().getInputSize()
          + lambda / 2 * sumSquareWeights;
  return j;
}
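// For reference, the quantity computed above is the L2-regularised quadratic cost
// J = (1 / (2 * n)) * sum((y_hat - y)^2) + (lambda / 2) * sum(W^2), where n is the
// input size of the first layer and the second sum runs over every layer's weights.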
/**
 * Give a prediction based on some input.
 *
 * @param input The input to the neural network, which is equal in size to the number of input
 *     neurons.
 * @return The output of the neural network.
 */
public Matrix predict(Matrix input) {
  if (input.getNumRows() != layers.get(0).getLayerSize().getInputSize()) {
    throw new InvalidParameterException(
        "Input size did not match the input size of the first layer");
  }
  Matrix modInput = (Matrix) input.clone();
  for (Layer l : layers) {
    modInput = l.activate(modInput);
  }
  return modInput;
}
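// Usage sketch (hypothetical: no constructor appears in this listing, so `net` and a
// two-input first layer are assumed). The input is a column vector whose height matches
// the first layer's input size:
//
//   Matrix input = new Matrix(new double[][] {{0.5}, {0.25}});
//   Matrix prediction = net.predict(input);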
/**
 * Create a matrix of the given dimensions filled with uniformly random values in [0, 1).
 *
 * @param rows The number of rows.
 * @param cols The number of columns.
 * @return The randomly initialised matrix.
 */
private Matrix createRandomMatrix(int rows, int cols) {
  Matrix random = new Matrix(rows, cols);
  return random.map(
      new Matrix.Function() {
        @Override
        public double function(double x) {
          return Math.random();
        }
      });
}
/**
 * Calculate the cross entropy error of the neural network.
 *
 * @param x The input to the neural network.
 * @param y The expected output.
 * @return The cross entropy error.
 */
public double crossEntropyError(Matrix x, Matrix y) {
  Matrix y_ = predict(x);
  // Cross entropy is -sum(y * log(y_)): the log is taken of the prediction, not the
  // target, so one-hot targets never hit log(0).
  double j =
      y.multiply(
              y_.map(
                  new Matrix.Function() {
                    @Override
                    public double function(double value) {
                      return Math.log(value);
                    }
                  }))
          .sum();
  return -j;
}
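// With one-hot targets only the log-probability of the true class contributes to the
// sum, so the value reduces to -log(y_hat[trueClass]), the usual negative log-likelihood.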
/**
 * Applies the activation function to the processed input.
 *
 * @param input The input to the activation function.
 * @return The output of the activation function.
 */
private Matrix applyFunction(Matrix input) {
  Matrix activated = (Matrix) input.clone();
  for (int row = 0; row < input.getNumRows(); row++)
    for (int col = 0; col < input.getNumCols(); col++)
      activated.set(row, col, function.activate(input.get(row, col)));
  // Softmax is normalised across the whole layer: the element-wise activations
  // (exponentials) are divided by their sum.
  if (function instanceof Softmax) {
    double sum = activated.sum();
    if (sum != 0) activated = activated.multiply(1 / sum);
  }
  return activated;
}
/**
 * Applies the derivative of the activation function to the processed input.
 *
 * @param input The pre-activation input to the layer.
 * @return The element-wise derivative of the activation function at the input.
 */
private Matrix applyFunctionDerivative(Matrix input) {
  Matrix activated = (Matrix) input.clone();
  if (function instanceof Softmax) {
    // Softmax has no element-wise derivative; this branch builds softmax(input)
    // (exponentiate, then normalise), subtracts the raw input, and passes the result
    // back through the activation.
    activated =
        activated.map(
            new Matrix.Function() {
              @Override
              public double function(double x) {
                return Math.exp(x);
              }
            });
    double sum = activated.sum();
    if (sum != 0) activated = activated.multiply(1 / sum);
    activated = activated.subtract(input);
    activated =
        activated.map(
            new Matrix.Function() {
              @Override
              public double function(double x) {
                return function.activate(x);
              }
            });
  } else {
    activated =
        activated.map(
            new Matrix.Function() {
              @Override
              public double function(double x) {
                return function.derivative(x);
              }
            });
  }
  return activated;
}
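// For reference, the exact softmax derivative is the Jacobian
// dS_i/dz_j = S_i * ((i == j ? 1 : 0) - S_j), which is not element-wise; libraries
// usually avoid evaluating it by pairing Softmax with cross entropy, where the combined
// gradient simplifies to (y_hat - y). The element-wise treatment above should be read
// as a heuristic rather than that exact derivative.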
/**
 * Processes the input to the layer.
 *
 * @param input The input to the layer.
 * @return The output of the layer.
 */
private Matrix activate(Matrix input) {
  // Cache the pre-activation (z = W * x + b) and the activation f(z); both are reused
  // by the backpropagation pass in train(...).
  inputMatrix = weightMatrix.dot(input).add(biasMatrix);
  Matrix y = applyFunction(inputMatrix);
  outputMatrix = y;
  return y;
}
/**
 * Train the neural network to predict an output given some input.
 *
 * @param input The input to the neural network, one training example per row.
 * @param output The target output for the given input, one training example per row.
 * @param learningRate The rate at which the neural network learns. This is normally 0.01.
 * @return The total squared error accumulated over the training examples.
 */
public double train(Matrix input, Matrix output, double learningRate) {
  if (input.getNumRows() != output.getNumRows()) {
    throw new InvalidParameterException(
        "Input and output must contain the same number of training examples");
  }
  double totalError = 0;
  for (int i = 0; i < input.getNumRows(); i++) {
    Matrix inputRow = new Matrix(new double[][] {input.getRow(i)}).transpose();
    Matrix outputRow = new Matrix(new double[][] {output.getRow(i)}).transpose();
    Matrix netOutput = this.predict(inputRow);

    // Output layer: delta = (prediction - target) * f'(z), element-wise.
    Layer outputLayer = layers.get(layers.size() - 1);
    Matrix previousDelta =
        outputRow
            .subtract(netOutput)
            .multiply(-1)
            .multiply(outputLayer.applyFunctionDerivative(outputLayer.inputMatrix));
    Matrix change =
        previousDelta
            .dot(layers.get(layers.size() - 2).outputMatrix.transpose())
            .add(outputLayer.weightMatrix.multiply(lambda));
    outputLayer.weightMatrix =
        outputLayer.weightMatrix.subtract(change.multiply(learningRate));

    // Hidden layers: propagate the delta backwards. The first layer (index 0) has no
    // previous layer, so its gradient uses the raw input instead of a cached output.
    for (int l = layers.size() - 2; l >= 0; l--) {
      previousDelta =
          layers
              .get(l + 1)
              .weightMatrix
              .transpose()
              .dot(previousDelta)
              .multiply(layers.get(l).applyFunctionDerivative(layers.get(l).inputMatrix));
      Matrix layerInput = l == 0 ? inputRow : layers.get(l - 1).outputMatrix;
      change =
          previousDelta
              .dot(layerInput.transpose())
              .add(layers.get(l).weightMatrix.multiply(lambda));
      layers.get(l).weightMatrix =
          layers.get(l).weightMatrix.subtract(change.multiply(learningRate));
    }

    totalError += squaredError(inputRow, outputRow);
  }
  return totalError;
}
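// Training sketch (hypothetical setup: no constructor or layer configuration appears in
// this listing, so `net` is assumed). Each row of the two matrices is one example,
// here XOR:
//
//   Matrix inputs  = new Matrix(new double[][] {{0, 0}, {0, 1}, {1, 0}, {1, 1}});
//   Matrix targets = new Matrix(new double[][] {{0}, {1}, {1}, {0}});
//   for (int epoch = 0; epoch < 5000; epoch++) {
//     double error = net.train(inputs, targets, 0.01);
//   }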
/**
 * Set the weight matrix of a layer of the neural network.
 *
 * @param layer The layer number of the neural network.
 * @param weights The new weight matrix for the layer.
 */
public void setWeights(int layer, Matrix weights) {
  layers.get(layer).weightMatrix = (Matrix) weights.clone();
}
/**
 * Get the position of the most probable class in an output array.
 *
 * @param output The output of the neural network (using Softmax).
 * @return The position of the most probable class.
 */
public static int argMax(Matrix output) {
  double max = output.max();
  return output.find(max)[0];
}
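// Usage sketch: converting a Softmax output into a class label (`net` is assumed, as in
// the earlier sketches):
//
//   Matrix probabilities = net.predict(input);
//   int predictedClass = argMax(probabilities);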