/** * Train the neural network to predict an output given some input. * * @param input The input to the neural network. * @param output The target output for the given input. * @param learningRate The rate at which the neural network learns. This is normally 0.01. * @return The error of the network as an mean cross entropy. */ public double train(Matrix input, Matrix output, double learningRate) { double totalError = 0; if (input.getNumRows() == output.getNumRows()) { for (int i = 0; i < input.getNumRows(); i++) { Matrix inputRow = new Matrix(new double[][] {input.getRow(i)}).transpose(); Matrix outputRow = new Matrix(new double[][] {output.getRow(i)}).transpose(); Matrix netOutput = this.predict(inputRow); // Output layer Matrix previousDelta = outputRow .subtract(netOutput) .multiply(-1) .multiply( layers .get(layers.size() - 1) .applyFunctionDerivative(layers.get(layers.size() - 1).inputMatrix)); Matrix change = previousDelta .dot(layers.get(layers.size() - 2).outputMatrix.transpose()) .add(layers.get(layers.size() - 1).weightMatrix.multiply(lambda)); layers.get(layers.size() - 1).weightMatrix = layers.get(layers.size() - 1).weightMatrix.subtract(change.multiply(learningRate)); // Hidden layers for (int l = layers.size() - 2; l > 0; l--) { previousDelta = layers .get(l + 1) .weightMatrix .transpose() .dot(previousDelta) .multiply(layers.get(l).applyFunctionDerivative(layers.get(l).inputMatrix)); change = previousDelta .dot(layers.get(l - 1).outputMatrix.transpose()) .add(layers.get(l).weightMatrix.multiply(lambda)); layers.get(l).weightMatrix = layers.get(l).weightMatrix.subtract(change.multiply(learningRate)); } double error = squaredError(inputRow, outputRow); totalError += error; } } return totalError; }
/**
 * Runs a forward pass through every layer and returns the final result.
 *
 * @param input The input to the neural network; its number of rows must equal the input size of
 *     the first layer.
 * @return The matrix produced by feeding a copy of {@code input} through each layer's {@code
 *     activate} call, in list order.
 * @throws InvalidParameterException if the number of rows of {@code input} differs from the input
 *     size of the first layer.
 */
public Matrix predict(Matrix input) {
  boolean sizeMatches = input.getNumRows() == layers.get(0).getLayerSize().getInputSize();
  if (!sizeMatches) {
    throw new InvalidParameterException(
        "Input size did not match the input size of the first layer");
  }

  // Work on a copy so the caller's matrix is never the object handed to the first layer.
  Matrix signal = (Matrix) input.clone();
  for (int index = 0; index < layers.size(); index++) {
    signal = layers.get(index).activate(signal);
  }
  return signal;
}
/**
 * Applies this layer's activation function to every element of the given matrix.
 *
 * <p>When the activation function is a {@code Softmax}, the element-wise results are additionally
 * scaled by {@code 1 / sum} of all results, unless that sum is exactly zero.
 *
 * @param input The input to the activation function; it is not modified.
 * @return A new matrix holding the activated (and, for {@code Softmax}, normalised) values.
 */
private Matrix applyFunction(Matrix input) {
  // Start from a copy so the result has the same dimensions as the input.
  Matrix result = (Matrix) input.clone();
  for (int r = 0; r < input.getNumRows(); r++) {
    for (int c = 0; c < input.getNumCols(); c++) {
      result.set(r, c, function.activate(input.get(r, c)));
    }
  }

  // Only Softmax needs the extra normalisation step.
  if (!(function instanceof Softmax)) {
    return result;
  }

  double total = result.sum();
  if (total == 0) {
    // Avoid dividing by zero; the un-normalised values are returned as they are.
    return result;
  }

  double scale = 1 / total;
  return result.multiply(scale);
}