/**
 * Adds the layers to the architecture such that the architecture represents a Cascade network.
 * All layers are fully connected to the input layer. Each hidden layer is fully connected to
 * every hidden layer added before it.
 *
 * @param architecture {@inheritDoc}
 */
@Override
public void buildArchitecture(Architecture architecture) {
    List<Layer> layers = architecture.getLayers();
    layers.clear();
    List<LayerConfiguration> layerConfigurations = this.getLayerConfigurations();
    int listSize = layerConfigurations.size();

    // build the input layer
    ForwardingLayer inputLayer = new ForwardingLayer();
    inputLayer.setSourceSize(layerConfigurations.get(0).getSize());
    if (layerConfigurations.get(0).isBias()) {
        inputLayer.setBias(true);
        inputLayer.add(new BiasNeuron());
    }
    layers.add(inputLayer);

    // build the hidden layers and the output layer; each new layer is fed by
    // the input layer and all previously added layers (cascade connectivity)
    int sumOfPreviousLayerAbsoluteSizes = inputLayer.size();
    for (int curLayer = 1; curLayer < listSize; ++curLayer) {
        if (layerConfigurations.get(curLayer).getSize() == 0) {
            throw new UnsupportedOperationException(
                    "Hidden layers must have at least one neuron each.");
        }
        // All neurons have access to the bias neuron in the input layer.
        layerConfigurations.get(curLayer).setBias(false);

        Layer newLayer = this.getLayerBuilder()
                .buildLayer(layerConfigurations.get(curLayer), sumOfPreviousLayerAbsoluteSizes);
        layers.add(newLayer);
        sumOfPreviousLayerAbsoluteSizes += newLayer.size();
    }
}
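For context, a minimal usage sketch follows. The concrete builder class name, the addLayer method and the LayerConfiguration(int) constructor are assumptions inferred from the method above, not API confirmed by this listing:

// A hedged sketch of building a small cascade network; class, constructor
// and method names outside buildArchitecture(...) are assumptions.
CascadeArchitectureBuilder builder = new CascadeArchitectureBuilder();
builder.addLayer(new LayerConfiguration(3)); // input layer: 3 neurons (+ bias neuron, if configured)
builder.addLayer(new LayerConfiguration(2)); // first hidden layer
builder.addLayer(new LayerConfiguration(1)); // output layer

Architecture architecture = new Architecture();
builder.buildArchitecture(architecture);
// Assuming a bias neuron in the input layer, each hidden neuron holds
// 3 + 1 = 4 incoming weights; the output neuron is fed by both the input
// layer and the hidden layer, so it holds 4 + 2 = 6 incoming weights.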
/**
 * Builds a layer by cloning a prototype neuron and adding to it weights such that it is fully
 * connected to the feeding layer.
 *
 * @param layerConfiguration the configuration of the layer that should be built.
 * @param previousLayerAbsoluteSize the absolute size (neuron count) of the feeding layer.
 * @return the built layer.
 */
@Override
public Layer buildLayer(LayerConfiguration layerConfiguration, int previousLayerAbsoluteSize) {
    prototypeNeuron.setActivationFunction(layerConfiguration.getActivationFunction());
    int layerSize = layerConfiguration.getSize();
    boolean bias = layerConfiguration.isBias();

    // determine the correct domain registry
    DomainRegistry domainRegistry = domainProvider.generateDomain(previousLayerAbsoluteSize);

    // set the domain for the prototype neuron
    prototypeNeuron.setDomain(domainRegistry.getDomainString());

    // get the prototype weight vector
    Vector prototypeWeightVector = null;
    try {
        prototypeWeightVector = (Vector) domainRegistry.getBuiltRepresentation();
    } catch (ClassCastException exception) {
        throw new UnsupportedOperationException(
                "The domain string of the neural network weights has to be real valued.");
    }

    // add neurons to the layer, each with its own clone of the weight vector
    Layer layer = new Layer();
    for (int i = 0; i < layerSize; i++) {
        Neuron newNeuron = prototypeNeuron.getClone();
        Vector weights = prototypeWeightVector.getClone();
        // TODO: initialisation should be done by the training algorithm
        this.getWeightInitialisationStrategy().initialise(weights);
        newNeuron.setWeights(weights);
        layer.add(newNeuron);
    }
    if (bias) {
        layer.add(new BiasNeuron());
        layer.setBias(true);
    }
    return layer;
}
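To make the fan-in concrete, here is a hedged sketch of a direct call. The concrete builder class name, the LayerConfiguration(int) constructor and the example domain string are assumptions based on the method above:

// Hypothetical direct invocation; names outside buildLayer(...) are assumptions.
LayerBuilder layerBuilder = new PrototypeFullyConnectedLayerBuilder();
LayerConfiguration config = new LayerConfiguration(5); // 5 neurons in the new layer
Layer hidden = layerBuilder.buildLayer(config, 10);    // feeding layer holds 10 neurons
// Each of the 5 cloned neurons now carries a 10-element weight vector built
// from the domain registry (e.g. a real-valued domain such as "R(-3:3)^10").
// If config.isBias() is true, a BiasNeuron is appended, so hidden.size() == 6.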
/**
 * Performs a gradient descent backpropagation given the previous {@link StandardPattern} as input
 * as well as the weight updates after the previous execution of a backpropagation. If the
 * previous weight updates do not exist, the visitor will create them and initialise them to zero.
 *
 * @param architecture the architecture whose weights should be updated.
 */
@Override
public void visit(Architecture architecture) {
    List<Layer> layers = architecture.getLayers();
    int numLayers = layers.size();
    int currentLayerIdx = numLayers - 1; // skip input layer
    Layer currentLayer = layers.get(currentLayerIdx);
    int layerSize = currentLayer.size();
    Layer nextLayer = null;
    int nextLayerSize = -1;
    Neuron currentNeuron;

    // set up delta storage (not necessary for the input layer)
    layerWeightsDelta = new double[numLayers - 1][];

    // calculate the output layer deltas: delta_k = -(t_k - o_k) * f'(o_k)
    layerWeightsDelta[currentLayerIdx - 1] = new double[layerSize];
    for (int k = 0; k < layerSize; k++) {
        currentNeuron = currentLayer.get(k);
        double t_k = layerSize > 1
                ? ((Vector) previousPattern.getTarget()).doubleValueOf(k)
                : ((Real) previousPattern.getTarget()).doubleValue();
        double o_k = currentNeuron.getActivation();
        layerWeightsDelta[currentLayerIdx - 1][k] =
                -1.0 * (t_k - o_k) * currentNeuron.getActivationFunction().getGradient(o_k);
    }

    // calculate the deltas for all hidden layers, back to front
    for (currentLayerIdx = numLayers - 2; currentLayerIdx > 0; currentLayerIdx--) {
        currentLayer = layers.get(currentLayerIdx);
        layerSize = currentLayer.size();
        layerSize = currentLayer.isBias() ? layerSize - 1 : layerSize;
        layerWeightsDelta[currentLayerIdx - 1] = new double[layerSize];
        for (int j = 0; j < layerSize; j++) {
            layerWeightsDelta[currentLayerIdx - 1][j] = 0.0;
            nextLayer = layers.get(currentLayerIdx + 1);
            nextLayerSize = nextLayer.size();
            nextLayerSize = nextLayer.isBias() ? nextLayerSize - 1 : nextLayerSize;
            for (int k = 0; k < nextLayerSize; k++) {
                // weight from neuron j in this layer to neuron k in the next layer
                double w_kj = nextLayer.get(k).getWeights().doubleValueOf(j);
                layerWeightsDelta[currentLayerIdx - 1][j] +=
                        w_kj * layerWeightsDelta[currentLayerIdx][k];
            }
            currentNeuron = currentLayer.get(j);
            layerWeightsDelta[currentLayerIdx - 1][j] *=
                    currentNeuron.getActivationFunction().getGradient(currentNeuron.getActivation());
        }
    }

    // lazily allocate storage for the previous weight updates; Java
    // zero-initialises new arrays, so no explicit clearing is needed
    if (previousWeightUpdates == null) {
        previousWeightUpdates = new double[numLayers - 1][];
        for (currentLayerIdx = numLayers - 1; currentLayerIdx > 0; currentLayerIdx--) {
            currentLayer = layers.get(currentLayerIdx);
            layerSize = currentLayer.isBias() ? currentLayer.size() - 1 : currentLayer.size();
            int previousLayerSize = layers.get(currentLayerIdx - 1).size();
            previousWeightUpdates[currentLayerIdx - 1] =
                    new double[layerSize * previousLayerSize + previousLayerSize + 1];
        }
    }

    ((ForwardingLayer) layers.get(0)).setSource(new PatternInputSource(previousPattern));

    // update the output and all hidden layer weights (the loop excludes the input layer)
    for (currentLayerIdx = numLayers - 1; currentLayerIdx > 0; currentLayerIdx--) {
        currentLayer = layers.get(currentLayerIdx);
        layerSize = currentLayer.isBias() ? currentLayer.size() - 1 : currentLayer.size();
        Layer previousLayer = layers.get(currentLayerIdx - 1);
        int previousLayerSize = previousLayer.size();
        for (int k = 0; k < layerSize; k++) {
            currentNeuron = currentLayer.get(k);
            double tmp = (-1.0 * learningRate) * layerWeightsDelta[currentLayerIdx - 1][k];
            for (int j = 0; j < previousLayerSize; j++) {
                double weight = currentNeuron.getWeights().doubleValueOf(j);
                double newWeightUpdate = tmp * previousLayer.getNeuralInput(j);
                double update = newWeightUpdate
                        + momentum * previousWeightUpdates[currentLayerIdx - 1][k * previousLayerSize + j];
                currentNeuron.getWeights().setReal(j, weight + update);
                // only the gradient term is stored for the next momentum step
                previousWeightUpdates[currentLayerIdx - 1][k * previousLayerSize + j] = newWeightUpdate;
            }
        }
    }
}
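Finally, a hedged sketch of how the visitor might drive one training epoch. It applies the standard momentum rule, delta_w = -eta * delta_k * o_j + alpha * delta_w_previous; only visit(Architecture) is taken from the code above, while the setters and the surrounding loop are assumptions about the enclosing API:

// A training-loop sketch; setLearningRate, setMomentum and setPreviousPattern
// are assumed setters for the fields that visit(Architecture) reads.
BackPropagationVisitor visitor = new BackPropagationVisitor();
visitor.setLearningRate(0.1);
visitor.setMomentum(0.9);
for (StandardPattern pattern : trainingSet) {
    // assumes the architecture has just been fed forward with this pattern,
    // so the neuron activations used by the deltas are current
    visitor.setPreviousPattern(pattern);
    visitor.visit(architecture); // one gradient descent step with momentum
}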