예제 #1
0
  /**
   * Adds the layers to the architecture such that the architecture represents a Cascade network.
   *
   * <p>The existing layers of the architecture are discarded. The first layer configuration
   * describes the input layer, which is built as a {@link ForwardingLayer} (with an extra
   * {@link BiasNeuron} when the configuration asks for a bias). Every following configuration is
   * handed to the layer builder together with the combined size of all layers added before it, so
   * each new layer is sized to receive input from the input layer and from every layer built
   * earlier.
   *
   * <p>Side effect: the bias flag of every non-input layer configuration is set to {@code false},
   * because the only bias unit lives in the input layer.
   *
   * @param architecture the architecture whose layer list is cleared and rebuilt
   * @throws UnsupportedOperationException if any layer configuration after the first has size 0
   */
  @Override
  public void buildArchitecture(Architecture architecture) {
    List<Layer> layers = architecture.getLayers();
    layers.clear();

    List<LayerConfiguration> layerConfigurations = this.getLayerConfigurations();
    int layerCount = layerConfigurations.size();

    // build the input layer
    LayerConfiguration inputConfiguration = layerConfigurations.get(0);
    ForwardingLayer inputLayer = new ForwardingLayer();
    inputLayer.setSourceSize(inputConfiguration.getSize());
    if (inputConfiguration.isBias()) {
      inputLayer.setBias(true);
      inputLayer.add(new BiasNeuron());
    }
    layers.add(inputLayer);

    // build the hidden layers and output layer; the running total is the number of inputs the
    // next layer receives (input layer, including its bias neuron, plus all layers built so far)
    int sumOfPreviousLayerAbsoluteSizes = inputLayer.size();
    for (int curLayer = 1; curLayer < layerCount; ++curLayer) {
      LayerConfiguration configuration = layerConfigurations.get(curLayer);
      if (configuration.getSize() == 0) {
        throw new UnsupportedOperationException(
            "Hidden layers must have at least one neuron each.");
      }

      // All neurons have access to the bias in the input layer.
      configuration.setBias(false);
      Layer newLayer =
          this.getLayerBuilder().buildLayer(configuration, sumOfPreviousLayerAbsoluteSizes);
      layers.add(newLayer);
      sumOfPreviousLayerAbsoluteSizes += newLayer.size();
    }
  }
예제 #2
0
  /**
   * Performs a gradient descent backpropagation step given the previous {@link StandardPattern} as
   * input as well as the weight updates after the previous execution of a backpropagation. If the
   * previous weight updates do not exist, the visitor will create them and initialize them to zero.
   *
   * <p>The step runs in three phases: (1) the error deltas of the output layer are computed from
   * the pattern's target and each neuron's activation, (2) the deltas of the hidden layers are
   * computed from the deltas and weights of the layer after them, and (3) the weights of every
   * non-input layer are adjusted using the learning rate and the momentum term. All deltas are
   * computed before any weight is changed.
   *
   * <p>The deltas are stored in {@code layerWeightsDelta} and the per-weight updates in
   * {@code previousWeightUpdates}; in both, slot {@code i - 1} belongs to layer {@code i} because
   * the input layer needs no storage.
   *
   * @param architecture the architecture whose layers are read and whose weights are updated; its
   *     first layer is cast to {@link ForwardingLayer} and given the previous pattern as source
   */
  @Override
  public void visit(Architecture architecture) {
    List<Layer> layers = architecture.getLayers();
    int numLayers = layers.size();
    int outputLayerIdx = numLayers - 1; // the last layer is the output layer
    Layer outputLayer = layers.get(outputLayerIdx);
    int outputSize = outputLayer.size();

    // setup delta storage
    layerWeightsDelta = new double[numLayers - 1][]; // not necessary for input layer

    // calculate output layer deltas
    double[] outputDeltas = new double[outputSize];
    layerWeightsDelta[outputLayerIdx - 1] = outputDeltas;
    for (int k = 0; k < outputSize; k++) {
      Neuron outputNeuron = outputLayer.get(k);
      // a single-output target is read as a Real, a multi-output target as a Vector
      double target =
          outputSize > 1
              ? ((Vector) previousPattern.getTarget()).doubleValueOf(k)
              : ((Real) previousPattern.getTarget()).doubleValue();
      double output = outputNeuron.getActivation();
      outputDeltas[k] =
          -1.0 * (target - output) * outputNeuron.getActivationFunction().getGradient(output);
    }

    // calculate deltas for all hidden layers, walking from the output side towards the input
    for (int layerIdx = numLayers - 2; layerIdx > 0; layerIdx--) {
      Layer hiddenLayer = layers.get(layerIdx);
      int hiddenSize = nonBiasSize(hiddenLayer);
      Layer nextLayer = layers.get(layerIdx + 1);
      int nextLayerSize = nonBiasSize(nextLayer);
      double[] nextDeltas = layerWeightsDelta[layerIdx];

      double[] hiddenDeltas = new double[hiddenSize];
      layerWeightsDelta[layerIdx - 1] = hiddenDeltas;
      for (int j = 0; j < hiddenSize; j++) {
        // sum of the next layer's deltas, each weighted by its connection to neuron j
        double weightedDeltaSum = 0.0;
        for (int k = 0; k < nextLayerSize; k++) {
          double weight = nextLayer.get(k).getWeights().doubleValueOf(j);
          weightedDeltaSum += weight * nextDeltas[k];
        }
        Neuron hiddenNeuron = hiddenLayer.get(j);
        hiddenDeltas[j] =
            weightedDeltaSum
                * hiddenNeuron.getActivationFunction().getGradient(hiddenNeuron.getActivation());
      }
    }

    // storage for the weight updates, allocated exactly once per non-input layer
    if (previousWeightUpdates == null) {
      previousWeightUpdates = new double[numLayers - 1][];
      for (int layerIdx = numLayers - 1; layerIdx > 0; layerIdx--) {
        int layerSize = nonBiasSize(layers.get(layerIdx));
        int previousLayerSize = layers.get(layerIdx - 1).size();
        // Length kept as originally allocated; the update loop below only touches the first
        // layerSize * previousLayerSize entries.
        previousWeightUpdates[layerIdx - 1] =
            new double[layerSize * previousLayerSize + previousLayerSize + 1];
      }
    }

    ((ForwardingLayer) layers.get(0)).setSource(new PatternInputSource(previousPattern));
    // updates output and all hidden layer weights (loop excludes input layer)
    for (int layerIdx = numLayers - 1; layerIdx > 0; layerIdx--) {
      Layer currentLayer = layers.get(layerIdx);
      int layerSize = nonBiasSize(currentLayer);
      Layer previousLayer = layers.get(layerIdx - 1);
      int previousLayerSize = previousLayer.size();
      double[] deltas = layerWeightsDelta[layerIdx - 1];
      double[] previousUpdates = previousWeightUpdates[layerIdx - 1];

      for (int k = 0; k < layerSize; k++) {
        Neuron currentNeuron = currentLayer.get(k);
        double scaledDelta = (-1.0 * learningRate) * deltas[k];
        for (int j = 0; j < previousLayerSize; j++) {
          // updates are stored row-major: one row of previousLayerSize entries per neuron k
          int updateIdx = k * previousLayerSize + j;
          double weight = currentNeuron.getWeights().doubleValueOf(j);
          double newWeightUpdate = scaledDelta * previousLayer.getNeuralInput(j);
          double update = newWeightUpdate + momentum * previousUpdates[updateIdx];
          currentNeuron.getWeights().setReal(j, weight + update);
          // NOTE(review): only the gradient term is remembered, not the momentum-inclusive
          // update that was actually applied - confirm this is the intended momentum scheme.
          previousUpdates[updateIdx] = newWeightUpdate;
        }
      }
    }
  }

  /** Returns the layer's size, reduced by one when the layer reports that it has a bias. */
  private static int nonBiasSize(Layer layer) {
    return layer.isBias() ? layer.size() - 1 : layer.size();
  }