@Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    // this learner cannot handle missing values
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);

    ImprovedNeuralNetModel model = new ImprovedNeuralNetModel(exampleSet);

    // read the user-defined network topology and training parameters
    List<String[]> hiddenLayers = getParameterList(PARAMETER_HIDDEN_LAYERS);
    int maxCycles = getParameterAsInt(PARAMETER_TRAINING_CYCLES);
    double maxError = getParameterAsDouble(PARAMETER_ERROR_EPSILON);
    double learningRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
    double momentum = getParameterAsDouble(PARAMETER_MOMENTUM);
    boolean decay = getParameterAsBoolean(PARAMETER_DECAY);
    boolean shuffle = getParameterAsBoolean(PARAMETER_SHUFFLE);
    boolean normalize = getParameterAsBoolean(PARAMETER_NORMALIZE);
    RandomGenerator randomGenerator = RandomGenerator.getRandomGenerator(this);

    // train the network with the configured settings and return the fitted model
    model.train(
        exampleSet,
        hiddenLayers,
        maxCycles,
        maxError,
        learningRate,
        momentum,
        decay,
        shuffle,
        normalize,
        randomGenerator,
        this);
    return model;
  }
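The learn() method above only reads the backpropagation parameters and delegates the actual training to ImprovedNeuralNetModel.train(). For reference, a minimal self-contained sketch of the weight update those parameters control (learning rate, momentum, optional decay) could look like the following; the class and method names are illustrative and not part of the RapidMiner API.

final class MomentumUpdateSketch {

  // Applies one momentum-based gradient step; previousDelta is updated in place so the
  // momentum term can be carried into the next cycle. cycle is expected to start at 1.
  static void updateWeights(double[] weights, double[] gradients, double[] previousDelta,
      double learningRate, double momentum, boolean decay, int cycle) {
    // with decay enabled, the effective learning rate shrinks as training progresses
    double effectiveRate = decay ? learningRate / cycle : learningRate;
    for (int w = 0; w < weights.length; w++) {
      double delta = -effectiveRate * gradients[w] + momentum * previousDelta[w];
      weights[w] += delta;
      previousDelta[w] = delta;
    }
  }
}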
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // only use numeric attributes
    Tools.onlyNumericalAttributes(exampleSet, "KernelPCA");
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this);

    Attributes attributes = exampleSet.getAttributes();
    int numberOfExamples = exampleSet.size();

    // calculating means for later zero centering
    exampleSet.recalculateAllAttributeStatistics();
    double[] means = new double[exampleSet.getAttributes().size()];
    int i = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      means[i] = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
      i++;
    }

    // create the kernel function configured for this operator
    Kernel kernel = Kernel.createKernel(this);

    // copying zero centered exampleValues
    ArrayList<double[]> exampleValues = new ArrayList<double[]>(numberOfExamples);
    for (Example columnExample : exampleSet) {
      double[] columnValues = getAttributeValues(columnExample, attributes, means);
      exampleValues.add(columnValues);
    }

    // filling kernel matrix
    Matrix kernelMatrix = new Matrix(numberOfExamples, numberOfExamples);
    for (i = 0; i < numberOfExamples; i++) {
      for (int j = 0; j < numberOfExamples; j++) {
        kernelMatrix.set(
            i, j, kernel.calculateDistance(exampleValues.get(i), exampleValues.get(j)));
      }
    }

    // calculating eigenvectors of the kernel matrix
    EigenvalueDecomposition eig = kernelMatrix.eig();
    Model model = new KernelPCAModel(exampleSet, means, eig.getV(), exampleValues, kernel);

    if (exampleSetOutput.isConnected()) {
      exampleSetOutput.deliver(model.apply(exampleSet));
    }
    originalOutput.deliver(exampleSet);
    modelOutput.deliver(model);
  }
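The doWork() method above zero-centers the attribute values, fills an n x n kernel matrix, and passes its eigendecomposition to KernelPCAModel. A minimal sketch of that core step, assuming an RBF kernel and the same Jama matrix API used above, might look like this (the projection performed by KernelPCAModel is not reproduced):

import Jama.EigenvalueDecomposition;
import Jama.Matrix;

final class KernelMatrixSketch {

  // Gaussian (RBF) kernel between two zero-centered example rows.
  static double rbf(double[] a, double[] b, double gamma) {
    double squaredDistance = 0;
    for (int d = 0; d < a.length; d++) {
      double diff = a[d] - b[d];
      squaredDistance += diff * diff;
    }
    return Math.exp(-gamma * squaredDistance);
  }

  // Pairwise kernel evaluations between all rows, analogous to the kernelMatrix above.
  static Matrix kernelMatrix(double[][] rows, double gamma) {
    int n = rows.length;
    Matrix kernelMatrix = new Matrix(n, n);
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < n; j++) {
        kernelMatrix.set(i, j, rbf(rows[i], rows[j], gamma));
      }
    }
    return kernelMatrix;
  }

  public static void main(String[] args) {
    double[][] rows = {{1.0, 0.0}, {0.0, 1.0}, {-1.0, -1.0}};
    EigenvalueDecomposition eig = kernelMatrix(rows, 0.5).eig();
    // the columns of V are the eigenvectors handed to the model as principal directions
    Matrix v = eig.getV();
    double[] eigenvalues = eig.getRealEigenvalues();
    System.out.println("eigenvalues: " + eigenvalues.length
        + ", V: " + v.getRowDimension() + " x " + v.getColumnDimension());
  }
}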
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    int k = getParameterAsInt(PARAMETER_K);
    int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS);
    boolean useExampleWeights = getParameterAsBoolean(PARAMETER_USE_WEIGHTS);
    Kernel kernel = Kernel.createKernel(this);

    // init operator progress
    getProgress().setTotal(maxOptimizationSteps);

    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);

    if (exampleSet.size() < k) {
      throw new UserError(this, 142, k);
    }

    // extracting attribute names
    Attributes attributes = exampleSet.getAttributes();
    ArrayList<String> attributeNames = new ArrayList<String>(attributes.size());
    for (Attribute attribute : attributes) {
      attributeNames.add(attribute.getName());
    }
    Attribute weightAttribute = attributes.getWeight();

    RandomGenerator generator = RandomGenerator.getRandomGenerator(this);

    ClusterModel model =
        new ClusterModel(
            exampleSet,
            k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));
    // initialize with random cluster assignments (kernel k-means keeps no explicit centroids)
    int[] clusterAssignments = new int[exampleSet.size()];

    for (int i = 0; i < exampleSet.size(); i++) {
      clusterAssignments[i] = generator.nextIntInRange(0, k);
    }

    // run optimization steps
    boolean stable = false;
    for (int step = 0; step < maxOptimizationSteps && !stable; step++) {
      // calculating cluster kernel properties
      double[] clusterWeights = new double[k];
      double[] clusterKernelCorrection = new double[k];
      int i = 0;
      for (Example firstExample : exampleSet) {
        double firstExampleWeight = useExampleWeights ? firstExample.getValue(weightAttribute) : 1d;
        double[] firstExampleValues = getAsDoubleArray(firstExample, attributes);
        clusterWeights[clusterAssignments[i]] += firstExampleWeight;
        int j = 0;
        for (Example secondExample : exampleSet) {
          if (clusterAssignments[i] == clusterAssignments[j]) {
            double secondExampleWeight =
                useExampleWeights ? secondExample.getValue(weightAttribute) : 1d;
            clusterKernelCorrection[clusterAssignments[i]] +=
                firstExampleWeight
                    * secondExampleWeight
                    * kernel.calculateDistance(
                        firstExampleValues, getAsDoubleArray(secondExample, attributes));
          }
          j++;
        }
        i++;
      }
      for (int z = 0; z < k; z++) {
        clusterKernelCorrection[z] /= clusterWeights[z] * clusterWeights[z];
      }

      // assign each example to its nearest (implicit) centroid
      int[] newClusterAssignments = new int[exampleSet.size()];
      i = 0;
      for (Example example : exampleSet) {
        double[] exampleValues = getAsDoubleArray(example, attributes);
        double exampleKernelValue = kernel.calculateDistance(exampleValues, exampleValues);
        double nearestDistance = Double.POSITIVE_INFINITY;
        int nearestIndex = 0;
        for (int clusterIndex = 0; clusterIndex < k; clusterIndex++) {
          double distance = 0;
          // iterating over all examples in cluster to get kernel distance
          int j = 0;
          for (Example clusterExample : exampleSet) {
            if (clusterAssignments[j] == clusterIndex) {
              distance +=
                  (useExampleWeights ? clusterExample.getValue(weightAttribute) : 1d)
                      * kernel.calculateDistance(
                          getAsDoubleArray(clusterExample, attributes), exampleValues);
            }
            j++;
          }
          distance *= -2d / clusterWeights[clusterIndex];
          // add the k(x, x) term (computed once per example in the outer loop)
          // and the precomputed correction term for this cluster
          distance += exampleKernelValue;
          distance += clusterKernelCorrection[clusterIndex];
          if (distance < nearestDistance) {
            nearestDistance = distance;
            nearestIndex = clusterIndex;
          }
        }
        newClusterAssignments[i] = nearestIndex;
        i++;
      }

      // check for convergence and adopt the new assignment
      stable = true;
      for (int j = 0; j < exampleSet.size() && stable; j++) {
        stable &= newClusterAssignments[j] == clusterAssignments[j];
      }
      clusterAssignments = newClusterAssignments;

      // trigger operator progress
      getProgress().step();
    }

    // setting last clustering into model
    model.setClusterAssignments(clusterAssignments, exampleSet);

    getProgress().complete();

    if (addsClusterAttribute()) {
      Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(cluster);
      exampleSet.getAttributes().setCluster(cluster);
      int i = 0;
      for (Example example : exampleSet) {
        example.setValue(cluster, "cluster_" + clusterAssignments[i]);
        i++;
      }
    }
    return model;
  }
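The assignment loop above evaluates the distance of an example to each cluster in the implicit kernel feature space, using the expansion ||phi(x) - mu_c||^2 = k(x, x) - (2 / W_c) * sum_j w_j * k(x, x_j) + (1 / W_c^2) * sum_{j,l} w_j * w_l * k(x_j, x_l); the last term is the clusterKernelCorrection computed once per optimization step. A standalone sketch of this expansion for unit weights, with illustrative helper names:

import java.util.List;
import java.util.function.BiFunction;

final class KernelKMeansDistanceSketch {

  // Squared distance of example x to the implicit centroid of clusterMembers under the kernel.
  static double distanceToCluster(double[] x, List<double[]> clusterMembers,
      BiFunction<double[], double[], Double> kernel) {
    double selfTerm = kernel.apply(x, x);
    double crossTerm = 0;
    double correctionTerm = 0;
    for (double[] member : clusterMembers) {
      crossTerm += kernel.apply(x, member);
      for (double[] other : clusterMembers) {
        correctionTerm += kernel.apply(member, other);
      }
    }
    double size = clusterMembers.size();
    return selfTerm - 2d * crossTerm / size + correctionTerm / (size * size);
  }
}

With a linear kernel (a plain dot product) this expansion reduces to the ordinary squared Euclidean distance between the example and the cluster centroid.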
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);
    DistanceMeasure measure = measureHelper.getInitializedMeasure(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);
    Tools.checkAndCreateIds(exampleSet);

    Attribute idAttribute = exampleSet.getAttributes().getId();
    boolean idAttributeIsNominal = idAttribute.isNominal();
    DistanceMatrix matrix = new DistanceMatrix(exampleSet.size());
    Map<Integer, HierarchicalClusterNode> clusterMap =
        new HashMap<Integer, HierarchicalClusterNode>(exampleSet.size());
    int[] clusterIds = new int[exampleSet.size()];
    // filling the distance matrix
    int nextClusterId = 0;
    for (Example example1 : exampleSet) {
      checkForStop();
      clusterIds[nextClusterId] = nextClusterId;
      int y = 0;
      for (Example example2 : exampleSet) {
        if (y > nextClusterId) {
          matrix.set(nextClusterId, y, measure.calculateDistance(example1, example2));
        }
        y++;
      }
      if (idAttributeIsNominal) {
        clusterMap.put(
            nextClusterId,
            new HierarchicalClusterLeafNode(nextClusterId, example1.getValueAsString(idAttribute)));
      } else {
        clusterMap.put(
            nextClusterId,
            new HierarchicalClusterLeafNode(nextClusterId, example1.getValue(idAttribute)));
      }
      nextClusterId++;
    }

    // creating linkage method
    AbstractLinkageMethod linkage = new SingleLinkageMethod(matrix, clusterIds);
    if (getParameterAsString(PARAMETER_MODE).equals(modes[1])) {
      linkage = new CompleteLinkageMethod(matrix, clusterIds);
    } else if (getParameterAsString(PARAMETER_MODE).equals(modes[2])) {
      linkage = new AverageLinkageMethod(matrix, clusterIds);
    }

    // now building agglomerative tree bottom up
    while (clusterMap.size() > 1) {
      Agglomeration agglomeration = linkage.getNextAgglomeration(nextClusterId, clusterMap);
      HierarchicalClusterNode newNode =
          new HierarchicalClusterNode(nextClusterId, agglomeration.getDistance());
      newNode.addSubNode(clusterMap.get(agglomeration.getClusterId1()));
      newNode.addSubNode(clusterMap.get(agglomeration.getClusterId2()));
      clusterMap.remove(agglomeration.getClusterId1());
      clusterMap.remove(agglomeration.getClusterId2());
      clusterMap.put(nextClusterId, newNode);
      nextClusterId++;
    }

    // creating model
    HierarchicalClusterModel model =
        new DendogramHierarchicalClusterModel(clusterMap.entrySet().iterator().next().getValue());

    // registering visualizer
    ObjectVisualizerService.addObjectVisualizer(
        model, new ExampleVisualizer((ExampleSet) exampleSet.clone()));

    modelOutput.deliver(model);
    exampleSetOutput.deliver(exampleSet);
  }
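The while loop above repeatedly asks the linkage method for the closest pair of remaining clusters, merges them into a new HierarchicalClusterNode, and stops once a single root is left. A compact standalone sketch of the same bottom-up procedure with single linkage (illustrative names only, no RapidMiner classes):

import java.util.ArrayList;
import java.util.List;

final class SingleLinkageSketch {

  static final class Node {
    final List<Integer> members = new ArrayList<>();
    final double mergeDistance;

    Node(List<Integer> members, double mergeDistance) {
      this.members.addAll(members);
      this.mergeDistance = mergeDistance;
    }
  }

  // Builds the dendrogram root for a symmetric pairwise distance matrix.
  static Node cluster(double[][] distances) {
    List<Node> active = new ArrayList<>();
    for (int i = 0; i < distances.length; i++) {
      active.add(new Node(List.of(i), 0d));
    }
    while (active.size() > 1) {
      int bestA = 0;
      int bestB = 1;
      double best = Double.POSITIVE_INFINITY;
      for (int a = 0; a < active.size(); a++) {
        for (int b = a + 1; b < active.size(); b++) {
          double d = linkage(active.get(a), active.get(b), distances);
          if (d < best) {
            best = d;
            bestA = a;
            bestB = b;
          }
        }
      }
      List<Integer> merged = new ArrayList<>(active.get(bestA).members);
      merged.addAll(active.get(bestB).members);
      Node parent = new Node(merged, best);
      // remove the higher index first so the lower index stays valid
      active.remove(bestB);
      active.remove(bestA);
      active.add(parent);
    }
    return active.get(0);
  }

  // single linkage: minimum pairwise distance between members of the two clusters
  static double linkage(Node a, Node b, double[][] distances) {
    double min = Double.POSITIVE_INFINITY;
    for (int i : a.members) {
      for (int j : b.members) {
        min = Math.min(min, distances[i][j]);
      }
    }
    return min;
  }
}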