예제 #1
0
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    Kernel kernel = getKernel();
    kernel.init(exampleSet);

    double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
    NominalMapping labelMapping = exampleSet.getAttributes().getLabel().getMapping();
    String classNeg = labelMapping.getNegativeString();
    String classPos = labelMapping.getPositiveString();
    double classValueNeg = labelMapping.getNegativeIndex();
    int numberOfAttributes = exampleSet.getAttributes().size();
    HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel);
    model.init(new double[numberOfAttributes], 0);
    for (int round = 0; round <= getParameterAsInt(PARAMETER_ROUNDS); round++) {
      double learnRate = getLearnRate(round, getParameterAsInt(PARAMETER_ROUNDS), initLearnRate);
      Attributes attributes = exampleSet.getAttributes();
      for (Example example : exampleSet) {
        double prediction = model.predict(example);
        if (prediction != example.getLabel()) {
          double direction = (example.getLabel() == classValueNeg) ? -1 : 1;
          // adapting intercept
          model.setIntercept(model.getIntercept() + learnRate * direction);
          // adapting coefficients
          double coefficients[] = model.getCoefficients();
          int i = 0;
          for (Attribute attribute : attributes) {
            coefficients[i] += learnRate * direction * example.getValue(attribute);
            i++;
          }
        }
      }
    }
    return model;
  }
예제 #2
0
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // only use numeric attributes
    Tools.onlyNumericalAttributes(exampleSet, "KernelPCA");
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this);

    Attributes attributes = exampleSet.getAttributes();
    int numberOfExamples = exampleSet.size();

    // calculating means for later zero centering
    exampleSet.recalculateAllAttributeStatistics();
    double[] means = new double[exampleSet.getAttributes().size()];
    int i = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      means[i] = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
      i++;
    }

    // kernel
    Kernel kernel = Kernel.createKernel(this);

    // copying zero centered exampleValues
    ArrayList<double[]> exampleValues = new ArrayList<double[]>(numberOfExamples);
    i = 0;
    for (Example columnExample : exampleSet) {
      double[] columnValues = getAttributeValues(columnExample, attributes, means);
      exampleValues.add(columnValues);
      i++;
    }

    // filling kernel matrix
    Matrix kernelMatrix = new Matrix(numberOfExamples, numberOfExamples);
    for (i = 0; i < numberOfExamples; i++) {
      for (int j = 0; j < numberOfExamples; j++) {
        kernelMatrix.set(
            i, j, kernel.calculateDistance(exampleValues.get(i), exampleValues.get(j)));
      }
    }

    // calculating eigenVectors
    EigenvalueDecomposition eig = kernelMatrix.eig();
    Model model = new KernelPCAModel(exampleSet, means, eig.getV(), exampleValues, kernel);

    if (exampleSetOutput.isConnected()) {
      exampleSetOutput.deliver(model.apply(exampleSet));
    }
    originalOutput.deliver(exampleSet);
    modelOutput.deliver(model);
  }
  @Override
  public List<ParameterType> getParameterTypes() {
    List<ParameterType> types = super.getParameterTypes();
    types.add(
        new ParameterTypeBoolean(
            PARAMETER_USE_WEIGHTS,
            "Indicates if the weight attribute should be used.",
            false,
            false));
    types.add(
        new ParameterTypeInt(
            PARAMETER_K,
            "The number of clusters which should be detected.",
            2,
            Integer.MAX_VALUE,
            2,
            false));
    types.add(
        new ParameterTypeInt(
            PARAMETER_MAX_OPTIMIZATION_STEPS,
            "The maximal number of iterations performed for one run of k-Means.",
            1,
            Integer.MAX_VALUE,
            100,
            false));

    types.addAll(RandomGenerator.getRandomGeneratorParameters(this));

    types.addAll(Kernel.getParameters(this));
    return types;
  }
예제 #4
0
 @Override
 public double predict(Example example) throws OperatorException {
   int i = 0;
   double distance = intercept;
   // using kernel for distance calculation
   double[] values = new double[example.getAttributes().size()];
   for (Attribute currentAttribute : example.getAttributes()) {
     values[i] = example.getValue(currentAttribute);
     i++;
   }
   distance += kernel.calculateDistance(values, coefficients);
   if (getLabel().isNominal()) {
     int positiveMapping = getLabel().getMapping().mapString(classPositive);
     int negativeMapping = getLabel().getMapping().mapString(classNegative);
     boolean isApplying = example.getAttributes().getPredictedLabel() != null;
     if (isApplying) {
       example.setConfidence(classPositive, 1.0d / (1.0d + java.lang.Math.exp(-distance)));
       example.setConfidence(classNegative, 1.0d / (1.0d + java.lang.Math.exp(distance)));
     }
     if (distance < 0) {
       return negativeMapping;
     } else {
       return positiveMapping;
     }
   } else {
     return distance;
   }
 }
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    int k = getParameterAsInt(PARAMETER_K);
    int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS);
    boolean useExampleWeights = getParameterAsBoolean(PARAMETER_USE_WEIGHTS);
    Kernel kernel = Kernel.createKernel(this);

    // init operator progress
    getProgress().setTotal(maxOptimizationSteps);

    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);

    if (exampleSet.size() < k) {
      throw new UserError(this, 142, k);
    }

    // extracting attribute names
    Attributes attributes = exampleSet.getAttributes();
    ArrayList<String> attributeNames = new ArrayList<String>(attributes.size());
    for (Attribute attribute : attributes) {
      attributeNames.add(attribute.getName());
    }
    Attribute weightAttribute = attributes.getWeight();

    RandomGenerator generator = RandomGenerator.getRandomGenerator(this);

    ClusterModel model =
        new ClusterModel(
            exampleSet,
            k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));
    // init centroids
    int[] clusterAssignments = new int[exampleSet.size()];

    for (int i = 0; i < exampleSet.size(); i++) {
      clusterAssignments[i] = generator.nextIntInRange(0, k);
    }

    // run optimization steps
    boolean stable = false;
    for (int step = 0; step < maxOptimizationSteps && !stable; step++) {
      // calculating cluster kernel properties
      double[] clusterWeights = new double[k];
      double[] clusterKernelCorrection = new double[k];
      int i = 0;
      for (Example firstExample : exampleSet) {
        double firstExampleWeight = useExampleWeights ? firstExample.getValue(weightAttribute) : 1d;
        double[] firstExampleValues = getAsDoubleArray(firstExample, attributes);
        clusterWeights[clusterAssignments[i]] += firstExampleWeight;
        int j = 0;
        for (Example secondExample : exampleSet) {
          if (clusterAssignments[i] == clusterAssignments[j]) {
            double secondExampleWeight =
                useExampleWeights ? secondExample.getValue(weightAttribute) : 1d;
            clusterKernelCorrection[clusterAssignments[i]] +=
                firstExampleWeight
                    * secondExampleWeight
                    * kernel.calculateDistance(
                        firstExampleValues, getAsDoubleArray(secondExample, attributes));
          }
          j++;
        }
        i++;
      }
      for (int z = 0; z < k; z++) {
        clusterKernelCorrection[z] /= clusterWeights[z] * clusterWeights[z];
      }

      // assign examples to new centroids
      int[] newClusterAssignments = new int[exampleSet.size()];
      i = 0;
      for (Example example : exampleSet) {
        double[] exampleValues = getAsDoubleArray(example, attributes);
        double exampleKernelValue = kernel.calculateDistance(exampleValues, exampleValues);
        double nearestDistance = Double.POSITIVE_INFINITY;
        int nearestIndex = 0;
        for (int clusterIndex = 0; clusterIndex < k; clusterIndex++) {
          double distance = 0;
          // iterating over all examples in cluster to get kernel distance
          int j = 0;
          for (Example clusterExample : exampleSet) {
            if (clusterAssignments[j] == clusterIndex) {
              distance +=
                  (useExampleWeights ? clusterExample.getValue(weightAttribute) : 1d)
                      * kernel.calculateDistance(
                          getAsDoubleArray(clusterExample, attributes), exampleValues);
            }
            j++;
          }
          distance *= -2d / clusterWeights[clusterIndex];
          // copy in outer loop
          distance += exampleKernelValue;
          distance += clusterKernelCorrection[clusterIndex];
          if (distance < nearestDistance) {
            nearestDistance = distance;
            nearestIndex = clusterIndex;
          }
        }
        newClusterAssignments[i] = nearestIndex;
        i++;
      }

      // finishing assignment
      stable = true;
      for (int j = 0; j < exampleSet.size() && stable; j++) {
        stable &= newClusterAssignments[j] == clusterAssignments[j];
      }
      clusterAssignments = newClusterAssignments;

      // trigger operator progress
      getProgress().step();
    }

    // setting last clustering into model
    model.setClusterAssignments(clusterAssignments, exampleSet);

    getProgress().complete();

    if (addsClusterAttribute()) {
      Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(cluster);
      exampleSet.getAttributes().setCluster(cluster);
      int i = 0;
      for (Example example : exampleSet) {
        example.setValue(cluster, "cluster_" + clusterAssignments[i]);
        i++;
      }
    }
    return model;
  }
예제 #6
0
 @Override
 public List<ParameterType> getParameterTypes() {
   List<ParameterType> types = super.getParameterTypes();
   types.addAll(Kernel.getParameters(this));
   return types;
 }
  /** Returns a model containing all support vectors, i.e. the examples with non-zero alphas. */
  private EvoSVMModel getModel(double[] alphas) {
    // calculate support vectors
    Iterator<Example> reader = exampleSet.iterator();
    List<SupportVector> supportVectors = new ArrayList<SupportVector>();
    int index = 0;
    while (reader.hasNext()) {
      double currentAlpha = alphas[index];
      Example currentExample = reader.next();
      if (currentAlpha != 0.0d) {
        double[] x = new double[exampleSet.getAttributes().size()];
        int a = 0;
        for (Attribute attribute : exampleSet.getAttributes())
          x[a++] = currentExample.getValue(attribute);
        supportVectors.add(new SupportVector(x, ys[index], currentAlpha));
      }
      index++;
    }

    // calculate all sum values
    double[] sum = new double[exampleSet.size()];
    reader = exampleSet.iterator();
    index = 0;
    while (reader.hasNext()) {
      Example current = reader.next();
      double[] x = new double[exampleSet.getAttributes().size()];
      int a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) x[a++] = current.getValue(attribute);
      sum[index] = kernel.getSum(supportVectors, x);
      index++;
    }

    // calculate b (from Stefan's mySVM code)
    double bSum = 0.0d;
    int bCounter = 0;
    for (int i = 0; i < alphas.length; i++) {
      if ((ys[i] * alphas[i] - c < -IS_ZERO) && (ys[i] * alphas[i] > IS_ZERO)) {
        bSum += ys[i] - sum[i];
        bCounter++;
      } else if ((ys[i] * alphas[i] + c > IS_ZERO) && (ys[i] * alphas[i] < -IS_ZERO)) {
        bSum += ys[i] - sum[i];
        bCounter++;
      }
    }

    if (bCounter == 0) {
      // unlikely
      bSum = 0.0d;
      for (int i = 0; i < alphas.length; i++) {
        if ((ys[i] * alphas[i] < IS_ZERO) && (ys[i] * alphas[i] > -IS_ZERO)) {
          bSum += ys[i] - sum[i];
          bCounter++;
        }
      }
      if (bCounter == 0) {
        // even unlikelier
        bSum = 0.0d;
        for (int i = 0; i < alphas.length; i++) {
          bSum += ys[i] - sum[i];
          bCounter++;
        }
      }
    }
    return new EvoSVMModel(exampleSet, supportVectors, kernel, bSum / bCounter);
  }