Ejemplo n.º 1
0
  /**
   * Builds a cluster model in which every example is put into a randomly drawn cluster.
   *
   * <p>Ids are checked (and created if missing) first. One cluster index per example is then drawn
   * with {@code nextInt(k)} from the operator's random generator, where {@code k} is the value of
   * {@code PARAMETER_NUMBER_OF_CLUSTERS}. If {@link #addsClusterAttribute()} is true, a nominal
   * attribute named {@code "cluster"} is added to the example table, registered as the cluster
   * attribute, and filled with {@code "cluster_<index>"} for each example.
   *
   * @param exampleSet the examples to cluster; it is modified when ids or the cluster attribute
   *     are added
   * @return the model holding the random assignments
   * @throws OperatorException propagated from the parameter lookups and helper calls
   */
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    // every example needs an id before assignments are stored
    Tools.checkAndCreateIds(exampleSet);

    // draw one cluster index per example
    RandomGenerator rng = RandomGenerator.getRandomGenerator(this);
    int[] assignments = new int[exampleSet.size()];
    int numberOfClusters = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS);
    int position = 0;
    while (position < exampleSet.size()) {
      assignments[position] = rng.nextInt(numberOfClusters);
      position++;
    }

    // wrap the assignments into the model
    boolean addAsLabel = getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL);
    boolean removeUnlabeled = getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED);
    ClusterModel result = new ClusterModel(exampleSet, numberOfClusters, addAsLabel, removeUnlabeled);
    result.setClusterAssignments(assignments, exampleSet);

    if (!addsClusterAttribute()) {
      return result;
    }

    // write the assignment of each example into a new nominal "cluster" attribute
    Attribute clusterAttribute = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
    exampleSet.getExampleTable().addAttribute(clusterAttribute);
    exampleSet.getAttributes().setCluster(clusterAttribute);
    int exampleIndex = 0;
    for (Example current : exampleSet) {
      current.setValue(clusterAttribute, "cluster_" + assignments[exampleIndex++]);
    }
    return result;
  }
Ejemplo n.º 2
0
  /**
   * Clusters the example set with a kernel variant of k-means and returns the resulting model.
   *
   * <p>Procedure, as implemented below:
   *
   * <ol>
   *   <li>Ids are checked/created and the example set is checked for missing values.
   *   <li>Every example is assigned to a random cluster.
   *   <li>Up to {@code PARAMETER_MAX_OPTIMIZATION_STEPS} times, each example is re-assigned to the
   *       cluster {@code c} minimising
   *       {@code K(x,x) - 2/W_c * sum_j w_j K(x_j,x) + 1/W_c^2 * sum_i sum_j w_i w_j K(x_i,x_j)},
   *       where the sums run over the current members of {@code c}, {@code W_c} is the summed
   *       weight of {@code c} and {@code K} is the value returned by
   *       {@code kernel.calculateDistance}. The loop stops early once no assignment changes.
   *   <li>The final assignments are stored in the model and, if {@link #addsClusterAttribute()}
   *       is true, written to a new nominal {@code "cluster"} attribute as
   *       {@code "cluster_<index>"}.
   * </ol>
   *
   * <p>Example weights are only used when {@code PARAMETER_USE_WEIGHTS} is enabled <em>and</em> the
   * example set actually has a weight attribute; otherwise every example counts with weight 1.
   *
   * @param exampleSet the examples to cluster; it is modified when ids or the cluster attribute
   *     are added
   * @return the model holding the final cluster assignments
   * @throws OperatorException if the example set holds fewer than {@code k} examples (user error
   *     142), or propagated from the parameter lookups and helper calls
   */
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    int k = getParameterAsInt(PARAMETER_K);
    int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS);
    boolean weightsRequested = getParameterAsBoolean(PARAMETER_USE_WEIGHTS);
    Kernel kernel = Kernel.createKernel(this);

    // init operator progress
    getProgress().setTotal(maxOptimizationSteps);

    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);

    if (exampleSet.size() < k) {
      throw new UserError(this, 142, k);
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute weightAttribute = attributes.getWeight();
    // Without a weight attribute there is nothing to read weights from; fall back to unit
    // weights instead of handing a null attribute to Example.getValue.
    boolean useExampleWeights = weightsRequested && weightAttribute != null;

    RandomGenerator generator = RandomGenerator.getRandomGenerator(this);

    ClusterModel model =
        new ClusterModel(
            exampleSet,
            k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));

    // start from a random assignment of examples to clusters
    int[] clusterAssignments = new int[exampleSet.size()];
    for (int i = 0; i < exampleSet.size(); i++) {
      clusterAssignments[i] = generator.nextIntInRange(0, k);
    }

    // run optimization steps until nothing changes or the step limit is reached
    boolean stable = false;
    for (int step = 0; step < maxOptimizationSteps && !stable; step++) {
      // per cluster: summed weight and the weighted sum of all pairwise kernel values
      double[] clusterWeights = new double[k];
      double[] clusterKernelCorrection = new double[k];
      int i = 0;
      for (Example firstExample : exampleSet) {
        double firstExampleWeight = useExampleWeights ? firstExample.getValue(weightAttribute) : 1d;
        double[] firstExampleValues = getAsDoubleArray(firstExample, attributes);
        clusterWeights[clusterAssignments[i]] += firstExampleWeight;
        int j = 0;
        for (Example secondExample : exampleSet) {
          if (clusterAssignments[i] == clusterAssignments[j]) {
            double secondExampleWeight =
                useExampleWeights ? secondExample.getValue(weightAttribute) : 1d;
            clusterKernelCorrection[clusterAssignments[i]] +=
                firstExampleWeight
                    * secondExampleWeight
                    * kernel.calculateDistance(
                        firstExampleValues, getAsDoubleArray(secondExample, attributes));
          }
          j++;
        }
        i++;
      }
      // Normalise by the squared cluster weight. For a cluster with zero weight this is 0/0 = NaN,
      // which makes its distance NaN below, so the "distance < nearestDistance" test never
      // selects it.
      for (int z = 0; z < k; z++) {
        clusterKernelCorrection[z] /= clusterWeights[z] * clusterWeights[z];
      }

      // assign every example to the cluster with the smallest kernel distance
      int[] newClusterAssignments = new int[exampleSet.size()];
      i = 0;
      for (Example example : exampleSet) {
        double[] exampleValues = getAsDoubleArray(example, attributes);
        double exampleKernelValue = kernel.calculateDistance(exampleValues, exampleValues);
        double nearestDistance = Double.POSITIVE_INFINITY;
        int nearestIndex = 0;
        for (int clusterIndex = 0; clusterIndex < k; clusterIndex++) {
          double distance = 0;
          // sum the weighted kernel values between this example and all members of the cluster
          int j = 0;
          for (Example clusterExample : exampleSet) {
            if (clusterAssignments[j] == clusterIndex) {
              distance +=
                  (useExampleWeights ? clusterExample.getValue(weightAttribute) : 1d)
                      * kernel.calculateDistance(
                          getAsDoubleArray(clusterExample, attributes), exampleValues);
            }
            j++;
          }
          distance *= -2d / clusterWeights[clusterIndex];
          distance += exampleKernelValue;
          distance += clusterKernelCorrection[clusterIndex];
          if (distance < nearestDistance) {
            nearestDistance = distance;
            nearestIndex = clusterIndex;
          }
        }
        newClusterAssignments[i] = nearestIndex;
        i++;
      }

      // the clustering is stable when no example changed its cluster in this step
      stable = true;
      for (int j = 0; j < exampleSet.size() && stable; j++) {
        stable &= newClusterAssignments[j] == clusterAssignments[j];
      }
      clusterAssignments = newClusterAssignments;

      // trigger operator progress
      getProgress().step();
    }

    // setting last clustering into model
    model.setClusterAssignments(clusterAssignments, exampleSet);

    getProgress().complete();

    if (addsClusterAttribute()) {
      Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(cluster);
      exampleSet.getAttributes().setCluster(cluster);
      int i = 0;
      for (Example example : exampleSet) {
        example.setValue(cluster, "cluster_" + clusterAssignments[i]);
        i++;
      }
    }
    return model;
  }
  /**
   * Runs every validation measure that is switched on through the operator parameters and
   * collects the results.
   *
   * <p>The distance measure is instantiated reflectively from the class name stored in the
   * {@code "Distance_Measure"} parameter type. If that fails, a warning is logged and the measures
   * are still invoked with a {@code null} distance, exactly as before the failure was reported.
   *
   * <p>All measures share one parameter list. The {@code "Connectivity"} and {@code "Symmetry"}
   * switches add a nearest-neighbour sub-problem parameter to that list, so measures constructed
   * afterwards receive the list including it.
   *
   * @param centroidClusterModel the model to validate; must be exactly a
   *     {@code CentroidClusterModel} or castable to {@code WhiBoCentroidClusterModel}, otherwise a
   *     {@link ClassCastException} is thrown
   * @param exampleSet the examples the model was built on
   * @return a {@code MyPerformanceCriterion} holding one entry per enabled measure
   */
  private IOObject getValidations(ClusterModel centroidClusterModel, ExampleSet exampleSet) {
    // Resolve the concrete model type once, up front; exactly one of the two references is set.
    CentroidClusterModel centroidClusterModelRapid = null;
    WhiBoCentroidClusterModel centroidClusterModelWhiBo = null;
    if (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) {
      centroidClusterModelRapid = (CentroidClusterModel) centroidClusterModel;
    } else {
      centroidClusterModelWhiBo = (WhiBoCentroidClusterModel) centroidClusterModel;
    }

    // Gets distance measure from the component repository on the basis of
    // selected parameter
    String className = getParameterType("Distance_Measure").toString(0);
    rs.fon.whibo.GC.component.DistanceMeasure.DistanceMeasure distance = null;
    try {
      Constructor<?> constructor = Class.forName(className).getConstructor(List.class);
      distance =
          (rs.fon.whibo.GC.component.DistanceMeasure.DistanceMeasure)
              constructor.newInstance(new LinkedList<SubproblemParameter>());
    } catch (Exception ex) {
      // Deliberately broad: any failure here leaves the distance unset and validation continues
      // on a best-effort basis, but the cause is no longer hidden.
      logValidationWarning("Could not instantiate distance measure '" + className + "'", ex);
    }

    MyPerformanceCriterion mpc = new MyPerformanceCriterion();

    LinkedList<SubproblemParameter> ll = new LinkedList<SubproblemParameter>();
    // Instantiates sub-problem parameter class for validation measures that
    // need parameters
    SubproblemParameter sp = new SubproblemParameter();
    sp.setParametertType(Integer.class);
    sp.setMinValue("1");
    sp.setMaxValue("1000");

    // Gets and executes evaluation components from the component repository
    // on the basis of selected parameter
    if (getParameterAsBoolean("Intra_Cluster_Distance")) {
      mpc.addPerformance(
          "Intra_Cluster_Distance",
          evaluateMeasure(
              new IntraClusterDistance(ll),
              distance,
              centroidClusterModelRapid,
              centroidClusterModelWhiBo,
              exampleSet));
    }
    if (getParameterAsBoolean("Connectivity")) {
      applyEnteredValue(sp, "NN_Connectivity");
      ll.add(sp);
      mpc.addPerformance(
          "Connectivity",
          evaluateMeasure(
              new Connectivity(ll),
              distance,
              centroidClusterModelRapid,
              centroidClusterModelWhiBo,
              exampleSet));
    }
    if (getParameterAsBoolean("Global_Silhouette_Index")) {
      mpc.addPerformance(
          "Global_Silhouette_Index",
          evaluateMeasure(
              new GlobalSilhouetteIndex(ll),
              distance,
              centroidClusterModelRapid,
              centroidClusterModelWhiBo,
              exampleSet));
    }
    if (getParameterAsBoolean("XB_Index")) {
      mpc.addPerformance(
          "XB_Index",
          evaluateMeasure(
              new XBIndex(ll),
              distance,
              centroidClusterModelRapid,
              centroidClusterModelWhiBo,
              exampleSet));
    }
    if (getParameterAsBoolean("Min_Max_Cut")) {
      mpc.addPerformance(
          "Min_Max_Cut",
          evaluateMeasure(
              new MInMaxCut(ll),
              distance,
              centroidClusterModelRapid,
              centroidClusterModelWhiBo,
              exampleSet));
    }
    if (getParameterAsBoolean("Symmetry")) {
      // The Symmetry measure itself is not evaluated here. Only its nearest-neighbour parameter
      // is (re-)registered, which still changes the list handed to the measures below.
      applyEnteredValue(sp, "NN_Symmetry");
      ll.removeFirstOccurrence(sp);
      ll.add(sp);
    }

    if (getParameterAsBoolean("Fowlkes_Mallows_Index")) {
      mpc.addPerformance(
          "Fowlkes_Mallows_Index",
          evaluateMeasure(
              new FowlkesMallowsIndex(ll),
              distance,
              centroidClusterModelRapid,
              centroidClusterModelWhiBo,
              exampleSet));
    }
    if (getParameterAsBoolean("Jaccard_Index")) {
      mpc.addPerformance(
          "Jaccard_Index",
          evaluateMeasure(
              new JaccardIndex(ll),
              distance,
              centroidClusterModelRapid,
              centroidClusterModelWhiBo,
              exampleSet));
    }
    if (getParameterAsBoolean("Rand_Index")) {
      // This measure is called with the model under its declared ClusterModel type rather than
      // one of the two concrete types.
      Evaluation randIndex = new RandIndex(ll);
      mpc.addPerformance(
          "Rand_Index", randIndex.Evaluate(distance, centroidClusterModel, exampleSet));
    }
    if (getParameterAsBoolean("Adjusted_Rand_Index")) {
      mpc.addPerformance(
          "Adjusted_Rand_Index",
          evaluateMeasure(
              new AdjustedRandIndex(ll),
              distance,
              centroidClusterModelRapid,
              centroidClusterModelWhiBo,
              exampleSet));
    }

    return mpc;
  }

  /**
   * Evaluates one measure against whichever concrete model reference is set.
   *
   * @param measure the validation measure to run
   * @param distance the distance measure handed to the evaluation; may be {@code null}
   * @param rapidModel the model as {@code CentroidClusterModel}, or {@code null} if the model is
   *     of the WhiBo type
   * @param whiBoModel the model as {@code WhiBoCentroidClusterModel}; used when
   *     {@code rapidModel} is {@code null}
   * @param exampleSet the examples the model was built on
   * @return the value computed by the measure
   */
  private double evaluateMeasure(
      Evaluation measure,
      rs.fon.whibo.GC.component.DistanceMeasure.DistanceMeasure distance,
      CentroidClusterModel rapidModel,
      WhiBoCentroidClusterModel whiBoModel,
      ExampleSet exampleSet) {
    if (rapidModel != null) {
      return measure.Evaluate(distance, rapidModel, exampleSet);
    }
    return measure.Evaluate(distance, whiBoModel, exampleSet);
  }

  /**
   * Copies the value of the operator parameter {@code parameterKey} into {@code parameter}.
   * An undefined or rejected value is logged and the sub-problem parameter is left as it was.
   */
  private void applyEnteredValue(SubproblemParameter parameter, String parameterKey) {
    try {
      parameter.setXenteredValue(getParameter(parameterKey).toString());
    } catch (UndefinedParameterError ex) {
      logValidationWarning("Parameter '" + parameterKey + "' is not defined", ex);
    } catch (IllegalArgumentException ex) {
      logValidationWarning("Parameter '" + parameterKey + "' has an illegal value", ex);
    }
  }

  /** Logs a non-fatal validation problem together with its cause. */
  private void logValidationWarning(String message, Exception cause) {
    java.util.logging.Logger.getLogger(getClass().getName())
        .log(java.util.logging.Level.WARNING, message, cause);
  }