@Override public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException { // checking and creating ids if necessary Tools.checkAndCreateIds(exampleSet); // generating assignment RandomGenerator random = RandomGenerator.getRandomGenerator(this); int clusterAssignments[] = new int[exampleSet.size()]; int k = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS); for (int i = 0; i < exampleSet.size(); i++) { clusterAssignments[i] = random.nextInt(k); } ClusterModel model = new ClusterModel( exampleSet, k, getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL), getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED)); model.setClusterAssignments(clusterAssignments, exampleSet); // generating cluster attribute if (addsClusterAttribute()) { Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL); exampleSet.getExampleTable().addAttribute(cluster); exampleSet.getAttributes().setCluster(cluster); int i = 0; for (Example example : exampleSet) { example.setValue(cluster, "cluster_" + clusterAssignments[i]); i++; } } return model; }
@Override public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException { int k = getParameterAsInt(PARAMETER_K); int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS); boolean useExampleWeights = getParameterAsBoolean(PARAMETER_USE_WEIGHTS); Kernel kernel = Kernel.createKernel(this); // init operator progress getProgress().setTotal(maxOptimizationSteps); // checking and creating ids if necessary Tools.checkAndCreateIds(exampleSet); // additional checks Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]); if (exampleSet.size() < k) { throw new UserError(this, 142, k); } // extracting attribute names Attributes attributes = exampleSet.getAttributes(); ArrayList<String> attributeNames = new ArrayList<String>(attributes.size()); for (Attribute attribute : attributes) { attributeNames.add(attribute.getName()); } Attribute weightAttribute = attributes.getWeight(); RandomGenerator generator = RandomGenerator.getRandomGenerator(this); ClusterModel model = new ClusterModel( exampleSet, k, getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL), getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED)); // init centroids int[] clusterAssignments = new int[exampleSet.size()]; for (int i = 0; i < exampleSet.size(); i++) { clusterAssignments[i] = generator.nextIntInRange(0, k); } // run optimization steps boolean stable = false; for (int step = 0; step < maxOptimizationSteps && !stable; step++) { // calculating cluster kernel properties double[] clusterWeights = new double[k]; double[] clusterKernelCorrection = new double[k]; int i = 0; for (Example firstExample : exampleSet) { double firstExampleWeight = useExampleWeights ? firstExample.getValue(weightAttribute) : 1d; double[] firstExampleValues = getAsDoubleArray(firstExample, attributes); clusterWeights[clusterAssignments[i]] += firstExampleWeight; int j = 0; for (Example secondExample : exampleSet) { if (clusterAssignments[i] == clusterAssignments[j]) { double secondExampleWeight = useExampleWeights ? secondExample.getValue(weightAttribute) : 1d; clusterKernelCorrection[clusterAssignments[i]] += firstExampleWeight * secondExampleWeight * kernel.calculateDistance( firstExampleValues, getAsDoubleArray(secondExample, attributes)); } j++; } i++; } for (int z = 0; z < k; z++) { clusterKernelCorrection[z] /= clusterWeights[z] * clusterWeights[z]; } // assign examples to new centroids int[] newClusterAssignments = new int[exampleSet.size()]; i = 0; for (Example example : exampleSet) { double[] exampleValues = getAsDoubleArray(example, attributes); double exampleKernelValue = kernel.calculateDistance(exampleValues, exampleValues); double nearestDistance = Double.POSITIVE_INFINITY; int nearestIndex = 0; for (int clusterIndex = 0; clusterIndex < k; clusterIndex++) { double distance = 0; // iterating over all examples in cluster to get kernel distance int j = 0; for (Example clusterExample : exampleSet) { if (clusterAssignments[j] == clusterIndex) { distance += (useExampleWeights ? clusterExample.getValue(weightAttribute) : 1d) * kernel.calculateDistance( getAsDoubleArray(clusterExample, attributes), exampleValues); } j++; } distance *= -2d / clusterWeights[clusterIndex]; // copy in outer loop distance += exampleKernelValue; distance += clusterKernelCorrection[clusterIndex]; if (distance < nearestDistance) { nearestDistance = distance; nearestIndex = clusterIndex; } } newClusterAssignments[i] = nearestIndex; i++; } // finishing assignment stable = true; for (int j = 0; j < exampleSet.size() && stable; j++) { stable &= newClusterAssignments[j] == clusterAssignments[j]; } clusterAssignments = newClusterAssignments; // trigger operator progress getProgress().step(); } // setting last clustering into model model.setClusterAssignments(clusterAssignments, exampleSet); getProgress().complete(); if (addsClusterAttribute()) { Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL); exampleSet.getExampleTable().addAttribute(cluster); exampleSet.getAttributes().setCluster(cluster); int i = 0; for (Example example : exampleSet) { example.setValue(cluster, "cluster_" + clusterAssignments[i]); i++; } } return model; }
// private IOObject getValidations( WhiBoCentroidClusterModel // centroidClusterModel, ExampleSet exampleSet) { private IOObject getValidations(ClusterModel centroidClusterModel, ExampleSet exampleSet) { CentroidClusterModel centroidClusterModelRapid = null; WhiBoCentroidClusterModel centroidClusterModelWhiBo = null; if (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) { centroidClusterModelRapid = (CentroidClusterModel) centroidClusterModel; } else { centroidClusterModelWhiBo = (WhiBoCentroidClusterModel) centroidClusterModel; } // Gets distance measure from the component repository on the basis of // selected parameter String className = getParameterType("Distance_Measure").toString(0); rs.fon.whibo.GC.component.DistanceMeasure.DistanceMeasure distance = null; Constructor c = null; try { c = Class.forName(className).getConstructor(new Class[] {List.class}); distance = (rs.fon.whibo.GC.component.DistanceMeasure.DistanceMeasure) c.newInstance(new Object[] {new LinkedList<SubproblemParameter>()}); } catch (Exception e) { } Evaluation e = null; double evaluation = 0; MyPerformanceCriterion mpc = new MyPerformanceCriterion(); LinkedList<SubproblemParameter> ll = new LinkedList<SubproblemParameter>(); // Instantiates sub-problem parameter class for validation measures that // need parameters SubproblemParameter sp = new SubproblemParameter(); sp.setParametertType(Integer.class); sp.setMinValue("1"); sp.setMaxValue("1000"); // Gets and executes evaluation components from the component repository // on the basis of selected parameter if (getParameterAsBoolean("Intra_Cluster_Distance")) { e = new IntraClusterDistance(ll); if (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) evaluation = e.Evaluate(distance, centroidClusterModelRapid, exampleSet); else evaluation = e.Evaluate(distance, centroidClusterModelWhiBo, exampleSet); mpc.addPerformance("Intra_Cluster_Distance", evaluation); } if (getParameterAsBoolean("Connectivity")) { try { sp.setXenteredValue(getParameter("NN_Connectivity").toString()); } catch (UndefinedParameterError e1) { // TODO Auto-generated catch block e1.printStackTrace(); } catch (IllegalArgumentException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } ll.add(sp); e = new Connectivity(ll); if (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) evaluation = e.Evaluate(distance, centroidClusterModelRapid, exampleSet); else evaluation = e.Evaluate(distance, centroidClusterModelWhiBo, exampleSet); mpc.addPerformance("Connectivity", evaluation); } if (getParameterAsBoolean("Global_Silhouette_Index")) { e = new GlobalSilhouetteIndex(ll); if (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) evaluation = e.Evaluate(distance, centroidClusterModelRapid, exampleSet); else evaluation = e.Evaluate(distance, centroidClusterModelWhiBo, exampleSet); mpc.addPerformance("Global_Silhouette_Index", evaluation); } if (getParameterAsBoolean("XB_Index")) { e = new XBIndex(ll); if (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) evaluation = e.Evaluate(distance, centroidClusterModelRapid, exampleSet); else evaluation = e.Evaluate(distance, centroidClusterModelWhiBo, exampleSet); mpc.addPerformance("XB_Index", evaluation); } if (getParameterAsBoolean("Min_Max_Cut")) { e = new MInMaxCut(ll); if (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) evaluation = e.Evaluate(distance, centroidClusterModelRapid, exampleSet); else evaluation = e.Evaluate(distance, centroidClusterModelWhiBo, exampleSet); mpc.addPerformance("Min_Max_Cut", evaluation); } if (getParameterAsBoolean("Symmetry")) { try { sp.setXenteredValue(getParameter("NN_Symmetry").toString()); } catch (UndefinedParameterError e1) { // TODO Auto-generated catch block e1.printStackTrace(); } catch (IllegalArgumentException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } ll.removeFirstOccurrence(sp); ll.add(sp); // e=new Symmetry(ll); // if // (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) // evaluation = e.Evaluate(distance, centroidClusterModelRapid, // exampleSet); // else // evaluation = e.Evaluate(distance, centroidClusterModelWhiBo, // exampleSet); // mpc.addPerformance("Symmetry", evaluation); // } // if(getParameterAsBoolean("BIC")){ // e=new BIC(ll); // if // (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) // evaluation = e.Evaluate(distance, centroidClusterModelRapid, // exampleSet); // else // evaluation = e.Evaluate(distance, centroidClusterModelWhiBo, // exampleSet); // mpc.addPerformance("BIC", evaluation); } if (getParameterAsBoolean("Fowlkes_Mallows_Index")) { e = new FowlkesMallowsIndex(ll); if (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) evaluation = e.Evaluate(distance, centroidClusterModelRapid, exampleSet); else evaluation = e.Evaluate(distance, centroidClusterModelWhiBo, exampleSet); mpc.addPerformance("Fowlkes_Mallows_Index", evaluation); } if (getParameterAsBoolean("Jaccard_Index")) { e = new JaccardIndex(ll); if (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) evaluation = e.Evaluate(distance, centroidClusterModelRapid, exampleSet); else evaluation = e.Evaluate(distance, centroidClusterModelWhiBo, exampleSet); mpc.addPerformance("Jaccard_Index", evaluation); } if (getParameterAsBoolean("Rand_Index")) { e = new RandIndex(ll); evaluation = e.Evaluate(distance, centroidClusterModel, exampleSet); mpc.addPerformance("Rand_Index", evaluation); } if (getParameterAsBoolean("Adjusted_Rand_Index")) { e = new AdjustedRandIndex(ll); if (centroidClusterModel.getClass().equals(CentroidClusterModel.class)) evaluation = e.Evaluate(distance, centroidClusterModelRapid, exampleSet); else evaluation = e.Evaluate(distance, centroidClusterModelWhiBo, exampleSet); mpc.addPerformance("Adjusted_Rand_Index", evaluation); } return mpc; }