/**
 * Builds a small fixture clustering based on "Details of the Adjusted Rand index and Clustering
 * algorithms", the supplement to the paper "An empirical study on Principal Component Analysis
 * for clustering gene expression data" (Bioinformatics).
 *
 * <p>Ten instances are requested from {@code next(...)} and distributed over three clusters:
 * <ul>
 *   <li>cluster 1: labels u1, u2</li>
 *   <li>cluster 2: labels u1, u2, u2</li>
 *   <li>cluster 3: labels u2, u3, u3, u3, u3</li>
 * </ul>
 * The random generator is unseeded, so whatever {@code next(...)} draws from it differs between
 * runs; only the cluster/label layout is fixed.
 *
 * @return clustering with three clusters holding 2, 3 and 5 instances respectively
 */
public Clustering<Instance, Cluster<Instance>> pcaData() {
    final int instanceCount = 10;
    Random random = new Random();
    Clustering<Instance, Cluster<Instance>> result = new ClusterList<>(3);

    // backing dataset with two numeric attributes
    Dataset<? extends Instance> dataset = new ArrayDataset<>(instanceCount, 2);
    dataset.attributeBuilder().create("x1", "NUMERIC");
    dataset.attributeBuilder().create("x2", "NUMERIC");
    InstanceBuilder<? extends Instance> instances = dataset.builder();

    // all clusters are registered before any instance is placed into them
    BaseCluster first = new BaseCluster(2);
    result.add(first);
    BaseCluster second = new BaseCluster(3);
    result.add(second);
    BaseCluster third = new BaseCluster(5);
    result.add(third);

    // label layout per cluster; instances are generated cluster by cluster, in listed order
    BaseCluster[] clusters = {first, second, third};
    String[][] labels = {
        {"u1", "u2"},
        {"u1", "u2", "u2"},
        {"u2", "u3", "u3", "u3", "u3"}
    };
    for (int i = 0; i < clusters.length; i++) {
        for (String label : labels[i]) {
            clusters[i].add(next(random, instances, label));
        }
    }

    result.lookupAdd(dataset);
    return result;
}
public Clustering<Instance, Cluster<Instance>> oneClassPerCluster() { Clustering<Instance, Cluster<Instance>> oneClass = new ClusterList(3); int size = 10; Random rand = new Random(); Dataset<? extends Instance> data = new ArrayDataset<>(size, 2); data.attributeBuilder().create("x1", "NUMERIC"); data.attributeBuilder().create("x2", "NUMERIC"); for (int i = 0; i < size; i++) { Instance inst = next(rand, data.builder(), "same class"); // cluster with single class BaseCluster clust = new BaseCluster(1); clust.add(inst); oneClass.add(clust); } oneClass.lookupAdd(data); return oneClass; }