예제 #1
0
  /**
   * Builds a fixed ten-instance clustering fixture made of three clusters with 2, 3 and 5 members.
   *
   * <p>The label layout is based on "Details of the Adjusted Rand index and Clustering algorithms",
   * the supplement to the paper "An empirical study on Principal Component Analysis for clustering
   * gene expression data" (Bioinformatics):
   *
   * <ul>
   *   <li>first cluster: u1, u2
   *   <li>second cluster: u1, u2, u2
   *   <li>third cluster: u2, u3, u3, u3, u3
   * </ul>
   *
   * <p>Each instance is obtained from {@code next(...)}, which receives an unseeded {@link Random};
   * whatever {@code next} derives from that random source may therefore differ between calls,
   * while the labels and the cluster membership stay the same.
   *
   * @return the clustering containing the three clusters, after the two-attribute dataset has been
   *     passed to {@code lookupAdd}
   */
  public Clustering<Instance, Cluster<Instance>> pcaData() {
    final int instanceCount = 10;
    Clustering<Instance, Cluster<Instance>> result = new ClusterList<>(3);

    // Dataset with two numeric attributes; its builder is handed to next(...) for every instance.
    Dataset<? extends Instance> dataset = new ArrayDataset<>(instanceCount, 2);
    dataset.attributeBuilder().create("x1", "NUMERIC");
    dataset.attributeBuilder().create("x2", "NUMERIC");
    InstanceBuilder<? extends Instance> factory = dataset.builder();
    Random random = new Random();

    // Clusters are added to the clustering while still empty and populated afterwards.
    BaseCluster small = new BaseCluster(2);
    result.add(small);
    BaseCluster medium = new BaseCluster(3);
    result.add(medium);
    BaseCluster large = new BaseCluster(5);
    result.add(large);

    for (String label : new String[] {"u1", "u2"}) {
      small.add(next(random, factory, label));
    }
    for (String label : new String[] {"u1", "u2", "u2"}) {
      medium.add(next(random, factory, label));
    }
    for (String label : new String[] {"u2", "u3", "u3", "u3", "u3"}) {
      large.add(next(random, factory, label));
    }

    result.lookupAdd(dataset);
    return result;
  }
예제 #2
0
  /**
   * Builds a clustering fixture in which every cluster holds exactly one instance and all
   * instances carry the same label, {@code "same class"}.
   *
   * <p>Ten instances are requested from {@code next(...)}; each one is placed into its own
   * freshly created cluster, which is then added to the clustering. The instances are produced
   * with an unseeded {@link Random}, so whatever {@code next} derives from it may differ between
   * calls.
   *
   * @return the clustering containing ten single-instance clusters, after the two-attribute
   *     dataset has been passed to {@code lookupAdd}
   */
  public Clustering<Instance, Cluster<Instance>> oneClassPerCluster() {
    // Diamond instead of the raw ClusterList type: avoids an unchecked conversion to the
    // parameterized Clustering type.
    // NOTE(review): 3 is passed although ten clusters are added below; presumably this is only an
    // initial-capacity hint -- confirm whether it should be `size`.
    Clustering<Instance, Cluster<Instance>> oneClass = new ClusterList<>(3);
    int size = 10;
    Random rand = new Random();
    Dataset<? extends Instance> data = new ArrayDataset<>(size, 2);
    data.attributeBuilder().create("x1", "NUMERIC");
    data.attributeBuilder().create("x2", "NUMERIC");

    for (int i = 0; i < size; i++) {
      Instance inst = next(rand, data.builder(), "same class");
      // cluster with single class: one instance per cluster, all sharing the same label
      BaseCluster clust = new BaseCluster(1);
      clust.add(inst);
      oneClass.add(clust);
    }
    oneClass.lookupAdd(data);
    return oneClass;
  }