コード例 #1
0
ファイル: HCTest.java プロジェクト: deric/clueminer
  /**
   * Runs single-linkage row clustering on the Kumar dataset with the proximity matrix kept, then
   * checks the matrix dimensions, the number of leaves, the root height and the number of distinct
   * heights in the resulting dendrogram.
   */
  @Test
  public void testSingleLinkage() {
    Dataset<? extends Instance> dataset = FakeClustering.kumarData();
    assertEquals(6, dataset.size());
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);
    // expected value goes first, so that a failing assertion reports the numbers the right way round
    assertEquals(dataset.size(), similarityMatrix.rowsCount());
    assertEquals(dataset.size(), similarityMatrix.columnsCount());
    System.out.println("kumar - single");
    DendroTreeData tree = result.getTreeData();
    tree.print();
    assertEquals(dataset.size(), tree.numLeaves());
    DendroNode root = tree.getRoot();
    assertEquals(0.21587033144922904, root.getHeight(), DELTA);

    int levels = tree.distinctHeights(1e-7);
    // TODO: in this example nodes #7 and #8 are on different levels,
    // but their height is the same. Should we consider those as different?
    assertEquals(4, levels);
  }
コード例 #2
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
 /**
  * Translates an index through the id mapping of the tree data.
  *
  * @param idx index handed over to {@code treeData.getMappedId}
  * @return the value returned by {@code treeData.getMappedId(idx)}
  * @throws RuntimeException when no tree data is available
  */
 @Override
 public int getMappedIndex(int idx) {
   if (treeData != null) {
     return treeData.getMappedId(idx);
   }
   throw new RuntimeException("Empty tree data");
 }
コード例 #3
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
 /**
  * Reports the number of levels of the tree data.
  *
  * @return {@code treeData.treeLevels()}, or 0 when there is no tree data
  */
 @Override
 public int treeLevels() {
   return (treeData == null) ? 0 : treeData.treeLevels();
 }
コード例 #4
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
 /**
  * Resolves the given level to a cut value via {@code findLevel} (starting at the tree root),
  * applies that cut with {@code updateCutoff} and stores the resulting clustering.
  *
  * @param level level passed to {@code findLevel}
  * @return the cut value that was applied
  */
 @Override
 public double cutTreeByLevel(int level) {
   double height = findLevel(treeData.getRoot(), level);
   this.clustering = updateCutoff(height);
   return height;
 }
コード例 #5
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
 /**
  * Looks up the leaf registered in the tree data under the given index; when there is none, a new
  * {@code DTreeNode} carrying that index as its id is returned instead.
  *
  * @param idx leaf index
  * @return the existing leaf or a freshly created node
  */
 private DendroNode getNode(int idx) {
   DendroNode existing = treeData.getLeaf(idx);
   if (existing != null) {
     return existing;
   }
   DendroNode created = new DTreeNode();
   created.setId(idx);
   return created;
 }
コード例 #6
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
 /**
  * Returns the height stored on the root node of the tree, constructing the tree first when no
  * tree data exists yet.
  *
  * TODO: this is hardly correct
  *
  * @return height of the root node
  */
 @Override
 public double getMaxTreeHeight() {
   if (treeData == null) {
     LOG.info("constructing tree");
     constructTree();
   }
   DendroNode root = treeData.getRoot();
   return root.getHeight();
 }
コード例 #7
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
 /**
  * Returns the mapping held by the tree data (leaves mapped to indexes in the dataset).
  *
  * @return {@code treeData.getMapping()}, or {@code null} when there is no tree data
  */
 @Override
 public int[] getMapping() {
   return (treeData == null) ? null : treeData.getMapping();
 }
コード例 #8
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
  /**
   * Builds the dendrogram from the recorded merges and stores it in {@code treeData}.
   *
   * <p>Merges are walked from the last one to the first one. Every merge becomes a new inner node
   * whose left child is the node created in the previous iteration (on the very first iteration:
   * the node of the remaining cluster) and whose right child is the node of the merged cluster.
   * The node created last becomes the root.
   *
   * @throws RuntimeException when there are no merges to build the tree from
   */
  private void constructTree() {
    // An empty merge list would leave the root null and fail further down with a bare
    // NullPointerException, so it is rejected here together with a missing list.
    if (merges == null || merges.size() == 0) {
      throw new RuntimeException("merges empty!");
    }
    LOG.info("constructing tree, merge size:{}", merges.size());
    treeData = new DynamicTreeData();

    // NOTE(review): this array is handed to setLeaves() below, but nothing in this method fills
    // it - confirm whether the leaves are expected to be populated elsewhere.
    DendroNode[] nodes = new DendroNode[merges.size() + 1];

    DendroNode current = null;
    DendroNode prev = null;

    for (int i = merges.size() - 1; i >= 0; i--) {
      Merge m = merges.get(i);
      current = new DTreeNode();
      // bottom level
      if (prev == null) {
        prev = getNode(m.remainingCluster());
      }

      current.setLeft(prev);
      current.setRight(getNode(m.mergedCluster()));
      current.setHeight(m.similarity());
      prev = current;
    }
    numNodes = 0;
    // number leaves, so that we can compute their positions
    numberLeaves(current);
    treeData.updatePositions(current);

    treeData.setRoot(current);
    treeData.setLeaves(nodes);
    LOG.info("max tree height: {}", current.getHeight());
  }
コード例 #9
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
 /**
  * Adds everything below the given node to a single cluster and records the cluster id in the
  * assignment array.
  *
  * @param node root of the subtree to collect
  * @param c cluster receiving the data
  * @param assign assignment array, written at the instance index (cluster leaves) or at the node
  *     id (plain leaves)
  */
 private void subtreeToCluster(DendroNode node, Cluster c, int[] assign) {
   if (!node.isLeaf()) {
     // inner node: collect both branches into the same cluster
     subtreeToCluster(node.getLeft(), c, assign);
     subtreeToCluster(node.getRight(), c, assign);
     return;
   }
   if (!treeData.containsClusters()) {
     c.add(((DendroLeaf) node).getData());
     assign[node.getId()] = c.getClusterId();
     return;
   }
   DClusterLeaf<E> clusterLeaf = (DClusterLeaf) node;
   for (E member : clusterLeaf.getInstances()) {
     c.add(member);
     assign[member.getIndex()] = c.getClusterId();
   }
 }
コード例 #10
0
ファイル: HCTest.java プロジェクト: deric/clueminer
  /**
   * Runs single-linkage row clustering on the school dataset and checks leaf count, root height,
   * node count, number of distinct heights and number of tree levels.
   */
  @Test
  public void testSingleLinkageSchool() {
    Dataset<? extends Instance> school = FakeClustering.schoolData();
    assertEquals(17, school.size());

    Props params = new Props();
    params.put(AlgParams.LINKAGE, SingleLinkage.name);
    params.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);

    HierarchicalResult hierarchy = subject.hierarchy(school, params);
    System.out.println("school - single");
    DendroTreeData dendrogram = hierarchy.getTreeData();
    dendrogram.print();

    assertEquals(school.size(), dendrogram.numLeaves());
    assertEquals(32.542734980330046, dendrogram.getRoot().getHeight(), DELTA);
    assertEquals(2 * school.size() - 1, dendrogram.numNodes());

    assertEquals(16, dendrogram.distinctHeights());
    assertEquals(8, dendrogram.treeLevels());
  }
コード例 #11
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
 /**
  * Cuts the dendrogram at the given height and returns the resulting clustering.
  *
  * <p>The cutoff is remembered, the color generator and the cluster counter are reset, and the
  * tree is walked from the root by {@code checkCutoff}. When at least one cluster was produced,
  * the new assignment array replaces the stored mapping. Noise instances, when present, are put
  * into one extra cluster named "Noise". The dataset, the dendrogram mapping (when set) and this
  * result are added to the lookup of the returned clustering.
  *
  * @param cutoff height at which the tree is cut
  * @return the clustering produced by the cut
  */
 @Override
 public Clustering updateCutoff(double cutoff) {
   this.cutoff = cutoff;
   int[] assign = new int[dataset.size()];
   int estClusters = (int) Math.sqrt(dataset.size());
   colorGenerator.reset();
   num = 0; // cluster ids are handed out starting from zero
   Clustering clusters = new ClusterList(estClusters);
   DendroNode root = treeData.getRoot();
   if (root != null) {
     checkCutoff(root, cutoff, clusters, assign);
     if (clusters.size() > 0) {
       mapping = assign;
     } else {
       LOG.info("failed to cutoff dendrogram, cut = {}", cutoff);
     }
   }
   if (noise != null) {
     // The mapping is only assigned above when the cut produced clusters; without this fallback
     // the noise assignment below would fail with a NullPointerException on an unset mapping.
     if (mapping == null) {
       mapping = assign;
     }
     Cluster clust = new BaseCluster<>(noise.size());
     clust.setColor(colorGenerator.next());
     clust.setClusterId(num++);
     clust.setParent(getDataset());
     clust.setName("Noise");
     clust.setAttributes(getDataset().getAttributes());
     for (Instance ins : noise) {
       clust.add(ins);
       // num was already incremented, so num - 1 is the id of the noise cluster
       mapping[ins.getIndex()] = num - 1;
     }
     clusters.add(clust);
   }
   // add input dataset to clustering lookup
   clusters.lookupAdd(dataset);
   if (dendroMapping != null) {
     clusters.lookupAdd(dendroMapping);
   }
   clusters.lookupAdd(this);
   return clusters;
 }
コード例 #12
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
 /**
  * Walks the tree from the given node and turns subtrees into clusters according to the cutoff.
  *
  * <p>A leaf becomes a cluster of its own only when the tree data contains clusters. A node whose
  * height equals the cutoff is collected into one cluster as a whole. Otherwise each child whose
  * height is below the cutoff is collected into its own cluster, and each remaining child is
  * examined recursively.
  *
  * @param node node to examine
  * @param cutoff height at which the tree is cut
  * @param clusters clustering receiving the new clusters
  * @param assign assignment array filled with cluster ids
  */
 private void checkCutoff(DendroNode node, double cutoff, Clustering clusters, int[] assign) {
   if (node.isLeaf()) {
     if (!treeData.containsClusters()) {
       return;
     }
     DClusterLeaf<E> clusterLeaf = (DClusterLeaf) node;
     Cluster target = makeCluster(clusters);
     for (E member : clusterLeaf.getInstances()) {
       target.add(member);
       assign[member.getIndex()] = target.getClusterId();
     }
     return;
   }

   if (node.getHeight() == cutoff) {
     // both branches go to the same cluster
     Cluster whole = makeCluster(clusters);
     subtreeToCluster(node, whole, assign);
     return;
   }

   boolean leftBelow = node.getLeft().getHeight() < cutoff;
   boolean rightBelow = node.getRight().getHeight() < cutoff;

   if (leftBelow && rightBelow) {
     Cluster leftCluster = makeCluster(clusters);
     subtreeToCluster(node.getLeft(), leftCluster, assign);
     Cluster rightCluster = makeCluster(clusters);
     subtreeToCluster(node.getRight(), rightCluster, assign);
   } else if (rightBelow) {
     Cluster rightCluster = makeCluster(clusters);
     subtreeToCluster(node.getRight(), rightCluster, assign);
     checkCutoff(node.getLeft(), cutoff, clusters, assign);
   } else if (leftBelow) {
     Cluster leftCluster = makeCluster(clusters);
     subtreeToCluster(node.getLeft(), leftCluster, assign);
     checkCutoff(node.getRight(), cutoff, clusters, assign);
   } else {
     // neither child is below the cutoff: keep descending on both sides
     checkCutoff(node.getLeft(), cutoff, clusters, assign);
     checkCutoff(node.getRight(), cutoff, clusters, assign);
   }
 }
コード例 #13
0
ファイル: HCTest.java プロジェクト: deric/clueminer
  /**
   * Runs single-linkage row clustering on the Kumar dataset with {@code SMALLEST_FIRST} switched
   * off and checks leaf count, root height, number of distinct heights and number of tree levels.
   */
  @Test
  public void testInverseSorting() {
    Dataset<? extends Instance> kumar = FakeClustering.kumarData();
    assertEquals(6, kumar.size());

    Props params = new Props();
    params.put(AlgParams.LINKAGE, SingleLinkage.name);
    params.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);
    // inverse ordering
    params.put(AlgParams.SMALLEST_FIRST, false);

    HierarchicalResult hierarchy = subject.hierarchy(kumar, params);
    System.out.println("kumar - inverse");
    DendroTreeData dendrogram = hierarchy.getTreeData();
    dendrogram.print();

    assertEquals(kumar.size(), dendrogram.numLeaves());
    assertEquals(0.10198039027185574, dendrogram.getRoot().getHeight(), DELTA);

    assertEquals(5, dendrogram.distinctHeights());
    assertEquals(4, dendrogram.treeLevels());
  }
コード例 #14
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
 /**
  * Stores the given tree data and asks it to update positions starting from its root.
  *
  * @param treeData tree data to use from now on
  */
 @Override
 public void setTreeData(DendroTreeData treeData) {
   this.treeData = treeData;
   DendroNode root = treeData.getRoot();
   treeData.updatePositions(root);
 }
コード例 #15
0
ファイル: HClustResult.java プロジェクト: deric/clueminer
 /**
  * Resolves a tree level to a height by delegating to {@code findLevelHeight}, starting at the
  * root of the tree.
  *
  * @param level level passed to {@code findLevelHeight}
  * @return the value returned by {@code findLevelHeight}
  */
 @Override
 public double getHeightByLevel(int level) {
   DendroNode root = treeData.getRoot();
   return findLevelHeight(root, level);
 }