Example #1
  @Test
  public void testSingleLinkage() {
    Dataset<? extends Instance> dataset = FakeClustering.kumarData();
    assertEquals(6, dataset.size());
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);
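    // keep the proximity matrix in the result so it can be inspected below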
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);
    assertEquals(dataset.size(), similarityMatrix.rowsCount());
    assertEquals(dataset.size(), similarityMatrix.columnsCount());
    System.out.println("kumar - single");
    DendroTreeData tree = result.getTreeData();
    tree.print();
    assertEquals(dataset.size(), tree.numLeaves());
    DendroNode root = tree.getRoot();
    assertEquals(0.21587033144922904, root.getHeight(), DELTA);

    int levels = tree.distinctHeights(1e-7);
    // TODO: in this example nodes #7 and #8 are on different levels,
    // but their height is the same. Should we consider those as different levels?
    assertEquals(4, levels);
  }
Example #2
  @Test
  public void testColumnClustering() throws IOException {
    Dataset<? extends Instance> dataset = FakeClustering.schoolData();
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.COLUMNS_CLUSTERING);
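    // cluster the dataset's attributes (columns) instead of its rows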
    pref.put(PropType.PERFORMANCE, AlgParams.KEEP_PROXIMITY, true);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    Matrix similarityMatrix = result.getProximityMatrix();
    assertNotNull(similarityMatrix);
    assertEquals(dataset.attributeCount(), similarityMatrix.rowsCount());
    assertEquals(dataset.attributeCount(), similarityMatrix.columnsCount());

    result.getTreeData().print();
  }
Example #3
  @Test
  public void testSingleLinkageSchool() {
    Dataset<? extends Instance> dataset = FakeClustering.schoolData();
    assertEquals(17, dataset.size());
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    System.out.println("school - single");
    DendroTreeData tree = result.getTreeData();
    tree.print();
    assertEquals(dataset.size(), tree.numLeaves());
    DendroNode root = tree.getRoot();
    assertEquals(32.542734980330046, root.getHeight(), DELTA);
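    // a binary dendrogram over n leaves contains 2n - 1 nodes in total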
    assertEquals(2 * dataset.size() - 1, tree.numNodes());

    assertEquals(16, tree.distinctHeights());
    assertEquals(8, tree.treeLevels());
  }
Example #4
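 // resolve the clustering orientation (rows vs. columns) from the properties,
 // pick the default cutoff strategy with the default internal evaluator
 // and clear any previously stored noise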
 private void init() {
   resultType =
       ClusteringType.parse(
           props.getObject(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING));
   cutoffStrategy = CutoffStrategyFactory.getInstance().getDefault();
   if (cutoffStrategy != null) {
     InternalEvaluatorFactory<E, Cluster<E>> ief = InternalEvaluatorFactory.getInstance();
     cutoffStrategy.setEvaluator(ief.getDefault());
   }
   noise = null;
 }
Example #5
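  // pairwise merge score: the interconnectivity score scaled by relative
  // closeness (RCL) raised to the configurable closeness priority exponent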
  @Override
  public double score(Cluster<E> a, Cluster<E> b, Props params) {
    checkClusters(a, b);

    GraphCluster<E> x = (GraphCluster<E>) a;
    GraphCluster<E> y = (GraphCluster<E>) b;
    double RCL = closeness.getRCL(x, y);
    double closenessPriority = params.getDouble(Chameleon.CLOSENESS_PRIORITY, 2.0);

    return interconnectivity.score(a, b, params) * Math.pow(RCL, closenessPriority);
  }
Example #6
  @Test
  public void testInverseSorting() {
    Dataset<? extends Instance> dataset = FakeClustering.kumarData();
    assertEquals(6, dataset.size());
    Props pref = new Props();
    pref.put(AlgParams.LINKAGE, SingleLinkage.name);
    pref.put(AlgParams.CLUSTERING_TYPE, ClusteringType.ROWS_CLUSTERING);
    // inverse ordering
    pref.put(AlgParams.SMALLEST_FIRST, false);
    HierarchicalResult result = subject.hierarchy(dataset, pref);
    System.out.println("kumar - inverse");
    DendroTreeData tree = result.getTreeData();
    tree.print();
    assertEquals(dataset.size(), tree.numLeaves());
    DendroNode root = tree.getRoot();
    assertEquals(0.10198039027185574, root.getHeight(), DELTA);

    assertEquals(5, tree.distinctHeights());
    assertEquals(4, tree.treeLevels());
  }
Example #7
  @Override
  public Clustering<E, C> reduce(
      Clustering[] clusts, Algorithm<E, C> alg, ColorGenerator cg, Props props) {
    int k = props.getInt(KMeans.K);

    Clustering<E, C> result = new ClusterList<>(k); // reducer - find consensus
    // vote about final result
    E curr;
    Iterator<E> it = clusts[0].instancesIterator();
    Cluster<E> cluster;
    int[][] mapping = findMapping(clusts, k, alg.getDistanceFunction());
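    // findMapping aligns the cluster ids of each clustering with those of the
    // first one, so votes for the same underlying cluster land in the same slot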

    if (cg != null) {
      cg.reset();
    }

    int idx;
    while (it.hasNext()) {
      curr = it.next();
      int[] assign = new int[k];
      for (int i = 0; i < clusts.length; i++) {
        cluster = clusts[i].assignedCluster(curr);
        if (i > 0) {
          assign[map(mapping, i, cluster.getClusterId())]++;
        } else {
          assign[cluster.getClusterId()]++;
        }
      }
      idx = findMax(assign);
      // check if cluster already exists
      if (!result.hasAt(idx)) {
        result.createCluster(idx);
        if (cg != null) {
          result.get(idx).setColor(cg.next());
        }
      }
      // final cluster assignment
      result.get(idx).add(curr);
    }
    result.compact();

    return result;
  }
Example #8
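  // exports the graph in hMETIS format to a temporary file, builds the metis
  // command line from the supplied parameters, runs the binary, deletes the
  // temporary input and returns its base name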
  @Override
  public String runMetis(Graph graph, int k, Props params) {
    long current = System.currentTimeMillis();
    File file = new File("inputGraph-" + current);
    String filename = file.getName();
    Process p;
    try {
      graph.hMetisExport(file, false);
      if (metisFile == null) {
        // fetch the file just once
        metisFile = getBinary();
        System.out.println("metis path: " + metisFile.getAbsolutePath());
      }

      // run metis
      String space = " ";
      StringBuilder sb = new StringBuilder(metisFile.getAbsolutePath());
      sb.append(" ")
          .append(filename)
          .append(" ")
          .append(String.valueOf(k))
          .append(space)
          .append("-ufactor=")
          .append(String.valueOf(params.getDouble(UFACTOR, 5.0)))
          .append(space)
          .append("-nruns=")
          .append(String.valueOf(params.getInt(NRUNS, 10)))
          .append(space)
          .append("-ptype=")
          .append(String.valueOf(params.get(PTYPE, "rb")))
          .append(space)
          .append("-otype=")
          .append(String.valueOf(params.get(OTYPE, "cut")))
          .append(space);
      if (params.containsKey(CTYPE)) {
        sb.append("-ctype=").append(params.get(CTYPE)).append(space);
      }
      if (params.containsKey(RTYPE)) {
        sb.append("-rtype=").append(params.get(RTYPE)).append(space);
      }

      // sb.append(String.valueOf(vcycle)).append(space);
      p = Runtime.getRuntime().exec(sb.toString());
      p.waitFor();
      if (debug) {
        System.out.println("cmd: " + sb.toString());
        System.out.println("exit code: " + p.exitValue());
        if (p.exitValue() != 1) {
          // System.out.println(ExtBinHelper.readFile(file));
        }

        helper.readStdout(p);
        helper.readStderr(p);
      }
      file.delete();
    } catch (IOException | InterruptedException ex) {
      Exceptions.printStackTrace(ex);
    }
    return filename;
  }
Example #9
  @Override
  public Clustering<E, C> reduce(
      Clustering[] clusts, Algorithm<E, C> alg, ColorGenerator cg, Props props) {
    Graph graph = createGraph(clusts);

    // node degree (number of incident edges)
    double df;
    double w, attain;
    EdgeIterable neigh;
    PriorityQueue<DoubleElem> pq = new PriorityQueue<>(graph.getNodeCount());
    DoubleElem<Node> elem;
    // for each node compute attainment score
    for (Node node : graph.getNodes()) {
      neigh = graph.getEdges(node);
      df = neigh.size();
      w = 0.0;
      for (Edge ne : neigh) {
        w += ne.getWeight();
      }
      attain = w / df;
      elem = new DoubleElem<>(node, attain);
      pq.add(elem);
    }

    // number of clusters is just a hint
    int k = props.getInt(KMeans.K, 5);
    double relax = props.getDouble(RELAX, 0.5);
    Clustering<E, C> result = new ClusterList<>(k);
    Dataset<? extends Instance> dataset = clusts[0].getLookup().lookup(Dataset.class);
    result.lookupAdd(dataset);
    ObjectOpenHashSet<Node> blacklist = new ObjectOpenHashSet<>();
    Node node, other;
    Cluster curr;
    double maxW;
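    // greedily grow clusters: each unassigned node polled from the queue seeds
    // a new cluster and pulls in its unassigned neighbours whose edge weight is
    // within the relax tolerance of the node's strongest edge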
    while (!pq.isEmpty()) {
      elem = pq.poll();
      node = elem.getElem();
      if (!blacklist.contains(node)) {
        blacklist.add(node);
        curr = result.createCluster();
        if (cg != null) {
          curr.setColor(cg.next());
        }
        curr.add(node.getInstance());

        EdgeIterable iter = graph.getEdges(node);
        maxW = -1;
        for (Edge ne : iter) {
          if (ne.getWeight() > maxW) {
            maxW = ne.getWeight();
          }
        }
        // add immediate neighbours with max weight to same cluster
        if (maxW >= 0.0) {
          for (Edge ne : iter) {
            // when relax set to 0.0, only items with maximum weight
            // will be added to the same cluster
            w = ne.getWeight() + relax * ne.getWeight();
            if (w >= maxW) {
              if (!node.equals(ne.getSource())) {
                other = ne.getSource();
              } else {
                other = ne.getTarget();
              }
              if (!blacklist.contains(other)) {
                curr.add(other.getInstance());
                blacklist.add(other);
              }
            }
          }
        }
      }
    }
    // TODO merge some clusters

    return result;
  }