コード例 #1
0
  /**
   * Asks every pair of clusters in the list to recalculate their distance to each other.
   *
   * <p>Each unordered pair of entries is visited once, and the recalculation is requested in both
   * directions (first to second, then second to first), passing the instance's upper and lower
   * thresholds along with the distance type.
   *
   * @param clusters the cluster/dendogram pairs whose clusters are to be updated
   * @param type the distance type forwarded to {@code recalculateDistanceTo}
   */
  public void recalculateClusterDistances(List<ClusterDendogram> clusters, Cluster.distType type) {
    final int count = clusters.size();

    for (int first = 0; first < count; first++) {
      Cluster firstCluster = clusters.get(first).getCluster();

      for (int second = first + 1; second < count; second++) {
        Cluster secondCluster = clusters.get(second).getCluster();

        // the update is requested on both members of the pair
        firstCluster.recalculateDistanceTo(secondCluster, type, mUpperThreshold, mLowerThreshold);
        secondCluster.recalculateDistanceTo(firstCluster, type, mUpperThreshold, mLowerThreshold);
      }
    }
  }
コード例 #2
0
  /**
   * Builds a new cluster list in which the two entries named by {@code minNdx} are merged.
   *
   * <p>Entries are copied to the new list in their original order. When the entry at index
   * {@code (int) minNdx.getX()} is reached, it is replaced by the union of that entry and the one
   * at index {@code (int) minNdx.getY()}; the entry at the Y index itself is not copied, which
   * avoids adding the merged cluster twice. If no index in the list equals the X index (for
   * example when {@code minNdx} is {@code (-1, -1)}), nothing is merged.
   *
   * <p>The merge is announced on standard output. The new dendogram node is created with the
   * value of {@code clusterOne.corrDistance(clusterTwo, type)}; {@code correlation} is used only
   * in the printed message.
   *
   * @param clusters the current cluster list; it is not modified
   * @param minNdx the pair of indices (X, Y) of the entries to merge
   * @param correlation the similarity value reported in the console message
   * @param type the distance type passed to {@code corrDistance}
   * @return a newly created list holding the copied and merged entries
   */
  private List<ClusterDendogram> combineClusters(
      List<ClusterDendogram> clusters, Point minNdx, double correlation, Cluster.distType type) {
    final int firstNdx = (int) minNdx.getX();
    final int secondNdx = (int) minNdx.getY();
    ArrayList<ClusterDendogram> result = new ArrayList<ClusterDendogram>();

    for (int ndx = 0; ndx < clusters.size(); ndx++) {
      if (ndx == firstNdx) {
        // the merged entry takes the position of the first cluster of the pair
        ClusterDendogram first = clusters.get(firstNdx);
        ClusterDendogram second = clusters.get(secondNdx);
        Cluster clusterOne = first.getCluster();
        Cluster clusterTwo = second.getCluster();

        System.out.printf(
            "combining clusters:\n===\n\n%s\n and \n%s\nwith maxSimiliarty: %.03f\n\n===",
            clusterOne, clusterTwo, correlation);

        Cluster union = new Cluster(clusterOne.unionWith(clusterTwo));

        Dendogram leftDend = first.getDendogram();
        Dendogram rightDend = second.getDendogram();
        Dendogram mergedDend =
            new DendogramNode(clusterOne.corrDistance(clusterTwo, type), leftDend, rightDend);

        result.add(new ClusterDendogram(union, mergedDend));
        continue;
      }

      // the second cluster of the pair is dropped; everything else is carried over unchanged
      if (ndx != secondNdx) {
        result.add(clusters.get(ndx));
      }
    }

    // the caller uses this list as the cluster set for its next iteration
    return result;
  }
コード例 #3
0
  // TODO check clusters vs dailyClusters, there's a problem in this method
  /**
   * Folds one day's clusters into the list of clusters built up so far.
   *
   * <p>For each entry of {@code dailyClusters}, the existing entries of {@code clusters} are
   * scanned for the one with the highest {@code corrDistance} value that is greater than 0 and
   * that also passes the check selected by {@code mClusterPreference}:
   *
   * <ul>
   *   <li>{@code "structure"}: {@code isDifferent} must return false
   *   <li>{@code "similarity"}: {@code isSimilar} must return true
   * </ul>
   *
   * With any other preference value no existing entry qualifies. When a match is found, that
   * entry of {@code clusters} is replaced by the union of both clusters under a new dendogram
   * node; otherwise the daily entry is appended to {@code clusters}.
   *
   * @param clusters the clusters accumulated so far; this list is modified in place
   * @param dailyClusters the clusters of the new day
   * @param type the distance type passed to {@code corrDistance}
   * @return the same {@code clusters} list instance that was passed in
   */
  private List<ClusterDendogram> clusterToDate(
      List<ClusterDendogram> clusters,
      List<ClusterDendogram> dailyClusters,
      Cluster.distType type) {
    // clustering between days uses just correlations
    for (ClusterDendogram dailyEntry : dailyClusters) {
      Cluster dailyCluster = dailyEntry.getCluster();

      double bestSimilarity = 0;
      int bestNdx = -1;

      // look for the best matching cluster among those built up to the current day
      for (int ndx = 0; ndx < clusters.size(); ndx++) {
        Cluster existing = clusters.get(ndx).getCluster();
        double similarity = dailyCluster.corrDistance(existing, type);

        boolean eligible;
        if (mClusterPreference.equals("structure")) {
          eligible = similarity > bestSimilarity && !dailyCluster.isDifferent(existing);
        } else if (mClusterPreference.equals("similarity")) {
          eligible = similarity > bestSimilarity && dailyCluster.isSimilar(existing);
        } else {
          eligible = false;
        }

        if (eligible) {
          bestSimilarity = similarity;
          bestNdx = ndx;
        }
      }

      if (bestNdx == -1) {
        // nothing qualified, so the daily cluster is kept as a separate entry
        clusters.add(dailyEntry);
        continue;
      }

      // replace the closest existing cluster with (existing cluster U daily cluster)
      ClusterDendogram matchedEntry = clusters.get(bestNdx);
      Cluster matchedCluster = matchedEntry.getCluster();

      Dendogram joinedDend =
          new DendogramNode(
              bestSimilarity, dailyEntry.getDendogram(), matchedEntry.getDendogram());
      ClusterDendogram joinedEntry =
          new ClusterDendogram(matchedCluster.unionWith(dailyCluster), joinedDend);

      clusters.set(bestNdx, joinedEntry);
    }

    return clusters;
  }
コード例 #4
0
  /**
   * Repeatedly merges the best-matching pair of clusters until a pass produces no merge.
   *
   * <p>Each pass compares every unordered pair of clusters using {@code corrDistance} and
   * remembers the pair with the highest value that is greater than 0 and that passes the check
   * selected by {@code mClusterPreference}:
   *
   * <ul>
   *   <li>{@code "structure"}: {@code isDifferent} must return false
   *   <li>{@code "similarity"}: {@code isSimilar} must return true
   * </ul>
   *
   * The remembered pair (or {@code (-1, -1)} when there is none) is handed to
   * {@code combineClusters}. If the returned list differs in size from the current one, it
   * becomes the current list, the cluster distances are recalculated, and another pass is run.
   *
   * @param clusters the clusters to group
   * @param type the distance type passed on to the distance and merge calls
   * @return the input list if the first pass changes nothing, otherwise the list returned by the
   *     last merging call to {@code combineClusters}
   */
  private List<ClusterDendogram> clusterGroup(
      List<ClusterDendogram> clusters, Cluster.distType type) {
    List<ClusterDendogram> current = clusters;

    // continue clustering until clusters do not change
    while (true) {
      // per-pass search state: best pair found so far and its similarity
      Point bestPair = new Point(-1, -1);
      double bestSimilarity = 0;

      for (int first = 0; first < current.size(); first++) {
        Cluster firstCluster = current.get(first).getCluster();

        for (int second = first + 1; second < current.size(); second++) {
          Cluster secondCluster = current.get(second).getCluster();

          // this will ensure that only correlations are compared
          double similarity = firstCluster.corrDistance(secondCluster, type);

          // Historical note: an earlier condition, `clustDist < minDist && clustDist > 99.7`,
          // corresponds to the results used in the paper.
          // TODO investigate the results for when you use '> minDist'
          boolean eligible;
          if (mClusterPreference.equals("structure")) {
            eligible = similarity > bestSimilarity && !firstCluster.isDifferent(secondCluster);
          } else if (mClusterPreference.equals("similarity")) {
            eligible = similarity > bestSimilarity && firstCluster.isSimilar(secondCluster);
          } else {
            eligible = false;
          }

          if (eligible) {
            bestSimilarity = similarity;
            bestPair = new Point(first, second);
          }
        }
      }

      List<ClusterDendogram> merged = combineClusters(current, bestPair, bestSimilarity, type);

      // an unchanged size means no two clusters were combined, so clustering is finished
      if (merged.size() == current.size()) {
        return current;
      }

      current = merged;
      System.out.println("recalculating cluster distances...");
      recalculateClusterDistances(current, type);
      System.out.println("finished recalculating cluster distances...");
    }
  }