예제 #1
0
  /**
   * Runs spectral clustering on {@code data} with {@code k} clusters, prints timing and summary
   * information, and then inspects the eigenvalues exposed by the fitted model.
   *
   * <p>When the smallest eigenvalue is greater than 0.3, the model is stored in {@code result} and
   * {@code cluster(data, k + 1)} is invoked; otherwise the method returns without modifying
   * {@code result}.
   *
   * @param data the samples to cluster, one row per sample.
   * @param k the number of clusters to request.
   */
  private void _cluster(double[][] data, int k) {
    long clock = System.currentTimeMillis();
    // NOTE(review): 0.355 is presumably the kernel width of the similarity function; confirm
    // against the SpectralClustering constructor.
    SpectralClustering model = new SpectralClustering(data, k, 0.355);
    System.out.format(
        "Spectral clustering of %d samples in %dms\n",
        data.length,
        System.currentTimeMillis() - clock);
    System.out.println("getNumClusters:" + model.getNumClusters());
    System.out.println("getClusterSize:" + model.getClusterSize());
    System.out.println("toString:" + model.toString());

    // Fetch the eigenvalues once and reuse them for both statistics below.
    EigenValueDecomposition eigen = model.getEigen();
    double[] eigenValues = eigen.getEigenValues();
    double sd = smile.math.Math.sd(eigenValues);

    System.out.println("sd(eigen.getEigenValues()):" + sd);
    if (Math.min(eigenValues) > 0.3) {
      result = model;
      // NOTE(review): this calls cluster(...), not _cluster(...); confirm that is the intended
      // target for trying k + 1 clusters.
      cluster(data, k + 1);
    }
  }
예제 #2
0
파일: FLD.java 프로젝트: grue/smile
  /**
   * Constructor. Learn Fisher's linear discriminant.
   *
   * @param x training instances.
   * @param y training labels in [0, k), where k is the number of classes.
   * @param L the dimensionality of mapped space. A value less than or equal to zero selects the
   *     default of k - 1.
   * @param tol a tolerance to decide if a covariance matrix is singular; it will reject variables
   *     whose variance is less than tol<sup>2</sup>.
   * @throws IllegalArgumentException if the sizes of {@code x} and {@code y} differ, a label is
   *     negative, the labels do not cover every class in [0, k), there are fewer than two
   *     classes, {@code tol} is negative, the sample size is not larger than k, {@code L} is not
   *     smaller than k, or the covariance matrix is close to singular.
   */
  public FLD(double[][] x, int[] y, int L, double tol) {
    if (x.length != y.length) {
      throw new IllegalArgumentException(
          String.format("The sizes of X and Y don't match: %d != %d", x.length, y.length));
    }

    // class label set.
    int[] labels = Math.unique(y);
    Arrays.sort(labels);

    // After sorting, the distinct labels must be exactly 0, 1, ..., k - 1. Any deviation at
    // position i means class i is absent (this also covers label sets that do not start at 0,
    // which would otherwise overflow the per-class arrays below).
    for (int i = 0; i < labels.length; i++) {
      if (labels[i] < 0) {
        throw new IllegalArgumentException("Negative class label: " + labels[i]);
      }

      if (labels[i] != i) {
        throw new IllegalArgumentException("Missing class: " + i);
      }
    }

    k = labels.length;
    if (k < 2) {
      throw new IllegalArgumentException("Only one class.");
    }

    if (tol < 0.0) {
      throw new IllegalArgumentException("Invalid tol: " + tol);
    }

    if (x.length <= k) {
      throw new IllegalArgumentException(
          String.format("Sample size is too small: %d <= %d", x.length, k));
    }

    if (L >= k) {
      throw new IllegalArgumentException(
          String.format("The dimensionality of mapped space is too high: %d >= %d", L, k));
    }

    // Non-positive L requests the default dimensionality.
    if (L <= 0) {
      L = k - 1;
    }

    final int n = x.length;
    p = x[0].length;

    // The number of instances in each class.
    int[] ni = new int[k];
    // Common mean vector.
    mean = Math.colMean(x);
    // Common covariance.
    double[][] T = new double[p][p];
    // Class mean vectors.
    mu = new double[k][p];

    // Accumulate per-class sums and counts.
    for (int i = 0; i < n; i++) {
      int c = y[i];
      ni[c]++;
      for (int j = 0; j < p; j++) {
        mu[c][j] += x[i][j];
      }
    }

    // Turn the sums into class means, centered on the common mean.
    for (int i = 0; i < k; i++) {
      for (int j = 0; j < p; j++) {
        mu[i][j] = mu[i][j] / ni[i] - mean[j];
      }
    }

    // Accumulate the lower triangle of the total scatter around the common mean.
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < p; j++) {
        for (int l = 0; l <= j; l++) {
          T[j][l] += (x[i][j] - mean[j]) * (x[i][l] - mean[l]);
        }
      }
    }

    // Normalize by the sample count and mirror into the upper triangle.
    for (int j = 0; j < p; j++) {
      for (int l = 0; l <= j; l++) {
        T[j][l] /= n;
        T[l][j] = T[j][l];
      }
    }

    // Between class scatter
    double[][] B = new double[p][p];
    for (int i = 0; i < k; i++) {
      for (int j = 0; j < p; j++) {
        for (int l = 0; l <= j; l++) {
          B[j][l] += mu[i][j] * mu[i][l];
        }
      }
    }

    // Normalize by the class count and mirror into the upper triangle.
    for (int j = 0; j < p; j++) {
      for (int l = 0; l <= j; l++) {
        B[j][l] /= k;
        B[l][j] = B[j][l];
      }
    }

    EigenValueDecomposition eigen = EigenValueDecomposition.decompose(T, true);

    // Compare eigenvalues against tol^2 and invert them in place.
    tol = tol * tol;
    double[] s = eigen.getEigenValues();
    for (int i = 0; i < s.length; i++) {
      if (s[i] < tol) {
        throw new IllegalArgumentException("The covariance matrix is close to singular.");
      }

      s[i] = 1.0 / s[i];
    }

    double[][] U = eigen.getEigenVectors();
    double[][] UB = Math.atbmm(U, B);

    // NOTE(review): this scales only the first k rows of UB, column-wise by s[j]. If the intent
    // is T^{-1} B = U diag(1/s) U' B, row i should be scaled by s[i] for all p rows; the loop
    // would also index past UB when k > p (presumably UB is p x p). Left unchanged pending
    // confirmation.
    for (int i = 0; i < k; i++) {
      for (int j = 0; j < p; j++) {
        UB[i][j] *= s[j];
      }
    }

    // B is reused as the output buffer for the product U * UB.
    Math.abmm(U, UB, B);

    eigen = EigenValueDecomposition.decompose(B, true);

    // Keep the first L columns of the eigenvector matrix as the projection.
    U = eigen.getEigenVectors();
    scaling = new double[p][L];
    for (int i = 0; i < p; i++) {
      System.arraycopy(U[i], 0, scaling[i], 0, L);
    }

    // Project the common mean and the (centered) class means into the mapped space.
    smean = new double[L];
    Math.atx(scaling, mean, smean);
    smu = Math.abmm(mu, scaling);
  }