Example #1
  /** Train L2 tree boost. */
  private void train2(Attribute[] attributes, double[][] x, int[] y) {
    int n = x.length;
    int N = (int) Math.round(n * f); // subsample size (f is the sampling fraction for stochastic boosting)

    int[] y2 = new int[n]; // remap labels so that class 1 -> +1 and everything else -> -1
    for (int i = 0; i < n; i++) {
      if (y[i] == 1) {
        y2[i] = 1;
      } else {
        y2[i] = -1;
      }
    }

    y = y2;

    double[] h = new double[n]; // current F(x_i)
    double[] response = new double[n]; // response variable for regression tree.

    double mu = Math.mean(y); // mean of the {-1, +1} labels
    b = 0.5 * Math.log((1 + mu) / (1 - mu)); // prior F_0 that minimizes the logistic loss for a constant model

    for (int i = 0; i < n; i++) {
      h[i] = b;
    }

    int[][] order = SmileUtils.sort(attributes, x);
    RegressionTree.NodeOutput output = new L2NodeOutput(response);
    trees = new RegressionTree[T];

    int[] perm = new int[n];
    int[] samples = new int[n];
    for (int i = 0; i < n; i++) {
      perm[i] = i;
    }

    for (int m = 0; m < T; m++) {
      Arrays.fill(samples, 0);

      Math.permutate(perm); // shuffle the index array; the first N entries form this iteration's subsample
      for (int i = 0; i < N; i++) {
        samples[perm[i]] = 1;
      }

      // pseudo response: negative gradient of the loss log(1 + exp(-2*y*F)) at the current F
      for (int i = 0; i < n; i++) {
        response[i] = 2.0 * y[i] / (1 + Math.exp(2 * y[i] * h[i]));
      }

      trees[m] = new RegressionTree(attributes, x, response, J, order, samples, output);

      for (int i = 0; i < n; i++) {
        h[i] += shrinkage * trees[m].predict(x[i]);
      }
    }
  }
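
The two formulas in train2 come from Friedman's two-class L2 TreeBoost: with labels mapped to {-1, +1}, the intercept b is the constant that minimizes the logistic loss log(1 + exp(-2*y*F)), and the per-sample response is the negative gradient of that loss at the current F. Below is a minimal, self-contained sketch of both computations on toy data; it uses only java.lang.Math, and the class name and values are illustrative, not part of Smile.

public class L2BoostSketch {
  public static void main(String[] args) {
    int[] y = {+1, -1, +1, +1};        // labels already mapped to {-1, +1}
    double[] h = new double[y.length]; // current scores F(x_i)

    // prior: F_0 = 0.5 * log((1 + mean(y)) / (1 - mean(y)))
    double mu = 0.0;
    for (int yi : y) mu += yi;
    mu /= y.length;
    double b = 0.5 * Math.log((1 + mu) / (1 - mu));
    java.util.Arrays.fill(h, b);

    // pseudo response: negative gradient of log(1 + exp(-2*y*F)) with respect to F
    double[] response = new double[y.length];
    for (int i = 0; i < y.length; i++) {
      response[i] = 2.0 * y[i] / (1 + Math.exp(2 * y[i] * h[i]));
    }
    System.out.println(java.util.Arrays.toString(response));
  }
}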
Example #2
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();

    sb.append(String.format("CLARANS distortion: %.5f%n", distortion));
    sb.append(String.format("Clusters of %d data points:%n", y.length));
    for (int i = 0; i < k; i++) {
      int r = (int) Math.round(1000.0 * size[i] / y.length); // per mille, so the percentage prints with one decimal digit using integer math
      sb.append(String.format("%3d\t%5d (%2d.%1d%%)%n", i, size[i], r / 10, r % 10));
    }

    return sb.toString();
  }
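
The round-to-per-mille trick above prints a percentage with one decimal digit using only integer arithmetic. A toy check, with made-up size and total values:

public class PercentFormatSketch {
  public static void main(String[] args) {
    int size = 125, total = 1000;                    // hypothetical cluster size and data count
    int r = (int) Math.round(1000.0 * size / total); // per mille: 125
    System.out.println(String.format("%5d (%2d.%1d%%)", size, r / 10, r % 10)); // "  125 (12.5%)"
  }
}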
Example #3
  /** Convert coordinate to a string. */
  public static String coordToString(double... c) {
    StringBuilder builder = new StringBuilder("(");
    for (int i = 0; i < c.length; i++) {
      builder.append(Math.round(c[i], 2)).append(","); // two-argument round: 2 decimal places
    }

    if (c.length > 0) {
      builder.setCharAt(builder.length() - 1, ')'); // replace the trailing comma with the closing parenthesis
    } else {
      builder.append(")");
    }

    return builder.toString();
  }
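
Note that Math.round(c[i], 2) here is a two-argument rounding helper rather than java.lang.Math.round. A self-contained sketch of the same method using String.format for the rounding is handy for checking the trailing-comma replacement; the class name and sample values below are illustrative only.

public class CoordToStringSketch {
  /** Stand-alone variant that rounds with String.format instead of a library helper. */
  public static String coordToString(double... c) {
    StringBuilder builder = new StringBuilder("(");
    for (double v : c) {
      builder.append(String.format("%.2f", v)).append(",");
    }
    if (c.length > 0) {
      builder.setCharAt(builder.length() - 1, ')'); // overwrite the trailing comma
    } else {
      builder.append(")");
    }
    return builder.toString();
  }

  public static void main(String[] args) {
    System.out.println(coordToString(1.234, 5.678)); // (1.23,5.68)
    System.out.println(coordToString());             // ()
  }
}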
Example #4
  /** Train L-k tree boost. */
  private void traink(Attribute[] attributes, double[][] x, int[] y) {
    int n = x.length;
    int N = (int) Math.round(n * f);

    double[][] h = new double[k][n]; // boost tree output.
    double[][] p = new double[k][n]; // posteriori probabilities.
    double[][] response = new double[k][n]; // pseudo response.

    int[][] order = SmileUtils.sort(attributes, x);
    forest = new RegressionTree[k][T];

    RegressionTree.NodeOutput[] output = new LKNodeOutput[k];
    for (int i = 0; i < k; i++) {
      output[i] = new LKNodeOutput(response[i]);
    }

    int[] perm = new int[n];
    int[] samples = new int[n];
    for (int i = 0; i < n; i++) {
      perm[i] = i;
    }

    for (int m = 0; m < T; m++) {
      for (int i = 0; i < n; i++) {
        // convert tree outputs h[][i] to class probabilities p[][i] via a softmax,
        // subtracting the max for numerical stability
        double max = Double.NEGATIVE_INFINITY;
        for (int j = 0; j < k; j++) {
          if (max < h[j][i]) {
            max = h[j][i];
          }
        }

        double Z = 0.0;
        for (int j = 0; j < k; j++) {
          p[j][i] = Math.exp(h[j][i] - max);
          Z += p[j][i];
        }

        for (int j = 0; j < k; j++) {
          p[j][i] /= Z;
        }
      }

      for (int j = 0; j < k; j++) {
        // pseudo response for class j: indicator(y_i == j) minus the predicted probability p[j][i]
        for (int i = 0; i < n; i++) {
          if (y[i] == j) {
            response[j][i] = 1.0;
          } else {
            response[j][i] = 0.0;
          }
          response[j][i] -= p[j][i];
        }

        Arrays.fill(samples, 0);
        Math.permutate(perm); // fresh random subsample of size N for this class's tree
        for (int i = 0; i < N; i++) {
          samples[perm[i]] = 1;
        }

        forest[j][m] = new RegressionTree(attributes, x, response[j], J, order, samples, output[j]);

        for (int i = 0; i < n; i++) {
          h[j][i] += shrinkage * forest[j][m].predict(x[i]);
        }
      }
    }
  }
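
Per iteration, traink turns the k tree outputs into class probabilities via a softmax (with the maximum subtracted for numerical stability) and then uses I(y_i == j) - p_j(x_i) as the pseudo response for the class-j tree. A minimal stand-alone sketch of that step for a single sample, with toy scores rather than Smile's API:

public class LKBoostSketch {
  public static void main(String[] args) {
    double[] h = {2.0, 0.5, -1.0}; // current scores F_j(x) for one sample, k = 3 classes
    int y = 0;                     // true class of this sample

    // softmax with max subtraction to avoid overflow in Math.exp
    double max = Double.NEGATIVE_INFINITY;
    for (double v : h) max = Math.max(max, v);
    double[] p = new double[h.length];
    double Z = 0.0;
    for (int j = 0; j < h.length; j++) {
      p[j] = Math.exp(h[j] - max);
      Z += p[j];
    }
    for (int j = 0; j < h.length; j++) p[j] /= Z;

    // pseudo response for class j: I(y == j) - p_j
    for (int j = 0; j < h.length; j++) {
      double response = (y == j ? 1.0 : 0.0) - p[j];
      System.out.printf("class %d: p = %.4f, response = %+.4f%n", j, p[j], response);
    }
  }
}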
Example #5
  /**
   * Constructor. Clustering data into k clusters. The maximum number of random searches is set to
   * 0.0125 * k * (n - k), where n is the number of data points and k is the number of clusters.
   * The number of local searches is max(8, numProcessors).
   *
   * @param data the dataset for clustering.
   * @param distance the distance/dissimilarity measure.
   * @param k the number of clusters.
   */
  public CLARANS(T[] data, Distance<T> distance, int k) {
    this(data, distance, k, (int) Math.round(0.0125 * k * (data.length - k)));
  }
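
As a rough sense of scale for this default: with, say, n = 1000 data points and k = 5 clusters, the bound works out to round(0.0125 * 5 * 995) = 62 examined neighbors per local search. A trivial check with those made-up numbers:

public class MaxNeighborSketch {
  public static void main(String[] args) {
    int n = 1000, k = 5; // hypothetical dataset size and number of clusters
    System.out.println((int) Math.round(0.0125 * k * (n - k))); // 62
  }
}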