Ejemplo n.º 1
0
  /**
   * Constructor.
   *
   * @param d the dimensionality of data.
   * @param L the number of hash tables.
   * @param k the number of random projection hash functions, which is usually set to log(N) where N
   *     is the dataset size.
   * @param w the width of random projections. It should be sufficiently away from 0. But we should
   *     not choose an w value that is too large, which will increase the query time.
   * @param H the size of universal hash tables.
   */
  public LSH(int d, int L, int k, double w, int H) {
    if (d < 2) {
      throw new IllegalArgumentException("Invalid input space dimension: " + d);
    }

    if (L < 1) {
      throw new IllegalArgumentException("Invalid number of hash tables: " + L);
    }

    if (k < 1) {
      throw new IllegalArgumentException(
          "Invalid number of random projections per hash value: " + k);
    }

    if (w <= 0.0) {
      throw new IllegalArgumentException("Invalid width of random projections: " + w);
    }

    if (H < 1) {
      throw new IllegalArgumentException("Invalid size of hash tables: " + H);
    }

    this.d = d;
    this.L = L;
    this.k = k;
    this.w = w;
    this.H = H;

    keys = new ArrayList<double[]>();
    data = new ArrayList<E>();
    r1 = new int[k];
    r2 = new int[k];
    for (int i = 0; i < k; i++) {
      r1[i] = Math.randomInt(MAX_HASH_RND);
      r2[i] = Math.randomInt(MAX_HASH_RND);
    }

    hash = new ArrayList<Hash>(L);
    for (int i = 0; i < L; i++) {
      hash.add(new Hash());
    }
  }
Ejemplo n.º 2
0
  /**
   * Generates a random neighbor of the current clustering, i.e. one that differs from it in
   * exactly one medoid.
   *
   * <p>A medoid position and a replacement element of {@code data} are picked at random; the
   * replacement is re-drawn until it is not reference-equal to any current medoid. The arrays
   * {@code medoids}, {@code y} and {@code d} are then updated in place to reflect the swap.
   *
   * <p>NOTE(review): the re-draw loop only ends once a non-medoid element is drawn, so callers
   * are presumably expected to guarantee that {@code data} contains one. Confirm against caller.
   *
   * @param data the data elements.
   * @param medoids the current medoids; exactly one entry is overwritten.
   * @param y per-element medoid index; updated in place.
   * @param d per-element distance to the assigned medoid; updated in place.
   * @return the value of {@code Math.sum(d)} after the update.
   */
  private double getRandomNeighbor(T[] data, T[] medoids, int[] y, double[] d) {
    final int n = data.length;

    // The medoid position is drawn before the replacement so that the order of random draws
    // is preserved.
    final int slot = Math.randomInt(k);

    // Rejection sampling: keep drawing until the candidate is not already a medoid.
    T candidate;
    boolean taken;
    do {
      candidate = data[Math.randomInt(n)];
      taken = false;
      for (int m = 0; m < k && !taken; m++) {
        taken = (candidate == medoids[m]);
      }
    } while (taken);

    medoids[slot] = candidate;

    for (int p = 0; p < n; p++) {
      double dist = distance.d(data[p], candidate);

      // The new medoid is strictly closer: reassign and move on.
      if (d[p] > dist) {
        y[p] = slot;
        d[p] = dist;
        continue;
      }

      // Elements attached to an untouched medoid keep their assignment.
      if (y[p] != slot) {
        continue;
      }

      // This element belonged to the replaced medoid and the new medoid is not closer than
      // the old one. Start from the new medoid (y[p] already equals slot) and look for a
      // strictly closer one among the remaining medoids.
      d[p] = dist;
      for (int j = 0; j < k; j++) {
        if (j == slot) {
          continue;
        }
        dist = distance.d(data[p], medoids[j]);
        if (d[p] > dist) {
          y[p] = j;
          d[p] = dist;
        }
      }
    }

    return Math.sum(d);
  }