/** * Constructor. * * @param d the dimensionality of data. * @param L the number of hash tables. * @param k the number of random projection hash functions, which is usually set to log(N) where N * is the dataset size. * @param w the width of random projections. It should be sufficiently away from 0. But we should * not choose an w value that is too large, which will increase the query time. * @param H the size of universal hash tables. */ public LSH(int d, int L, int k, double w, int H) { if (d < 2) { throw new IllegalArgumentException("Invalid input space dimension: " + d); } if (L < 1) { throw new IllegalArgumentException("Invalid number of hash tables: " + L); } if (k < 1) { throw new IllegalArgumentException( "Invalid number of random projections per hash value: " + k); } if (w <= 0.0) { throw new IllegalArgumentException("Invalid width of random projections: " + w); } if (H < 1) { throw new IllegalArgumentException("Invalid size of hash tables: " + H); } this.d = d; this.L = L; this.k = k; this.w = w; this.H = H; keys = new ArrayList<double[]>(); data = new ArrayList<E>(); r1 = new int[k]; r2 = new int[k]; for (int i = 0; i < k; i++) { r1[i] = Math.randomInt(MAX_HASH_RND); r2[i] = Math.randomInt(MAX_HASH_RND); } hash = new ArrayList<Hash>(L); for (int i = 0; i < L; i++) { hash.add(new Hash()); } }
/**
 * Generates a random neighbor of the current clustering, i.e. one that differs from it in
 * exactly one medoid. A randomly chosen medoid slot is overwritten with a randomly chosen data
 * point that is not (by reference) one of the current medoids, and the assignments and
 * distances are then updated in place.
 *
 * @param data the data points.
 * @param medoids the current medoids; one entry is replaced by the new medoid.
 * @param y the medoid index assigned to each data point; updated in place.
 * @param d the distance of each data point to its assigned medoid; updated in place.
 * @return the sum of the updated entries of {@code d}.
 */
private double getRandomNeighbor(T[] data, T[] medoids, int[] y, double[] d) {
    final int n = data.length;

    // The slot to replace is drawn first, then the candidate(s).
    final int slot = Math.randomInt(k);

    // Keep drawing until the candidate is not already one of the medoids.
    T candidate;
    boolean alreadyMedoid;
    do {
        candidate = data[Math.randomInt(n)];
        alreadyMedoid = false;
        for (int m = 0; m < k && !alreadyMedoid; m++) {
            alreadyMedoid = (candidate == medoids[m]);
        }
    } while (alreadyMedoid);

    medoids[slot] = candidate;

    for (int p = 0; p < n; p++) {
        final double toCandidate = distance.d(data[p], candidate);

        if (toCandidate < d[p]) {
            // The new medoid is strictly closer than the current assignment.
            y[p] = slot;
            d[p] = toCandidate;
            continue;
        }

        if (y[p] != slot) {
            // Assigned to an untouched medoid that is at least as close: nothing to do.
            continue;
        }

        // The point belonged to the replaced medoid: start from the new medoid in that
        // slot (y[p] already equals slot) and look for a strictly closer one elsewhere.
        d[p] = toCandidate;
        for (int m = 0; m < k; m++) {
            if (m == slot) {
                continue;
            }
            final double toOther = distance.d(data[p], medoids[m]);
            if (toOther < d[p]) {
                y[p] = m;
                d[p] = toOther;
            }
        }
    }

    return Math.sum(d);
}