/**
 * Generates clustered 2D points ("blobs") around randomly placed centres.
 *
 * <p>Each centre has both coordinates drawn as {@code random.nextDouble() * (max - min) + min}.
 * The {@code samples} points (a field of the enclosing class) are split as evenly as possible
 * between the centres; the first {@code samples % centers} centres receive one extra point.
 * Every point is its centre plus an independent zero-mean normal offset per coordinate.
 *
 * @param centers number of cluster centres
 * @param clusterStd standard deviation of the per-coordinate normal offset
 * @param min lower bound used when placing the centres
 * @param max upper bound used when placing the centres
 * @return the generated points, grouped by centre in generation order
 */
private List<Vector2D> makeBlobs(int centers, double clusterStd, double min, double max) {
    NormalDistribution noise = new NormalDistribution(random, 0.0, clusterStd, 1e-9);
    double span = max - min;

    // All centres are drawn before any sample so the generator is consumed in a fixed order.
    Vector2D[] blobCenters = new Vector2D[centers];
    for (int c = 0; c < centers; c++) {
        double cx = random.nextDouble() * span + min;
        double cy = random.nextDouble() * span + min;
        blobCenters[c] = new Vector2D(cx, cy);
    }

    int base = samples / centers;
    int remainder = samples % centers;

    List<Vector2D> result = new ArrayList<>();
    for (int c = 0; c < centers; c++) {
        // The first `remainder` centres absorb the points left over by the integer division.
        int quota = c < remainder ? base + 1 : base;
        Vector2D center = blobCenters[c];
        for (int k = 0; k < quota; k++) {
            double dx = noise.sample();
            double dy = noise.sample();
            result.add(new Vector2D(dx, dy).add(center));
        }
    }
    return result;
}
/**
 * Switches random number generation to a fixed seed, which makes tests deterministic.
 *
 * <p>Sets the {@code useTestSeed} flag, re-seeds every {@link RandomGenerator} currently
 * registered in {@code INSTANCES} with {@code TEST_SEED}, and then empties the registry.
 * Generators created later are presumably seeded according to the flag; confirm against
 * the code that creates them.
 */
public static void useTestSeed() {
    useTestSeed = true;
    synchronized (INSTANCES) {
        for (RandomGenerator registered : INSTANCES.keySet()) {
            registered.setSeed(TEST_SEED);
        }
        // Everything registered so far has been re-seeded, so the registry is emptied.
        INSTANCES.clear();
    }
}
/**
 * Creates a Gaussian distribution description with the given dimensionality, position and size.
 *
 * @param dims number of dimensions
 * @param pos position array; stored by reference, not copied
 * @param size size value; stored as given
 */
public GaussianDistribution(final int dims, final double[] pos, final double size) {
    // One int is drawn from the shared generator and discarded. Nothing in this constructor
    // uses it; the draw is kept only so that code sharing Util.R sees the same random
    // sequence as before. No per-instance mean vector, identity matrix or private generator
    // is built because none of them was ever read.
    Util.R.get().nextInt();
    this.dims = dims;
    this.pos = pos;
    this.size = size;
}
/**
 * Sample a dataset.
 *
 * <p>If {@code numSamples} is at least {@code numExamples()}, this data set itself is returned
 * unchanged. Otherwise {@code numSamples} rows are picked at random. When
 * {@code withReplacement} is {@code false}, every row number is picked at most once.
 *
 * @param numSamples the number of samples to draw
 * @param rng the rng to use
 * @param withReplacement whether to allow duplicates (only tracked by example row number)
 * @return the sample dataset
 */
public FloatDataSet sample(int numSamples, RandomGenerator rng, boolean withReplacement) {
    if (numSamples >= numExamples()) {
        return this;
    }

    FloatMatrix examples = new FloatMatrix(numSamples, getFirst().columns);
    FloatMatrix outcomes = new FloatMatrix(numSamples, numOutcomes());
    Set<Integer> added = new HashSet<Integer>();
    for (int i = 0; i < numSamples; i++) {
        int picked = rng.nextInt(numExamples());
        if (!withReplacement) {
            // Set.add returns false for a row already taken, so re-draw until a fresh row
            // is recorded. This terminates because numSamples < numExamples() here.
            while (!added.add(picked)) {
                picked = rng.nextInt(numExamples());
            }
        }
        examples.putRow(i, get(picked).getFirst());
        outcomes.putRow(i, get(picked).getSecond());
    }
    return new FloatDataSet(examples, outcomes);
}
/**
 * Randomised check of {@code nextSetBit}: for 100 randomly filled bit sets and a random start
 * index, every bit between the start and the reported next set bit must be clear, and the
 * reported bit (if any) must be set.
 */
@Test
public void testNextBitSetRandom() {
    RandomGenerator rng = RandomManager.getRandom();
    for (int trial = 0; trial < 100; trial++) {
        BitSet bits = new BitSet(NUM_BITS);
        // The upper bound is re-drawn on every pass, so each check consumes one random int.
        int filled = 0;
        while (filled < 20 + rng.nextInt(50)) {
            bits.set(rng.nextInt(NUM_BITS));
            filled++;
        }

        int start = rng.nextInt(NUM_BITS);
        int firstSet = bits.nextSetBit(start);

        // With no set bit at or after start, the whole tail must be clear.
        int clearEnd = (firstSet == -1) ? NUM_BITS : firstSet;
        for (int k = start; k < clearEnd; k++) {
            assertFalse(bits.get(k));
        }
        if (firstSet != -1) {
            assertTrue(bits.get(firstSet));
        }
    }
}
/** Test points that are very close together. See issue MATH-546. */ @Test public void testSmallDistances() { // Create a bunch of CloseDoublePoints. Most are identical, but one is different by a // small distance. int[] repeatedArray = {0}; int[] uniqueArray = {1}; DoublePoint repeatedPoint = new DoublePoint(repeatedArray); DoublePoint uniquePoint = new DoublePoint(uniqueArray); Collection<DoublePoint> points = new ArrayList<DoublePoint>(); final int NUM_REPEATED_POINTS = 10 * 1000; for (int i = 0; i < NUM_REPEATED_POINTS; ++i) { points.add(repeatedPoint); } points.add(uniquePoint); // Ask a KMeansPlusPlusClusterer to run zero iterations (i.e., to simply choose initial // cluster centers). final long RANDOM_SEED = 0; final int NUM_CLUSTERS = 2; final int NUM_ITERATIONS = 0; random.setSeed(RANDOM_SEED); KMeansPlusPlusClusterer<DoublePoint> clusterer = new KMeansPlusPlusClusterer<DoublePoint>( NUM_CLUSTERS, NUM_ITERATIONS, new CloseDistance(), random); List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points); // Check that one of the chosen centers is the unique point. boolean uniquePointIsCenter = false; for (CentroidCluster<DoublePoint> cluster : clusters) { if (cluster.getCenter().equals(uniquePoint)) { uniquePointIsCenter = true; } } Assert.assertTrue(uniquePointIsCenter); }
/** {@inheritDoc} */ @Override protected UnivariatePointValuePair doOptimize() { // Remove all instances of "MaxEval" and "SearchInterval" from the // array that will be passed to the internal optimizer. // The former is to enforce smaller numbers of allowed evaluations // (according to how many have been used up already), and the latter // to impose a different start value for each start. for (int i = 0; i < optimData.length; i++) { if (optimData[i] instanceof MaxEval) { optimData[i] = null; maxEvalIndex = i; continue; } if (optimData[i] instanceof SearchInterval) { optimData[i] = null; searchIntervalIndex = i; continue; } } if (maxEvalIndex == -1) { throw new MathIllegalStateException(); } if (searchIntervalIndex == -1) { throw new MathIllegalStateException(); } RuntimeException lastException = null; optima = new UnivariatePointValuePair[starts]; totalEvaluations = 0; final int maxEval = getMaxEvaluations(); final double min = getMin(); final double max = getMax(); final double startValue = getStartValue(); // Multi-start loop. for (int i = 0; i < starts; i++) { // CHECKSTYLE: stop IllegalCatch try { // Decrease number of allowed evaluations. optimData[maxEvalIndex] = new MaxEval(maxEval - totalEvaluations); // New start value. final double s = (i == 0) ? startValue : min + generator.nextDouble() * (max - min); optimData[searchIntervalIndex] = new SearchInterval(min, max, s); // Optimize. optima[i] = optimizer.optimize(optimData); } catch (RuntimeException mue) { lastException = mue; optima[i] = null; } // CHECKSTYLE: resume IllegalCatch totalEvaluations += optimizer.getEvaluations(); } sortPairs(getGoalType()); if (optima[0] == null) { throw lastException; // Cannot be null if starts >= 1. } // Return the point with the best objective function value. return optima[0]; }
/** Installs a freshly created, fixed-seed generator in {@code random} before each test. */
@Before
public void setUp() {
    // Upper-case 'L' suffix: same value, but not mistakable for a trailing digit one.
    final long seed = 1746432956321L;
    random = new JDKRandomGenerator();
    random.setSeed(seed);
}
/**
 * Builds two {@link ALSServingModel}s that differ only in their third constructor argument
 * (1.0 vs 0.5; presumably the LSH sample rate, to be confirmed against the constructor) and
 * loads both with identical random user vectors, item vectors and known items. It then checks
 * that their top-N results agree on a sufficiently long common prefix on average: at least
 * 4.0 for user-based dot-product queries and at least 5.0 for item-based cosine queries.
 */
@Test
public void testLSHEffect() {
    RandomGenerator random = RandomManager.getRandom();
    PoissonDistribution itemPerUserDist = new PoissonDistribution(
        random,
        20,
        PoissonDistribution.DEFAULT_EPSILON,
        PoissonDistribution.DEFAULT_MAX_ITERATIONS);
    int features = 20;
    ALSServingModel mainModel = new ALSServingModel(features, true, 1.0, null);
    ALSServingModel lshModel = new ALSServingModel(features, true, 0.5, null);

    int userItemCount = 20000;

    // Users: same vector and same known items in both models.
    for (int u = 0; u < userItemCount; u++) {
        String userID = "U" + u;
        float[] userVector = VectorMath.randomVectorF(features, random);
        mainModel.setUserVector(userID, userVector);
        lshModel.setUserVector(userID, userVector);

        int itemsPerUser = itemPerUserDist.sample();
        Collection<String> knownIDs = new ArrayList<>(itemsPerUser);
        for (int k = 0; k < itemsPerUser; k++) {
            knownIDs.add("I" + random.nextInt(userItemCount));
        }
        mainModel.addKnownItems(userID, knownIDs);
        lshModel.addKnownItems(userID, knownIDs);
    }

    // Items: same vector in both models.
    for (int it = 0; it < userItemCount; it++) {
        String itemID = "I" + it;
        float[] itemVector = VectorMath.randomVectorF(features, random);
        mainModel.setItemVector(itemID, itemVector);
        lshModel.setItemVector(itemID, itemVector);
    }

    int numRecs = 10;
    Mean meanMatchLength = new Mean();

    // User-based queries scored by dot product.
    for (int u = 0; u < userItemCount; u++) {
        String userID = "U" + u;
        List<Pair<String, Double>> mainRecs =
            mainModel.topN(new DotsFunction(mainModel.getUserVector(userID)), null, numRecs, null);
        List<Pair<String, Double>> lshRecs =
            lshModel.topN(new DotsFunction(lshModel.getUserVector(userID)), null, numRecs, null);
        meanMatchLength.increment(matchingPrefixLength(lshRecs, mainRecs));
    }
    log.info("Mean matching prefix: {}", meanMatchLength.getResult());
    assertTrue(meanMatchLength.getResult() >= 4.0);

    meanMatchLength.clear();

    // Item-based queries scored by average cosine.
    for (int it = 0; it < userItemCount; it++) {
        String itemID = "I" + it;
        List<Pair<String, Double>> mainRecs =
            mainModel.topN(
                new CosineAverageFunction(mainModel.getItemVector(itemID)), null, numRecs, null);
        List<Pair<String, Double>> lshRecs =
            lshModel.topN(
                new CosineAverageFunction(lshModel.getItemVector(itemID)), null, numRecs, null);
        meanMatchLength.increment(matchingPrefixLength(lshRecs, mainRecs));
    }
    log.info("Mean matching prefix: {}", meanMatchLength.getResult());
    assertTrue(meanMatchLength.getResult() >= 5.0);
}

/** Returns how many leading positions of the two lists hold equal elements. */
private static <T> int matchingPrefixLength(List<T> candidate, List<T> reference) {
    int length = 0;
    while (length < candidate.size()
        && length < reference.size()
        && candidate.get(length).equals(reference.get(length))) {
        length++;
    }
    return length;
}