Exemple #1
0
  /**
   * Generates clusters of 2D points scattered around randomly placed centers.
   *
   * <p>Each center coordinate is computed as {@code random.nextDouble() * (max - min) + min}. The
   * {@code samples} field is divided across the centers as evenly as possible; the first
   * {@code samples % centers} centers receive one extra point. Every point is its center plus an
   * offset whose two components are sampled from a normal distribution built with mean {@code 0.0}
   * and standard deviation {@code clusterStd}.
   *
   * @param centers number of cluster centers to create
   * @param clusterStd standard deviation passed to the normal distribution used for the offsets
   * @param min lower bound of each center coordinate
   * @param max value used together with {@code min} to compute the coordinate range
   * @return the generated points, grouped by center in the order the centers were created
   */
  private List<Vector2D> makeBlobs(int centers, double clusterStd, double min, double max) {
    NormalDistribution noise = new NormalDistribution(random, 0.0, clusterStd, 1e-9);

    // Place every center first; the offsets are only sampled afterwards.
    double span = max - min;
    Vector2D[] anchors = new Vector2D[centers];
    for (int c = 0; c < centers; c++) {
      double x = random.nextDouble() * span + min;
      double y = random.nextDouble() * span + min;
      anchors[c] = new Vector2D(x, y);
    }

    // Even share per center, with the remainder handed out one each to the leading centers.
    int base = samples / centers;
    int extra = samples % centers;

    List<Vector2D> points = new ArrayList<>();
    for (int c = 0; c < centers; c++) {
      int quota = c < extra ? base + 1 : base;
      for (int k = 0; k < quota; k++) {
        Vector2D offset = new Vector2D(noise.sample(), noise.sample());
        points.add(offset.add(anchors[c]));
      }
    }
    return points;
  }
 /**
  * Switches random number generation to a fixed seed, which is useful for making tests
  * deterministic.
  *
  * <p>Sets the {@code useTestSeed} flag, re-seeds every {@link RandomGenerator} currently
  * registered in {@code INSTANCES} with {@code TEST_SEED}, and then empties the registry.
  */
 public static void useTestSeed() {
   // Raise the flag before touching the registry.
   // NOTE(review): presumably the flag is what makes generators created later start from the
   // test seed -- confirm against the code that creates them.
   useTestSeed = true;
   synchronized (INSTANCES) {
     for (RandomGenerator registered : INSTANCES.keySet()) {
       registered.setSeed(TEST_SEED);
     }
     // Every known generator has been re-seeded, so the registry is emptied.
     INSTANCES.clear();
   }
 }
  /**
   * Creates a distribution with the given dimensionality, position and size.
   *
   * <p>One {@code int} is drawn from the random source returned by {@code Util.R.get()}. The value
   * is discarded; the draw is retained so that other consumers of that source keep observing the
   * same sequence of values after an instance is constructed.
   *
   * @param dims number of dimensions; must not be negative
   * @param pos position values; the array is stored by reference, not copied
   * @param size size value, stored as given
   * @throws NegativeArraySizeException if {@code dims} is negative
   */
  public GaussianDistribution(final int dims, final double[] pos, final double size) {
    final Random random = Util.R.get();
    // A negative dimensionality has always been rejected with this exception type; keep that
    // contract without allocating any per-dimension scratch arrays.
    if (dims < 0) {
      throw new NegativeArraySizeException(String.valueOf(dims));
    }
    // Advance the shared source by exactly one int (see the constructor documentation).
    random.nextInt();

    this.dims = dims;
    this.pos = pos;
    this.size = size;
  }
 /**
  * Samples rows from this data set into a new data set.
  *
  * <p>If {@code numSamples} is greater than or equal to {@link #numExamples()}, no sampling is
  * performed and this data set itself is returned.
  *
  * @param numSamples the number of samples to draw
  * @param rng the random number generator used to pick example rows
  * @param withReplacement whether to allow duplicates (only tracked by example row number)
  * @return the sampled data set, or this data set when {@code numSamples >= numExamples()}
  */
 public FloatDataSet sample(int numSamples, RandomGenerator rng, boolean withReplacement) {
   if (numSamples >= numExamples()) {
     return this;
   }

   FloatMatrix examples = new FloatMatrix(numSamples, getFirst().columns);
   FloatMatrix outcomes = new FloatMatrix(numSamples, numOutcomes());
   // Row numbers already copied; only consulted when sampling without replacement.
   Set<Integer> added = new HashSet<Integer>();
   for (int i = 0; i < numSamples; i++) {
     int picked = rng.nextInt(numExamples());
     if (!withReplacement) {
       // Set.add returns false for a row that was already picked, so keep drawing until a
       // fresh row is recorded. This terminates because numSamples < numExamples() here,
       // which guarantees at least one unused row remains.
       while (!added.add(picked)) {
         picked = rng.nextInt(numExamples());
       }
     }
     examples.putRow(i, get(picked).getFirst());
     outcomes.putRow(i, get(picked).getSecond());
   }
   return new FloatDataSet(examples, outcomes);
 }
 /**
  * Checks {@code nextSetBit} on randomly populated bit sets: no bit between the starting index
  * and the reported index may be set, and the reported bit itself must be set. When {@code -1} is
  * reported, no bit from the starting index up to {@code NUM_BITS} may be set.
  */
 @Test
 public void testNextBitSetRandom() {
   RandomGenerator random = RandomManager.getRandom();
   for (int trial = 0; trial < 100; trial++) {
     BitSet bits = new BitSet(NUM_BITS);
     // The bound is re-drawn each time the loop condition is evaluated, so the number of
     // bits set is itself random.
     for (int k = 0; k < 20 + random.nextInt(50); k++) {
       bits.set(random.nextInt(NUM_BITS));
     }

     int from = random.nextInt(NUM_BITS);
     int nextSet = bits.nextSetBit(from);

     // Everything in [from, end) must be clear, where end is the reported bit if there is
     // one and NUM_BITS otherwise.
     int end = (nextSet == -1) ? NUM_BITS : nextSet;
     for (int pos = from; pos < end; pos++) {
       assertFalse(bits.get(pos));
     }
     if (nextSet != -1) {
       assertTrue(bits.get(nextSet));
     }
   }
 }
  /**
   * Tests initial center selection when almost all points are identical. See issue MATH-546.
   *
   * <p>The input holds one point repeated many times plus a single different point; with two
   * clusters and zero iterations, the different point is expected to be one of the centers.
   */
  @Test
  public void testSmallDistances() {
    // One point that will be repeated, and one that appears exactly once.
    DoublePoint repeatedPoint = new DoublePoint(new int[] {0});
    DoublePoint uniquePoint = new DoublePoint(new int[] {1});

    final int repeatCount = 10 * 1000;
    Collection<DoublePoint> points = new ArrayList<DoublePoint>();
    for (int added = 0; added < repeatCount; added++) {
      points.add(repeatedPoint);
    }
    points.add(uniquePoint);

    // Zero iterations: the clusterer only gets to choose its initial cluster centers.
    final long seed = 0;
    final int clusterCount = 2;
    final int iterationLimit = 0;
    random.setSeed(seed);

    KMeansPlusPlusClusterer<DoublePoint> clusterer =
        new KMeansPlusPlusClusterer<DoublePoint>(
            clusterCount, iterationLimit, new CloseDistance(), random);
    List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);

    // The unique point must show up as the center of at least one cluster.
    boolean found = false;
    for (CentroidCluster<DoublePoint> candidate : clusters) {
      found |= candidate.getCenter().equals(uniquePoint);
    }
    Assert.assertTrue(found);
  }
  /** {@inheritDoc} */
  @Override
  protected UnivariatePointValuePair doOptimize() {
    // Blank out the "MaxEval" and "SearchInterval" entries of the array handed to the
    // internal optimizer and remember where they were. Each start re-fills those two slots:
    // the first with the evaluations still allowed, the second with that start's own
    // start value.
    for (int slot = 0; slot < optimData.length; slot++) {
      if (optimData[slot] instanceof MaxEval) {
        optimData[slot] = null;
        maxEvalIndex = slot;
      } else if (optimData[slot] instanceof SearchInterval) {
        optimData[slot] = null;
        searchIntervalIndex = slot;
      }
    }
    // Both kinds of entry are required.
    if (maxEvalIndex == -1 || searchIntervalIndex == -1) {
      throw new MathIllegalStateException();
    }

    RuntimeException lastFailure = null;
    optima = new UnivariatePointValuePair[starts];
    totalEvaluations = 0;

    final int evaluationBudget = getMaxEvaluations();
    final double lower = getMin();
    final double upper = getMax();
    final double firstStart = getStartValue();

    // One optimization run per start.
    for (int run = 0; run < starts; run++) {
      // CHECKSTYLE: stop IllegalCatch
      try {
        // Only the evaluations not yet consumed by earlier runs remain available.
        optimData[maxEvalIndex] = new MaxEval(evaluationBudget - totalEvaluations);

        // The first run uses the configured start value; later runs draw one from the
        // generator within the search interval.
        final double start;
        if (run == 0) {
          start = firstStart;
        } else {
          start = lower + generator.nextDouble() * (upper - lower);
        }
        optimData[searchIntervalIndex] = new SearchInterval(lower, upper, start);

        optima[run] = optimizer.optimize(optimData);
      } catch (RuntimeException failure) {
        // A failed run leaves an empty slot; the most recent failure is kept for rethrow.
        lastFailure = failure;
        optima[run] = null;
      }
      // CHECKSTYLE: resume IllegalCatch

      totalEvaluations += optimizer.getEvaluations();
    }

    sortPairs(getGoalType());

    // After sorting, the first slot is the one returned; if it is empty, report the last
    // failure instead.
    final UnivariatePointValuePair best = optima[0];
    if (best == null) {
      throw lastFailure; // Cannot be null if starts >= 1.
    }
    return best;
  }
 /** Creates a fresh {@code JDKRandomGenerator} with a fixed seed before each test. */
 @Before
 public void setUp() {
   random = new JDKRandomGenerator();
   // Fixed seed so every run of the tests sees the same random sequence.
   random.setSeed(1746432956321L);
 }
  /**
   * Compares the top-N results of two {@code ALSServingModel}s that are loaded with identical
   * user vectors, item vectors and known items, but constructed with {@code 1.0} versus
   * {@code 0.5} as their third constructor argument.
   *
   * <p>For every user (dot-product scoring) and then for every item (cosine scoring), the length
   * of the common leading prefix of the two result lists is measured; the mean prefix length must
   * be at least 4.0 for users and at least 5.0 for items.
   *
   * <p>NOTE(review): the third constructor argument is presumably the LSH sample rate -- confirm
   * against {@code ALSServingModel}.
   */
  @Test
  public void testLSHEffect() {
    RandomGenerator random = RandomManager.getRandom();
    PoissonDistribution itemPerUserDist =
        new PoissonDistribution(
            random,
            20,
            PoissonDistribution.DEFAULT_EPSILON,
            PoissonDistribution.DEFAULT_MAX_ITERATIONS);
    int numFeatures = 20;
    ALSServingModel mainModel = new ALSServingModel(numFeatures, true, 1.0, null);
    ALSServingModel lshModel = new ALSServingModel(numFeatures, true, 0.5, null);

    // The same count is used for the number of users and the number of items.
    int population = 20000;

    // Users: one random vector each, plus a Poisson-distributed number of known item IDs.
    for (int u = 0; u < population; u++) {
      String userID = "U" + u;
      float[] userVector = VectorMath.randomVectorF(numFeatures, random);
      mainModel.setUserVector(userID, userVector);
      lshModel.setUserVector(userID, userVector);
      int knownCount = itemPerUserDist.sample();
      Collection<String> knownIDs = new ArrayList<>(knownCount);
      for (int k = 0; k < knownCount; k++) {
        knownIDs.add("I" + random.nextInt(population));
      }
      mainModel.addKnownItems(userID, knownIDs);
      lshModel.addKnownItems(userID, knownIDs);
    }

    // Items: one random vector each, shared by both models.
    for (int it = 0; it < population; it++) {
      String itemID = "I" + it;
      float[] itemVector = VectorMath.randomVectorF(numFeatures, random);
      mainModel.setItemVector(itemID, itemVector);
      lshModel.setItemVector(itemID, itemVector);
    }

    int howMany = 10;
    Mean meanMatchLength = new Mean();

    // Per-user results, scored with DotsFunction.
    for (int u = 0; u < population; u++) {
      String userID = "U" + u;
      List<Pair<String, Double>> mainRecs =
          mainModel.topN(new DotsFunction(mainModel.getUserVector(userID)), null, howMany, null);
      List<Pair<String, Double>> lshRecs =
          lshModel.topN(new DotsFunction(lshModel.getUserVector(userID)), null, howMany, null);
      // Count how many leading entries of the two lists are equal.
      int limit = Math.min(lshRecs.size(), mainRecs.size());
      int matched = 0;
      for (; matched < limit; matched++) {
        if (!lshRecs.get(matched).equals(mainRecs.get(matched))) {
          break;
        }
      }
      meanMatchLength.increment(matched);
    }
    log.info("Mean matching prefix: {}", meanMatchLength.getResult());
    assertTrue(meanMatchLength.getResult() >= 4.0);

    // Per-item results, scored with CosineAverageFunction.
    meanMatchLength.clear();
    for (int it = 0; it < population; it++) {
      String itemID = "I" + it;
      List<Pair<String, Double>> mainRecs =
          mainModel.topN(
              new CosineAverageFunction(mainModel.getItemVector(itemID)), null, howMany, null);
      List<Pair<String, Double>> lshRecs =
          lshModel.topN(
              new CosineAverageFunction(lshModel.getItemVector(itemID)), null, howMany, null);
      // Count how many leading entries of the two lists are equal.
      int limit = Math.min(lshRecs.size(), mainRecs.size());
      int matched = 0;
      for (; matched < limit; matched++) {
        if (!lshRecs.get(matched).equals(mainRecs.get(matched))) {
          break;
        }
      }
      meanMatchLength.increment(matched);
    }
    log.info("Mean matching prefix: {}", meanMatchLength.getResult());
    assertTrue(meanMatchLength.getResult() >= 5.0);
  }