/**
  * Draws a random sample of rows from this dataset.
  *
  * <p>If {@code numSamples} is greater than or equal to {@code numExamples()}, no sampling is
  * performed and this same instance is returned.
  *
  * @param numSamples the number of rows to draw
  * @param rng the random generator used to pick row indices
  * @param withReplacement whether the same row may be drawn more than once; when {@code false},
  *     uniqueness is tracked by example row number only
  * @return a new dataset holding the sampled rows, or {@code this} when {@code numSamples} is at
  *     least the number of examples
  */
 public FloatDataSet sample(int numSamples, RandomGenerator rng, boolean withReplacement) {
   int totalExamples = numExamples();
   if (numSamples >= totalExamples) {
     return this;
   }

   FloatMatrix examples = new FloatMatrix(numSamples, getFirst().columns);
   FloatMatrix outcomes = new FloatMatrix(numSamples, numOutcomes());
   // Row numbers already drawn; only consulted when sampling without replacement.
   Set<Integer> added = new HashSet<Integer>();
   for (int i = 0; i < numSamples; i++) {
     int picked = rng.nextInt(totalExamples);
     if (!withReplacement) {
       // Set.add returns false for a row that was already drawn, so keep redrawing until a
       // fresh row is recorded. This terminates because numSamples < totalExamples here.
       while (!added.add(picked)) {
         picked = rng.nextInt(totalExamples);
       }
     }
     examples.putRow(i, get(picked).getFirst());
     outcomes.putRow(i, get(picked).getSecond());
   }
   return new FloatDataSet(examples, outcomes);
 }
 /**
  * Randomized check of {@code BitSet.nextSetBit}: over 100 trials, fills a bit set with random
  * bits, picks a random starting index, and verifies that every bit between the start and the
  * reported next set bit is clear, and that the reported bit itself is set.
  */
 @Test
 public void testNextBitSetRandom() {
   RandomGenerator rng = RandomManager.getRandom();
   for (int trial = 0; trial < 100; trial++) {
     BitSet bits = new BitSet(NUM_BITS);
     // Note: the upper bound is re-drawn from the generator on every loop check.
     int setCalls = 0;
     while (setCalls < 20 + rng.nextInt(50)) {
       bits.set(rng.nextInt(NUM_BITS));
       setCalls++;
     }

     int start = rng.nextInt(NUM_BITS);
     int firstSet = bits.nextSetBit(start);
     boolean found = firstSet != -1;

     // When nothing is set at or after start, the whole tail up to NUM_BITS must be clear;
     // otherwise only the gap before the reported bit must be.
     int clearEnd = found ? firstSet : NUM_BITS;
     for (int pos = start; pos < clearEnd; pos++) {
       assertFalse(bits.get(pos));
     }
     if (found) {
       assertTrue(bits.get(firstSet));
     }
   }
 }
示例#3
0
  /**
   * Compares top-N results of two {@code ALSServingModel} instances that are loaded with
   * identical user vectors, item vectors and known items, and that differ only in their third
   * constructor argument (1.0 vs 0.5 -- presumably the LSH sample rate; confirm against
   * {@code ALSServingModel}).
   *
   * <p>For every user (scored with {@code DotsFunction}) and then every item (scored with
   * {@code CosineAverageFunction}), the length of the common leading prefix of the two result
   * lists is averaged; the averages must be at least 4.0 and 5.0 respectively.
   */
  @Test
  public void testLSHEffect() {
    RandomGenerator random = RandomManager.getRandom();
    PoissonDistribution itemPerUserDist =
        new PoissonDistribution(
            random,
            20,
            PoissonDistribution.DEFAULT_EPSILON,
            PoissonDistribution.DEFAULT_MAX_ITERATIONS);
    int features = 20;
    ALSServingModel mainModel = new ALSServingModel(features, true, 1.0, null);
    ALSServingModel lshModel = new ALSServingModel(features, true, 0.5, null);

    // The same count is used for the number of users and the number of items.
    int userItemCount = 20000;

    // Load identical user vectors and known-item sets into both models.
    for (int u = 0; u < userItemCount; u++) {
      String userID = "U" + u;
      float[] userVector = VectorMath.randomVectorF(features, random);
      mainModel.setUserVector(userID, userVector);
      lshModel.setUserVector(userID, userVector);

      int itemsPerUser = itemPerUserDist.sample();
      Collection<String> knownIDs = new ArrayList<>(itemsPerUser);
      int remaining = itemsPerUser;
      while (remaining > 0) {
        knownIDs.add("I" + random.nextInt(userItemCount));
        remaining--;
      }
      mainModel.addKnownItems(userID, knownIDs);
      lshModel.addKnownItems(userID, knownIDs);
    }

    // Load identical item vectors into both models.
    for (int it = 0; it < userItemCount; it++) {
      String itemID = "I" + it;
      float[] itemVector = VectorMath.randomVectorF(features, random);
      mainModel.setItemVector(itemID, itemVector);
      lshModel.setItemVector(itemID, itemVector);
    }

    int numRecs = 10;
    Mean meanMatchLength = new Mean();

    // Per-user top-N scored by dot product with the user's vector.
    for (int u = 0; u < userItemCount; u++) {
      String userID = "U" + u;
      List<Pair<String, Double>> mainRecs =
          mainModel.topN(new DotsFunction(mainModel.getUserVector(userID)), null, numRecs, null);
      List<Pair<String, Double>> lshRecs =
          lshModel.topN(new DotsFunction(lshModel.getUserVector(userID)), null, numRecs, null);
      meanMatchLength.increment(matchingPrefixLength(lshRecs, mainRecs));
    }
    log.info("Mean matching prefix: {}", meanMatchLength.getResult());
    assertTrue(meanMatchLength.getResult() >= 4.0);

    // Reuse the accumulator for the per-item comparison.
    meanMatchLength.clear();
    for (int it = 0; it < userItemCount; it++) {
      String itemID = "I" + it;
      List<Pair<String, Double>> mainRecs =
          mainModel.topN(
              new CosineAverageFunction(mainModel.getItemVector(itemID)), null, numRecs, null);
      List<Pair<String, Double>> lshRecs =
          lshModel.topN(
              new CosineAverageFunction(lshModel.getItemVector(itemID)), null, numRecs, null);
      meanMatchLength.increment(matchingPrefixLength(lshRecs, mainRecs));
    }
    log.info("Mean matching prefix: {}", meanMatchLength.getResult());
    assertTrue(meanMatchLength.getResult() >= 5.0);
  }

  /**
   * Counts how many leading positions of the two lists hold equal elements, stopping at the
   * first mismatch or at the end of the shorter list.
   */
  private static <T> int matchingPrefixLength(List<T> candidate, List<T> reference) {
    int length = 0;
    while (length < candidate.size()
        && length < reference.size()
        && candidate.get(length).equals(reference.get(length))) {
      length++;
    }
    return length;
  }