public List<Pair<String, Double>> selectWeightedKeysWithSampling( ActiveLearningSelectionCriterion criterion, int numSamples, int seed) { List<Pair<String, Double>> result = new ArrayList<>(); forceTrack("Sampling Keys"); log("" + numSamples + " to collect"); // Get uncertainty forceTrack("Computing Uncertainties"); Counter<String> weightCounter = uncertainty(criterion); assert weightCounter.equals(uncertainty(criterion)); endTrack("Computing Uncertainties"); // Compute some statistics startTrack("Uncertainty Histogram"); // log(new Histogram(weightCounter, 50).toString()); // removed to make the release easier // (Histogram isn't in CoreNLP) endTrack("Uncertainty Histogram"); double totalCount = weightCounter.totalCount(); Random random = new Random(seed); // Flatten counter List<String> keys = new LinkedList<>(); List<Double> weights = new LinkedList<>(); List<String> zeroUncertaintyKeys = new LinkedList<>(); for (Pair<String, Double> elem : Counters.toSortedListWithCounts( weightCounter, (o1, o2) -> { int value = o1.compareTo(o2); if (value == 0) { return o1.first.compareTo(o2.first); } else { return value; } })) { if (elem.second != 0.0 || weightCounter.totalCount() == 0.0 || weightCounter.size() <= numSamples) { // ignore 0 probability weights keys.add(elem.first); weights.add(elem.second); } else { zeroUncertaintyKeys.add(elem.first); } } // Error check if (Utils.assertionsEnabled()) { for (Double elem : weights) { if (!(elem >= 0 && !Double.isInfinite(elem) && !Double.isNaN(elem))) { throw new IllegalArgumentException("Invalid weight: " + elem); } } } // Sample SAMPLE_ITER: for (int i = 1; i <= numSamples; ++i) { // For each sample if (i % 1000 == 0) { // Debug log log("sampled " + (i / 1000) + "k keys"); // Recompute total count to mitigate floating point errors totalCount = 0.0; for (double val : weights) { totalCount += val; } } if (weights.size() == 0) { continue; } assert totalCount >= 0.0; assert weights.size() == keys.size(); double target = random.nextDouble() * totalCount; Iterator<String> keyIter = keys.iterator(); Iterator<Double> weightIter = weights.iterator(); double runningTotal = 0.0; while (keyIter.hasNext()) { // For each candidate String key = keyIter.next(); double weight = weightIter.next(); runningTotal += weight; if (target <= runningTotal) { // Select that sample result.add(Pair.makePair(key, weight)); keyIter.remove(); weightIter.remove(); totalCount -= weight; continue SAMPLE_ITER; // continue sampling } } // We should get here only if the keys list is empty warn( "No more uncertain samples left to draw from! (target=" + target + " totalCount=" + totalCount + " size=" + keys.size()); assert keys.size() == 0; if (zeroUncertaintyKeys.size() > 0) { result.add(Pair.makePair(zeroUncertaintyKeys.remove(0), 0.0)); } else { break; } } endTrack("Sampling Keys"); return result; }
public List<Pair<String, Double>> selectWeightedKeys(ActiveLearningSelectionCriterion criterion) { Counter<String> weights = uncertainty(criterion); return Counters.toSortedListWithCounts(weights); }