/** * Samples n ids based on their a Table which contains weights, probabilities or frequencies. * * @param weightedTable * @param n * @param withReplacement * @return */ public static FlatDataCollection weightedSampling( AssociativeArray weightedTable, int n, boolean withReplacement) { FlatDataList sampledIds = new FlatDataList(); double sumOfFrequencies = Descriptives.sum(weightedTable.toFlatDataCollection()); int populationN = weightedTable.size(); for (int i = 0; i < n; ++i) { if (withReplacement == false && populationN <= n) { // if replacement is not allowed and we already sampled everything that it can stop break; } double randomFrequency = PHPMethods.mt_rand(0.0, sumOfFrequencies); double cumulativeFrequency = 0; for (Map.Entry<Object, Object> entry : weightedTable.entrySet()) { Object pointID = entry.getKey(); cumulativeFrequency += TypeInference.toDouble(entry.getValue()); if (cumulativeFrequency >= randomFrequency) { if (withReplacement == false) { /* if replacement is not allowed check if the point already exists */ if (sampledIds.contains(pointID)) { continue; } } sampledIds.add(pointID); break; } } } return sampledIds.toFlatDataCollection(); }
/**
 * Computes the variance of the sampled values.
 * <p>
 * Delegates to {@code Descriptives.variance}, passing {@code true} as the
 * second argument. NOTE(review): that flag presumably selects the sample
 * (rather than population) estimator; confirm against {@code Descriptives}.
 *
 * @param flatDataCollection the sampled values
 * @return the variance reported by {@code Descriptives.variance}
 */
public static double variance(FlatDataCollection flatDataCollection) {
    final boolean varianceFlag = true;
    return Descriptives.variance(flatDataCollection, varianceFlag);
}
/**
 * Computes the mean of the sampled values by delegating to
 * {@code Descriptives.mean}.
 *
 * @param flatDataCollection the sampled values
 * @return the mean reported by {@code Descriptives.mean}
 */
public static double mean(FlatDataCollection flatDataCollection) {
    double sampleMean = Descriptives.mean(flatDataCollection);
    return sampleMean;
}