/**
 * Samples n ids by using Simple Random Sampling.
 *
 * <p>With replacement exactly {@code n} ids are drawn and the same id may appear
 * several times. Without replacement every returned id is distinct, so at most
 * {@code idList.size()} ids can be returned; when {@code n} is greater than or
 * equal to the population size the whole population is returned (in random order).
 *
 * <p>NOTE(review): without replacement, duplicates are rejected by value via
 * {@code sampledIds.contains}. This presumes the values of {@code idList} are
 * distinct; if the list holds fewer distinct values than requested, the rejection
 * loop cannot finish.
 *
 * @param idList the population of ids to sample from
 * @param n the number of ids requested
 * @param withReplacement whether the same id may be drawn more than once
 * @return the sampled ids
 */
public static FlatDataCollection randomSampling(
        FlatDataList idList, int n, boolean withReplacement) {
    FlatDataList sampledIds = new FlatDataList();

    int populationN = idList.size();

    // Without replacement we can never draw more distinct ids than the population
    // holds, so cap the target instead of bailing out with an empty sample.
    int targetN = withReplacement ? n : Math.min(n, populationN);

    for (int i = 0; i < targetN; ) {
        int randomPosition = PHPMethods.mt_rand(0, populationN - 1);
        Object pointID = idList.get(randomPosition);

        if (!withReplacement && sampledIds.contains(pointID)) {
            // Already drawn: reject and draw again without advancing the counter.
            continue;
        }

        sampledIds.add(pointID);
        ++i;
    }

    return sampledIds.toFlatDataCollection();
}
/** * Samples n ids based on their a Table which contains weights, probabilities or frequencies. * * @param weightedTable * @param n * @param withReplacement * @return */ public static FlatDataCollection weightedSampling( AssociativeArray weightedTable, int n, boolean withReplacement) { FlatDataList sampledIds = new FlatDataList(); double sumOfFrequencies = Descriptives.sum(weightedTable.toFlatDataCollection()); int populationN = weightedTable.size(); for (int i = 0; i < n; ++i) { if (withReplacement == false && populationN <= n) { // if replacement is not allowed and we already sampled everything that it can stop break; } double randomFrequency = PHPMethods.mt_rand(0.0, sumOfFrequencies); double cumulativeFrequency = 0; for (Map.Entry<Object, Object> entry : weightedTable.entrySet()) { Object pointID = entry.getKey(); cumulativeFrequency += TypeInference.toDouble(entry.getValue()); if (cumulativeFrequency >= randomFrequency) { if (withReplacement == false) { /* if replacement is not allowed check if the point already exists */ if (sampledIds.contains(pointID)) { continue; } } sampledIds.add(pointID); break; } } } return sampledIds.toFlatDataCollection(); }