コード例 #1
0
  /**
   * Draws up to {@code n} ids from {@code idList} by simple random sampling.
   *
   * <p>With replacement, {@code n} positions are drawn independently, so the same id may be
   * returned several times. Without replacement, every list position is used at most once and an
   * id value that is already part of the sample is never added again; the sample therefore holds
   * fewer than {@code n} ids when the list does not contain {@code n} distinct values.
   *
   * @param idList the population of ids to sample from
   * @param n the number of ids requested; a non-positive value yields an empty sample
   * @param withReplacement whether the same id may be drawn more than once
   * @return the sampled ids; empty when {@code idList} is empty
   */
  public static FlatDataCollection randomSampling(
      FlatDataList idList, int n, boolean withReplacement) {
    FlatDataList sampledIds = new FlatDataList();

    int populationN = idList.size();
    if (populationN == 0) {
      // Nothing to draw from; this also avoids requesting a random position in the range [0, -1].
      return sampledIds.toFlatDataCollection();
    }

    if (withReplacement) {
      for (int i = 0; i < n; ++i) {
        // NOTE(review): mt_rand is assumed to treat both bounds as inclusive, like PHP's mt_rand.
        int randomPosition = PHPMethods.mt_rand(0, populationN - 1);
        sampledIds.add(idList.get(randomPosition));
      }
      return sampledIds.toFlatDataCollection();
    }

    /*
     * Without replacement: partial Fisher-Yates over the list positions. Every draw consumes one
     * unused position, so the loop always terminates, even when n exceeds the population size or
     * the list contains repeated id values.
     */
    int[] positions = new int[populationN];
    for (int p = 0; p < populationN; ++p) {
      positions[p] = p;
    }

    int remaining = populationN;
    int sampled = 0;
    while (sampled < n && remaining > 0) {
      int pick = PHPMethods.mt_rand(0, remaining - 1);
      Object pointID = idList.get(positions[pick]);

      // Retire the drawn position by overwriting it with the last still-unused one.
      positions[pick] = positions[remaining - 1];
      --remaining;

      if (sampledIds.contains(pointID)) {
        // The same id value was already sampled from another position; skip it.
        continue;
      }

      sampledIds.add(pointID);
      ++sampled;
    }

    return sampledIds.toFlatDataCollection();
  }
コード例 #2
0
  /**
   * Draws up to {@code n} ids from a table that maps each id to a weight, probability or
   * frequency. An id is selected with probability proportional to its weight.
   *
   * <p>With replacement, every draw uses the full table. Without replacement, every draw is
   * restricted to the ids that are not yet part of the sample, so each id is returned at most
   * once and the sample holds fewer than {@code n} ids when fewer ids with a positive weight are
   * available. Entries whose weight is not positive are never selected.
   *
   * @param weightedTable the table whose keys are the ids and whose values are their weights
   * @param n the number of ids requested; a non-positive value yields an empty sample
   * @param withReplacement whether the same id may be drawn more than once
   * @return the sampled ids; empty when the table holds no positive weight
   */
  public static FlatDataCollection weightedSampling(
      AssociativeArray weightedTable, int n, boolean withReplacement) {
    FlatDataList sampledIds = new FlatDataList();

    double sumOfFrequencies = Descriptives.sum(weightedTable.toFlatDataCollection());

    for (int i = 0; i < n; ++i) {
      if (!withReplacement && i > 0) {
        /*
         * Recompute the weight of the ids that are still available instead of subtracting the
         * selected weight, so rounding errors do not accumulate across draws.
         */
        sumOfFrequencies = 0.0;
        for (Map.Entry<Object, Object> entry : weightedTable.entrySet()) {
          double weight = TypeInference.toDouble(entry.getValue());
          if (weight > 0.0 && !sampledIds.contains(entry.getKey())) {
            sumOfFrequencies += weight;
          }
        }
      }

      if (!(sumOfFrequencies > 0.0)) {
        // Nothing left to draw from (also covers a NaN total).
        break;
      }

      double randomFrequency = PHPMethods.mt_rand(0.0, sumOfFrequencies);

      Object selectedID = null;
      boolean found = false;
      double cumulativeFrequency = 0.0;
      for (Map.Entry<Object, Object> entry : weightedTable.entrySet()) {
        Object pointID = entry.getKey();
        double weight = TypeInference.toDouble(entry.getValue());
        if (weight <= 0.0) {
          // Entries without a positive weight have no chance of being selected.
          continue;
        }
        if (!withReplacement && sampledIds.contains(pointID)) {
          // Already sampled: it is not part of the pool for this draw.
          continue;
        }

        cumulativeFrequency += weight;

        /*
         * Remember the latest eligible id so that a rounding shortfall of the cumulative sum
         * still yields a selection instead of silently losing the draw.
         */
        selectedID = pointID;
        found = true;

        if (cumulativeFrequency >= randomFrequency) {
          break;
        }
      }

      if (!found) {
        // No eligible entry remains, so further draws cannot succeed either.
        break;
      }

      sampledIds.add(selectedID);
    }

    return sampledIds.toFlatDataCollection();
  }