/**
 * Builds the per-GE-feature statistics used by this objective.
 *
 * <p>For every entry of {@code geFeatures} this method:
 * <ol>
 *   <li>counts, per label, how many datums in {@code labeledDataset} contain the feature,
 *       then normalizes and smooths that row and stores it in
 *       {@code geFeature2EmpiricalDist};</li>
 *   <li>records in {@code geFeature2DatumList} the indices (into {@code unlabeledDataList})
 *       of the unlabeled datums that contain the feature.</li>
 * </ol>
 * Both fields are re-allocated on every call. The number of distinct unlabeled examples
 * that contain at least one GE feature is printed to standard output.
 *
 * <p>If the same feature occurs more than once in {@code geFeatures}, only the last
 * position is kept in the lookup map, so the earlier rows receive no counts.
 *
 * @param geFeatures the GE features; position {@code n} in this list is the row index
 *                   used in {@code geFeature2EmpiricalDist} and {@code geFeature2DatumList}
 */
private void computeEmpiricalStatistics(List<F> geFeatures) {
    final int numGeFeatures = geFeatures.size();

    // Allocate the containers. A freshly created double[][] is already zero-filled,
    // so no explicit reset of the rows is required.
    geFeature2EmpiricalDist = new double[numGeFeatures][labeledDataset.labelIndex.size()];
    geFeature2DatumList = new ArrayList<List<Integer>>(numGeFeatures);
    Map<F, Integer> geFeatureMap = Generics.newHashMap();
    Set<Integer> activeUnlabeledExamples = Generics.newHashSet();
    for (int n = 0; n < numGeFeatures; n++) {
      geFeature2DatumList.add(new ArrayList<Integer>());
      geFeatureMap.put(geFeatures.get(n), n);
    }

    // Count label occurrences for each GE feature over the labeled data.
    for (int i = 0; i < labeledDataset.size(); i++) {
      Datum<L, F> datum = labeledDataset.getDatum(i);
      int labelID = labeledDataset.labelIndex.indexOf(datum.label());
      for (F feature : datum.asFeatures()) {
        // The map only ever stores non-null indices, so a null result means "not a GE feature".
        Integer geFnum = geFeatureMap.get(feature);
        if (geFnum != null) {
          geFeature2EmpiricalDist[geFnum][labelID]++;
        }
      }
    }

    // Turn the raw counts into a smoothed distribution per feature.
    // NOTE(review): a GE feature that never occurs in the labeled data still has an
    // all-zero row here; confirm how ArrayMath.normalize treats a zero-sum array.
    for (int n = 0; n < numGeFeatures; n++) {
      ArrayMath.normalize(geFeature2EmpiricalDist[n]);
      smoothDistribution(geFeature2EmpiricalDist[n]);
    }

    // Build the inverted index from each GE feature to the unlabeled datums that contain it.
    for (int i = 0; i < unlabeledDataList.size(); i++) {
      Datum<L, F> datum = unlabeledDataList.get(i);
      for (F feature : datum.asFeatures()) {
        Integer geFnum = geFeatureMap.get(feature);
        if (geFnum != null) {
          geFeature2DatumList.get(geFnum).add(i);
          activeUnlabeledExamples.add(i);
        }
      }
    }
    System.out.println("Number of active unlabeled examples:" + activeUnlabeledExamples.size());
  }
 /**
  * Smooths {@code dist} in place: a small constant is added to every entry and the
  * array is then passed to {@code ArrayMath.normalize}.
  *
  * @param dist the array to smooth; it is modified directly
  */
 private static void smoothDistribution(double[] dist) {
   // additive (Laplace-style) smoothing constant
   final double epsilon = 1e-6;
   int idx = 0;
   while (idx < dist.length) {
     dist[idx] = dist[idx] + epsilon;
     idx++;
   }
   ArrayMath.normalize(dist);
 }