/**
   * Averages the statistics of all classifiers in this ensemble into one
   * {@link SentenceStatistics}.
   *
   * <p>The relation distributions of the members are summed per label and, when there is more
   * than one member, divided by the number of members. The confidence is the arithmetic mean of
   * every confidence value the members expose; members without a confidence are skipped.
   *
   * <p>NOTE(review): for an empty ensemble the averaged distribution presumably sums to 0, in
   * which case the distribution check below throws -- confirm whether callers rely on that.
   *
   * @return the averaged statistics; a confidence is attached only if at least one member
   *     provided one
   * @throws IllegalStateException if the averaged relation distribution does not sum to 1
   *     (within 1e-5)
   */
  public SentenceStatistics mean() {
    final int numClassifiers = this.statisticsForClassifiers.size();
    double sumConfidence = 0;
    int countWithConfidence = 0;
    Counter<String> avePredictions =
        new ClassicCounter<>(MapFactory.<String, MutableDouble>linkedHashMapFactory());

    // Sum confidences and per-label probabilities over all members
    for (SentenceStatistics stat : this.statisticsForClassifiers) {
      for (Double confidence : stat.confidence) {
        sumConfidence += confidence;
        countWithConfidence += 1;
      }
      // Every member must itself be a proper distribution
      assert Math.abs(stat.relationDistribution.totalCount() - 1.0) < 1e-5;
      for (Map.Entry<String, Double> entry : stat.relationDistribution.entrySet()) {
        assert entry.getValue() >= 0.0;
        // The entry view must agree with a direct lookup of the same key
        assert entry.getValue() == stat.relationDistribution.getCount(entry.getKey());
        avePredictions.incrementCount(entry.getKey(), entry.getValue());
      }
    }

    // Normalize: a single member is already normalized, so only divide for real ensembles
    if (numClassifiers > 1) {
      Counters.divideInPlace(avePredictions, (double) numClassifiers);
    }
    if (Math.abs(avePredictions.totalCount() - 1.0) > 1e-5) {
      throw new IllegalStateException("Mean relation distribution is not a distribution!");
    }
    // With exactly one member, the mean must equal that member's distribution
    assert numClassifiers != 1
        || Counters.equals(
            avePredictions,
            statisticsForClassifiers.iterator().next().relationDistribution,
            1e-5);

    // Return; the mean confidence is only computed when there is something to average,
    // which avoids producing a throw-away NaN from 0/0
    if (countWithConfidence == 0) {
      return new SentenceStatistics(avePredictions);
    }
    return new SentenceStatistics(avePredictions, sumConfidence / ((double) countWithConfidence));
  }
  /**
   * Samples up to {@code numSamples} distinct keys without replacement, drawing each key with
   * probability proportional to its uncertainty under {@code criterion}.
   *
   * <p>Keys with zero uncertainty are held back and only used to top up the result once every
   * uncertain key has been drawn. They stay in the regular sampling pool when all weights are
   * zero or when there are no more keys than requested samples.
   *
   * @param criterion selection criterion handed to {@code uncertainty} to weight the keys
   * @param numSamples maximum number of keys to select
   * @param seed seed of the random number generator used for sampling
   * @return the selected keys with their weights, in selection order; shorter than
   *     {@code numSamples} only when there are not enough keys
   * @throws IllegalArgumentException if assertions are enabled and a weight is negative,
   *     infinite or NaN
   */
  public List<Pair<String, Double>> selectWeightedKeysWithSampling(
      ActiveLearningSelectionCriterion criterion, int numSamples, int seed) {
    List<Pair<String, Double>> result = new ArrayList<>();
    forceTrack("Sampling Keys");
    log("" + numSamples + " to collect");

    // Get uncertainty
    forceTrack("Computing Uncertainties");
    Counter<String> weightCounter = uncertainty(criterion);
    // Sanity check (assertions only): computing the uncertainty twice gives the same counter
    assert weightCounter.equals(uncertainty(criterion));
    endTrack("Computing Uncertainties");
    // Compute some statistics
    startTrack("Uncertainty Histogram");
    //    log(new Histogram(weightCounter, 50).toString());  // removed to make the release easier
    // (Histogram isn't in CoreNLP)
    endTrack("Uncertainty Histogram");
    double totalCount = weightCounter.totalCount();
    Random random = new Random(seed);

    // Flatten counter.
    // Zero-weight keys stay in the sampling pool only if every weight is zero or if we need
    // (at least) every key anyway; neither condition changes while flattening, so test it once.
    boolean keepZeroWeights = totalCount == 0.0 || weightCounter.size() <= numSamples;
    // Linked lists: selected entries are removed through the iterators while sampling
    List<String> keys = new LinkedList<>();
    List<Double> weights = new LinkedList<>();
    List<String> zeroUncertaintyKeys = new LinkedList<>();
    for (Pair<String, Double> elem :
        Counters.toSortedListWithCounts(
            weightCounter,
            (o1, o2) -> {
              int value = o1.compareTo(o2);
              if (value == 0) {
                return o1.first.compareTo(o2.first);
              } else {
                return value;
              }
            })) {
      if (elem.second != 0.0 || keepZeroWeights) { // ignore 0 probability weights
        keys.add(elem.first);
        weights.add(elem.second);
      } else {
        zeroUncertaintyKeys.add(elem.first);
      }
    }

    // Error check
    if (Utils.assertionsEnabled()) {
      for (Double elem : weights) {
        if (!(elem >= 0 && !Double.isInfinite(elem) && !Double.isNaN(elem))) {
          throw new IllegalArgumentException("Invalid weight: " + elem);
        }
      }
    }

    // Sample
    for (int i = 1; i <= numSamples; ++i) { // For each sample
      if (i % 1000 == 0) {
        // Debug log
        log("sampled " + (i / 1000) + "k keys");
        // Recompute total count to mitigate floating point errors
        totalCount = 0.0;
        for (double val : weights) {
          totalCount += val;
        }
      }

      if (keys.isEmpty()) {
        // Every weighted key has been drawn: top up with the held-back zero-uncertainty keys
        // (in their sorted order) until we have enough samples or run out of keys entirely.
        warn(
            "No more uncertain samples left to draw from! (totalCount="
                + totalCount
                + " zeroUncertaintyKeys="
                + zeroUncertaintyKeys.size()
                + ")");
        Iterator<String> zeroIter = zeroUncertaintyKeys.iterator();
        while (result.size() < numSamples && zeroIter.hasNext()) {
          result.add(Pair.makePair(zeroIter.next(), 0.0));
        }
        break;
      }

      assert totalCount >= 0.0;
      assert weights.size() == keys.size();
      double target = random.nextDouble() * totalCount;
      Iterator<String> keyIter = keys.iterator();
      Iterator<Double> weightIter = weights.iterator();
      double runningTotal = 0.0;
      String key = null;
      double weight = 0.0;
      while (keyIter.hasNext()) { // For each candidate
        key = keyIter.next();
        weight = weightIter.next();
        runningTotal += weight;
        if (target <= runningTotal) { // Select that sample
          break;
        }
      }
      // If accumulated rounding error left the target slightly above the final running total,
      // the scan ends on the last candidate; select it rather than abandoning the draw.
      result.add(Pair.makePair(key, weight));
      keyIter.remove();
      weightIter.remove();
      totalCount -= weight;
    }

    endTrack("Sampling Keys");
    return result;
  }
  /**
   * Loads a treebank and prints lexicon statistics to standard output: summary counts of trees,
   * tokens, words, tags, lemmas and morphological tags, then the lemmas that occur with more than
   * one reduced tag, the words with zero or multiple lemmas, and the reduced tags seen per POS
   * tag.
   *
   * <p>Arabic gets the Arabic factored options and feature specification; every other language
   * is configured with the French ones. Prints a usage message and exits with status -1 unless
   * exactly three arguments are given.
   *
   * @param args the {@link Language} name, the treebank path, and a comma-separated list of
   *     {@link MorphoFeatureType} names to activate
   */
  public static void main(String[] args) {
    if (args.length != 3) {
      System.err.printf(
          "Usage: java %s language filename features%n",
          TreebankFactoredLexiconStats.class.getName());
      System.exit(-1);
    }

    Language language = Language.valueOf(args[0]);
    TreebankLangParserParams tlpp = language.params;
    if (language.equals(Language.Arabic)) {
      String[] options = {"-arabicFactored"};
      tlpp.setOptionFlag(options, 0);
    } else {
      String[] options = {"-frenchFactored"};
      tlpp.setOptionFlag(options, 0);
    }
    Treebank tb = tlpp.diskTreebank();
    tb.loadPath(args[1]);

    MorphoFeatureSpecification morphoSpec =
        language.equals(Language.Arabic)
            ? new ArabicMorphoFeatureSpecification()
            : new FrenchMorphoFeatureSpecification();

    String[] features = args[2].trim().split(",");
    for (String feature : features) {
      morphoSpec.activate(MorphoFeatureType.valueOf(feature));
    }

    // Counters
    Counter<String> wordTagCounter = new ClassicCounter<>(30000);
    Counter<String> morphTagCounter = new ClassicCounter<>(500);
    //    Counter<String> signatureTagCounter = new ClassicCounter<String>();
    Counter<String> morphCounter = new ClassicCounter<>(500);
    Counter<String> wordCounter = new ClassicCounter<>(30000);
    Counter<String> tagCounter = new ClassicCounter<>(300);

    Counter<String> lemmaCounter = new ClassicCounter<>(25000);
    Counter<String> lemmaTagCounter = new ClassicCounter<>(25000);

    Counter<String> richTagCounter = new ClassicCounter<>(1000);

    Counter<String> reducedTagCounter = new ClassicCounter<>(500);

    Counter<String> reducedTagLemmaCounter = new ClassicCounter<>(500);

    // word -> every lemma observed for that word
    Map<String, Set<String>> wordLemmaMap = Generics.newHashMap();

    TwoDimensionalIntCounter<String, String> lemmaReducedTagCounter =
        new TwoDimensionalIntCounter<>(30000);
    // Keyed by (lemma + reducedTag) -> POS tag
    TwoDimensionalIntCounter<String, String> reducedTagTagCounter =
        new TwoDimensionalIntCounter<>(500);
    TwoDimensionalIntCounter<String, String> tagReducedTagCounter =
        new TwoDimensionalIntCounter<>(300);

    int numTrees = 0;
    for (Tree tree : tb) {
      for (Tree subTree : tree) {
        if (!subTree.isLeaf()) {
          tlpp.transformTree(subTree, tree);
        }
      }
      List<Label> pretermList = tree.preTerminalYield();
      List<Label> yield = tree.yield();
      assert yield.size() == pretermList.size();

      int yieldLen = yield.size();
      for (int i = 0; i < yieldLen; ++i) {
        String tag = pretermList.get(i).value();

        String word = yield.get(i).value();
        String morph = ((CoreLabel) yield.get(i)).originalText();

        // Note: if there is no lemma, then we use the surface form.
        Pair<String, String> lemmaTag = MorphoFeatureSpecification.splitMorphString(word, morph);
        String lemma = lemmaTag.first();
        String richTag = lemmaTag.second();

        // WSGDEBUG
        if (tag.contains("MW")) {
          lemma += "-MWE";
        }

        lemmaCounter.incrementCount(lemma);
        lemmaTagCounter.incrementCount(lemma + tag);

        richTagCounter.incrementCount(richTag);

        String reducedTag = morphoSpec.strToFeatures(richTag).toString();
        reducedTagCounter.incrementCount(reducedTag);

        reducedTagLemmaCounter.incrementCount(reducedTag + lemma);

        wordTagCounter.incrementCount(word + tag);
        morphTagCounter.incrementCount(morph + tag);
        morphCounter.incrementCount(morph);
        wordCounter.incrementCount(word);
        tagCounter.incrementCount(tag);

        // The two-dimensional counters below use "NONE" in place of an empty reduced tag
        reducedTag = reducedTag.equals("") ? "NONE" : reducedTag;

        // Record the lemma for this word. The set must receive the lemma on the first sighting
        // of the word as well; otherwise a word seen once ends up with an empty lemma set.
        Set<String> lemmasForWord = wordLemmaMap.get(word);
        if (lemmasForWord == null) {
          lemmasForWord = Generics.newHashSet(1);
          wordLemmaMap.put(word, lemmasForWord);
        }
        lemmasForWord.add(lemma);

        lemmaReducedTagCounter.incrementCount(lemma, reducedTag);
        reducedTagTagCounter.incrementCount(lemma + reducedTag, tag);
        tagReducedTagCounter.incrementCount(tag, reducedTag);
      }
      ++numTrees;
    }

    // Summary counts
    System.out.println("Language: " + language.toString());
    System.out.printf("#trees:\t%d%n", numTrees);
    System.out.printf("#tokens:\t%d%n", (int) wordCounter.totalCount());
    System.out.printf("#words:\t%d%n", wordCounter.keySet().size());
    System.out.printf("#tags:\t%d%n", tagCounter.keySet().size());
    System.out.printf("#wordTagPairs:\t%d%n", wordTagCounter.keySet().size());
    System.out.printf("#lemmas:\t%d%n", lemmaCounter.keySet().size());
    System.out.printf("#lemmaTagPairs:\t%d%n", lemmaTagCounter.keySet().size());
    System.out.printf("#feattags:\t%d%n", reducedTagCounter.keySet().size());
    System.out.printf("#feattag+lemmas:\t%d%n", reducedTagLemmaCounter.keySet().size());
    System.out.printf("#richtags:\t%d%n", richTagCounter.keySet().size());
    System.out.printf("#richtag+lemma:\t%d%n", morphCounter.keySet().size());
    System.out.printf("#richtag+lemmaTagPairs:\t%d%n", morphTagCounter.keySet().size());

    // Extra: words with exactly one lemma whose lemma occurs with several reduced tags are
    // printed immediately; words with zero or multiple lemmas are buffered and printed after.
    System.out.println("==================");
    StringBuilder sbNoLemma = new StringBuilder();
    StringBuilder sbMultLemmas = new StringBuilder();
    for (Map.Entry<String, Set<String>> wordLemmas : wordLemmaMap.entrySet()) {
      String word = wordLemmas.getKey();
      Set<String> lemmas = wordLemmas.getValue();
      if (lemmas.size() == 0) {
        // Defensive: every recorded word now carries at least one lemma
        sbNoLemma.append("NO LEMMAS FOR WORD: ").append(word).append("\n");
        continue;
      }
      if (lemmas.size() > 1) {
        sbMultLemmas
            .append("MULTIPLE LEMMAS: ")
            .append(word)
            .append(" ")
            .append(setToString(lemmas))
            .append("\n");
        continue;
      }
      String lemma = lemmas.iterator().next();
      Set<String> reducedTags = lemmaReducedTagCounter.getCounter(lemma).keySet();
      if (reducedTags.size() > 1) {
        System.out.printf("%s --> %s%n", word, lemma);
        for (String reducedTag : reducedTags) {
          int count = lemmaReducedTagCounter.getCount(lemma, reducedTag);
          String posTags =
              setToString(reducedTagTagCounter.getCounter(lemma + reducedTag).keySet());
          System.out.printf("\t%s\t%d\t%s%n", reducedTag, count, posTags);
        }
        System.out.println();
      }
    }
    System.out.println("==================");
    System.out.println(sbNoLemma.toString());
    System.out.println(sbMultLemmas.toString());
    System.out.println("==================");

    // Reduced tags (with counts) observed for each POS tag, in sorted tag order
    List<String> tags = new ArrayList<>(tagReducedTagCounter.firstKeySet());
    Collections.sort(tags);
    for (String tag : tags) {
      System.out.println(tag);
      Set<String> reducedTags = tagReducedTagCounter.getCounter(tag).keySet();
      for (String reducedTag : reducedTags) {
        int count = tagReducedTagCounter.getCount(tag, reducedTag);
        //        reducedTag = reducedTag.equals("") ? "NONE" : reducedTag;
        System.out.printf("\t%s\t%d%n", reducedTag, count);
      }
      System.out.println();
    }
    System.out.println("==================");
  }