예제 #1
0
 /**
  * Returns a list of all modes in the Collection. (If the Collection has multiple items with the
  * highest frequency, all of them will be returned.)
  */
 public static <T> Set<T> modes(Collection<T> values) {
   Counter<T> counter = new ClassicCounter<T>(values);
   List<Double> sortedCounts = CollectionUtils.sorted(counter.values());
   Double highestCount = sortedCounts.get(sortedCounts.size() - 1);
   Counters.retainAbove(counter, highestCount);
   return counter.keySet();
 }
예제 #2
0
  @Override
  public Counter<E> score() {

    Counter<E> currentPatternWeights4Label = new ClassicCounter<>();

    Counter<E> pos_i = new ClassicCounter<>();
    Counter<E> neg_i = new ClassicCounter<>();
    Counter<E> unlab_i = new ClassicCounter<>();

    for (Entry<E, ClassicCounter<CandidatePhrase>> en : negPatternsandWords4Label.entrySet()) {
      neg_i.setCount(en.getKey(), en.getValue().size());
    }

    for (Entry<E, ClassicCounter<CandidatePhrase>> en :
        unLabeledPatternsandWords4Label.entrySet()) {
      unlab_i.setCount(en.getKey(), en.getValue().size());
    }

    for (Entry<E, ClassicCounter<CandidatePhrase>> en : patternsandWords4Label.entrySet()) {
      pos_i.setCount(en.getKey(), en.getValue().size());
    }

    Counter<E> all_i = Counters.add(pos_i, neg_i);
    all_i.addAll(unlab_i);
    //    for (Entry<Integer, ClassicCounter<String>> en : allPatternsandWords4Label
    //        .entrySet()) {
    //      all_i.setCount(en.getKey(), en.getValue().size());
    //    }

    Counter<E> posneg_i = Counters.add(pos_i, neg_i);
    Counter<E> logFi = new ClassicCounter<>(pos_i);
    Counters.logInPlace(logFi);

    if (patternScoring.equals(PatternScoring.RlogF)) {
      currentPatternWeights4Label = Counters.product(Counters.division(pos_i, all_i), logFi);
    } else if (patternScoring.equals(PatternScoring.RlogFPosNeg)) {
      Redwood.log("extremePatDebug", "computing rlogfposneg");

      currentPatternWeights4Label = Counters.product(Counters.division(pos_i, posneg_i), logFi);

    } else if (patternScoring.equals(PatternScoring.RlogFUnlabNeg)) {
      Redwood.log("extremePatDebug", "computing rlogfunlabeg");

      currentPatternWeights4Label =
          Counters.product(Counters.division(pos_i, Counters.add(neg_i, unlab_i)), logFi);
    } else if (patternScoring.equals(PatternScoring.RlogFNeg)) {
      Redwood.log("extremePatDebug", "computing rlogfneg");

      currentPatternWeights4Label = Counters.product(Counters.division(pos_i, neg_i), logFi);
    } else if (patternScoring.equals(PatternScoring.YanGarber02)) {

      Counter<E> acc = Counters.division(pos_i, Counters.add(pos_i, neg_i));
      double thetaPrecision = 0.8;
      Counters.retainAbove(acc, thetaPrecision);
      Counter<E> conf = Counters.product(Counters.division(pos_i, all_i), logFi);
      for (E p : acc.keySet()) {
        currentPatternWeights4Label.setCount(p, conf.getCount(p));
      }
    } else if (patternScoring.equals(PatternScoring.LinICML03)) {

      Counter<E> acc = Counters.division(pos_i, Counters.add(pos_i, neg_i));
      double thetaPrecision = 0.8;
      Counters.retainAbove(acc, thetaPrecision);
      Counter<E> conf =
          Counters.product(
              Counters.division(Counters.add(pos_i, Counters.scale(neg_i, -1)), all_i), logFi);
      for (E p : acc.keySet()) {
        currentPatternWeights4Label.setCount(p, conf.getCount(p));
      }
    } else {
      throw new RuntimeException("not implemented " + patternScoring + " . check spelling!");
    }
    return currentPatternWeights4Label;
  }