예제 #1
0
  public Object formResult() {
    Set brs = new HashSet();
    Set urs = new HashSet();
    // scan each rule / history pair
    int ruleCount = 0;
    for (Iterator pairI = rulePairs.keySet().iterator(); pairI.hasNext(); ) {
      if (ruleCount % 100 == 0) {
        System.err.println("Rules multiplied: " + ruleCount);
      }
      ruleCount++;
      Pair rulePair = (Pair) pairI.next();
      Rule baseRule = (Rule) rulePair.first;
      String baseLabel = (String) ruleToLabel.get(baseRule);
      List history = (List) rulePair.second;
      double totalProb = 0;
      for (int depth = 1; depth <= HISTORY_DEPTH() && depth <= history.size(); depth++) {
        List subHistory = history.subList(0, depth);
        double c_label = labelPairs.getCount(new Pair(baseLabel, subHistory));
        double c_rule = rulePairs.getCount(new Pair(baseRule, subHistory));
        // System.out.println("Multiplying out "+baseRule+" with history "+subHistory);
        // System.out.println("Count of "+baseLabel+" with "+subHistory+" is "+c_label);
        // System.out.println("Count of "+baseRule+" with "+subHistory+" is "+c_rule );

        double prob = (1.0 / HISTORY_DEPTH()) * (c_rule) / (c_label);
        totalProb += prob;
        for (int childDepth = 0; childDepth <= Math.min(HISTORY_DEPTH() - 1, depth); childDepth++) {
          Rule rule = specifyRule(baseRule, subHistory, childDepth);
          rule.score = (float) Math.log(totalProb);
          // System.out.println("Created  "+rule+" with score "+rule.score);
          if (rule instanceof UnaryRule) {
            urs.add(rule);
          } else {
            brs.add(rule);
          }
        }
      }
    }
    System.out.println("Total states: " + stateNumberer.total());
    BinaryGrammar bg = new BinaryGrammar(stateNumberer.total());
    UnaryGrammar ug = new UnaryGrammar(stateNumberer.total());
    for (Iterator brI = brs.iterator(); brI.hasNext(); ) {
      BinaryRule br = (BinaryRule) brI.next();
      bg.addRule(br);
    }
    for (Iterator urI = urs.iterator(); urI.hasNext(); ) {
      UnaryRule ur = (UnaryRule) urI.next();
      ug.addRule(ur);
    }
    return new Pair(ug, bg);
  }
 /** Run some sanity checks on the training statistics, to make sure they look valid. */
 public void validate() {
   for (Map<SentenceKey, EnsembleStatistics> map : impl) {
     for (EnsembleStatistics stats : map.values()) {
       for (SentenceStatistics component : stats.statisticsForClassifiers) {
         assert !Counters.isUniformDistribution(component.relationDistribution, 1e-5);
         Counters.normalize(
             component.relationDistribution); // TODO(gabor) this shouldn't be necessary
         assert (Math.abs(component.relationDistribution.totalCount() - 1.0)) < 1e-5;
       }
       assert (Math.abs(stats.mean().relationDistribution.totalCount() - 1.0)) < 1e-5;
       assert !Counters.isUniformDistribution(stats.mean().relationDistribution, 1e-5);
     }
   }
 }
 public SentenceStatistics mean() {
   double sumConfidence = 0;
   int countWithConfidence = 0;
   Counter<String> avePredictions =
       new ClassicCounter<>(MapFactory.<String, MutableDouble>linkedHashMapFactory());
   // Sum
   for (SentenceStatistics stat : this.statisticsForClassifiers) {
     for (Double confidence : stat.confidence) {
       sumConfidence += confidence;
       countWithConfidence += 1;
     }
     assert Math.abs(stat.relationDistribution.totalCount() - 1.0) < 1e-5;
     for (Map.Entry<String, Double> entry : stat.relationDistribution.entrySet()) {
       assert entry.getValue() >= 0.0;
       assert entry.getValue() == stat.relationDistribution.getCount(entry.getKey());
       avePredictions.incrementCount(entry.getKey(), entry.getValue());
       assert stat.relationDistribution.getCount(entry.getKey())
           == stat.relationDistribution.getCount(entry.getKey());
     }
   }
   // Normalize
   double aveConfidence = sumConfidence / ((double) countWithConfidence);
   // Return
   if (this.statisticsForClassifiers.size() > 1) {
     Counters.divideInPlace(avePredictions, (double) this.statisticsForClassifiers.size());
   }
   if (Math.abs(avePredictions.totalCount() - 1.0) > 1e-5) {
     throw new IllegalStateException("Mean relation distribution is not a distribution!");
   }
   assert this.statisticsForClassifiers.size() > 1
       || this.statisticsForClassifiers.size() == 0
       || Counters.equals(
           avePredictions,
           statisticsForClassifiers.iterator().next().relationDistribution,
           1e-5);
   return countWithConfidence > 0
       ? new SentenceStatistics(avePredictions, aveConfidence)
       : new SentenceStatistics(avePredictions);
 }
예제 #4
0
 protected void tallyInternalNode(Tree lt, List parents) {
   // form base rule
   String label = lt.label().value();
   Rule baseR = ltToRule(lt);
   ruleToLabel.put(baseR, label);
   // act on each history depth
   for (int depth = 0, maxDepth = Math.min(HISTORY_DEPTH(), parents.size());
       depth <= maxDepth;
       depth++) {
     List history = new ArrayList(parents.subList(0, depth));
     // tally each history level / rewrite pair
     rulePairs.incrementCount(new Pair(baseR, history), 1);
     labelPairs.incrementCount(new Pair(label, history), 1);
   }
 }
 /**
  * Get the top few clauses from this searcher, cutting off at the given minimum probability.
  *
  * @param thresholdProbability The threshold under which to stop returning clauses. This should be
  *     between 0 and 1.
  * @return The resulting {@link edu.stanford.nlp.naturalli.SentenceFragment} objects, representing
  *     the top clauses of the sentence.
  */
 public List<SentenceFragment> topClauses(double thresholdProbability) {
   List<SentenceFragment> results = new ArrayList<>();
   search(
       triple -> {
         assert triple.first <= 0.0;
         double prob = Math.exp(triple.first);
         assert prob <= 1.0;
         assert prob >= 0.0;
         assert !Double.isNaN(prob);
         if (prob >= thresholdProbability) {
           SentenceFragment fragment = triple.third.get();
           fragment.score = prob;
           results.add(fragment);
           return true;
         } else {
           return false;
         }
       });
   return results;
 }