public Object formResult() {
  Set brs = new HashSet();
  Set urs = new HashSet();
  // scan each rule / history pair
  int ruleCount = 0;
  for (Iterator pairI = rulePairs.keySet().iterator(); pairI.hasNext(); ) {
    if (ruleCount % 100 == 0) {
      System.err.println("Rules multiplied: " + ruleCount);
    }
    ruleCount++;
    Pair rulePair = (Pair) pairI.next();
    Rule baseRule = (Rule) rulePair.first;
    String baseLabel = (String) ruleToLabel.get(baseRule);
    List history = (List) rulePair.second;
    double totalProb = 0;
    for (int depth = 1; depth <= HISTORY_DEPTH() && depth <= history.size(); depth++) {
      List subHistory = history.subList(0, depth);
      double c_label = labelPairs.getCount(new Pair(baseLabel, subHistory));
      double c_rule = rulePairs.getCount(new Pair(baseRule, subHistory));
      // System.out.println("Multiplying out " + baseRule + " with history " + subHistory);
      // System.out.println("Count of " + baseLabel + " with " + subHistory + " is " + c_label);
      // System.out.println("Count of " + baseRule + " with " + subHistory + " is " + c_rule);
      // each history depth contributes an equal (1 / HISTORY_DEPTH()) share of the
      // relative frequency of this rule given its label and history prefix
      double prob = (1.0 / HISTORY_DEPTH()) * c_rule / c_label;
      totalProb += prob;
      for (int childDepth = 0; childDepth <= Math.min(HISTORY_DEPTH() - 1, depth); childDepth++) {
        Rule rule = specifyRule(baseRule, subHistory, childDepth);
        rule.score = (float) Math.log(totalProb); // log of the interpolated probability so far
        // System.out.println("Created " + rule + " with score " + rule.score);
        if (rule instanceof UnaryRule) {
          urs.add(rule);
        } else {
          brs.add(rule);
        }
      }
    }
  }
  System.out.println("Total states: " + stateNumberer.total());
  BinaryGrammar bg = new BinaryGrammar(stateNumberer.total());
  UnaryGrammar ug = new UnaryGrammar(stateNumberer.total());
  for (Iterator brI = brs.iterator(); brI.hasNext(); ) {
    BinaryRule br = (BinaryRule) brI.next();
    bg.addRule(br);
  }
  for (Iterator urI = urs.iterator(); urI.hasNext(); ) {
    UnaryRule ur = (UnaryRule) urI.next();
    ug.addRule(ur);
  }
  return new Pair(ug, bg);
}
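// A minimal standalone sketch (not from the original source) of the scoring scheme in
// formResult() above: each rule's score is the log of a uniformly weighted interpolation of
// relative frequencies over history depths 1..HISTORY_DEPTH(). The plain maps below are
// hypothetical stand-ins for the rulePairs/labelPairs counters.
import java.util.List;
import java.util.Map;

final class InterpolatedScoreSketch {
  static double interpolatedLogProb(List<String> history, int maxDepth,
                                    Map<List<String>, Double> ruleCounts,
                                    Map<List<String>, Double> labelCounts) {
    double total = 0.0;
    for (int depth = 1; depth <= maxDepth && depth <= history.size(); depth++) {
      List<String> subHistory = history.subList(0, depth);
      double cRule = ruleCounts.getOrDefault(subHistory, 0.0);   // count of (rule, history prefix)
      double cLabel = labelCounts.getOrDefault(subHistory, 0.0); // count of (label, history prefix)
      total += (1.0 / maxDepth) * cRule / cLabel;                // equal weight per depth
    }
    return Math.log(total); // formResult() stores this (as a float) in rule.score
  }
}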
/** Run some sanity checks on the training statistics, to make sure they look valid. */
public void validate() {
  for (Map<SentenceKey, EnsembleStatistics> map : impl) {
    for (EnsembleStatistics stats : map.values()) {
      for (SentenceStatistics component : stats.statisticsForClassifiers) {
        assert !Counters.isUniformDistribution(component.relationDistribution, 1e-5);
        Counters.normalize(component.relationDistribution); // TODO(gabor) this shouldn't be necessary
        assert (Math.abs(component.relationDistribution.totalCount() - 1.0)) < 1e-5;
      }
      assert (Math.abs(stats.mean().relationDistribution.totalCount() - 1.0)) < 1e-5;
      assert !Counters.isUniformDistribution(stats.mean().relationDistribution, 1e-5);
    }
  }
}
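// A hedged sketch (not from the original source) of what the uniformity check in validate()
// guards against, written with a plain Map instead of CoreNLP's Counters: a distribution is
// "uniform" if every probability is within tolerance of 1/n, which would mean the classifier
// carries no information.
import java.util.Map;

final class UniformityCheckSketch {
  static boolean isUniform(Map<String, Double> distribution, double tolerance) {
    double uniform = 1.0 / distribution.size();
    for (double p : distribution.values()) {
      if (Math.abs(p - uniform) > tolerance) {
        return false; // at least one mass deviates, so the distribution is informative
      }
    }
    return true; // every class has (nearly) equal mass, which validate() treats as suspect
  }
}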
public SentenceStatistics mean() {
  double sumConfidence = 0;
  int countWithConfidence = 0;
  Counter<String> avePredictions =
      new ClassicCounter<>(MapFactory.<String, MutableDouble>linkedHashMapFactory());
  // Sum
  for (SentenceStatistics stat : this.statisticsForClassifiers) {
    for (Double confidence : stat.confidence) {
      sumConfidence += confidence;
      countWithConfidence += 1;
    }
    assert Math.abs(stat.relationDistribution.totalCount() - 1.0) < 1e-5;
    for (Map.Entry<String, Double> entry : stat.relationDistribution.entrySet()) {
      assert entry.getValue() >= 0.0;
      assert entry.getValue() == stat.relationDistribution.getCount(entry.getKey());
      avePredictions.incrementCount(entry.getKey(), entry.getValue());
    }
  }
  // Normalize
  double aveConfidence = sumConfidence / ((double) countWithConfidence);
  // Return
  if (this.statisticsForClassifiers.size() > 1) {
    Counters.divideInPlace(avePredictions, (double) this.statisticsForClassifiers.size());
  }
  if (Math.abs(avePredictions.totalCount() - 1.0) > 1e-5) {
    throw new IllegalStateException("Mean relation distribution is not a distribution!");
  }
  assert this.statisticsForClassifiers.size() > 1
      || this.statisticsForClassifiers.size() == 0
      || Counters.equals(
          avePredictions, statisticsForClassifiers.iterator().next().relationDistribution, 1e-5);
  return countWithConfidence > 0
      ? new SentenceStatistics(avePredictions, aveConfidence)
      : new SentenceStatistics(avePredictions);
}
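// A hedged sketch (not from the original source) of the averaging mean() performs, using plain
// maps in place of CoreNLP's Counter: sum the component distributions element-wise, then divide
// by the number of classifiers, so the result still sums to ~1.0.
import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class MeanDistributionSketch {
  static Map<String, Double> average(List<Map<String, Double>> distributions) {
    Map<String, Double> mean = new HashMap<>();
    for (Map<String, Double> dist : distributions) {
      dist.forEach((label, p) -> mean.merge(label, p, Double::sum)); // element-wise sum
    }
    mean.replaceAll((label, sum) -> sum / distributions.size());     // divide by classifier count
    return mean; // if every input sums to 1.0, the mean does too
  }
}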
protected void tallyInternalNode(Tree lt, List parents) {
  // form base rule
  String label = lt.label().value();
  Rule baseR = ltToRule(lt);
  ruleToLabel.put(baseR, label);
  // act on each history depth
  for (int depth = 0, maxDepth = Math.min(HISTORY_DEPTH(), parents.size()); depth <= maxDepth; depth++) {
    List history = new ArrayList(parents.subList(0, depth));
    // tally each history level / rewrite pair
    rulePairs.incrementCount(new Pair(baseR, history), 1);
    labelPairs.incrementCount(new Pair(label, history), 1);
  }
}
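// A hedged worked example (not from the original source) of the histories that
// tallyInternalNode() counts. Assuming parents lists the ancestors nearest-first, with
// parents == [A, B, C] and HISTORY_DEPTH() == 2 the loop tallies the prefixes [], [A],
// and [A, B] for both the rule and its label:
import java.util.ArrayList;
import java.util.List;

final class HistoryPrefixSketch {
  static List<List<String>> historyPrefixes(List<String> parents, int historyDepth) {
    List<List<String>> prefixes = new ArrayList<>();
    int maxDepth = Math.min(historyDepth, parents.size());
    for (int depth = 0; depth <= maxDepth; depth++) {
      prefixes.add(new ArrayList<>(parents.subList(0, depth)));
    }
    return prefixes; // e.g. [[], [A], [A, B]] for parents [A, B, C] and historyDepth 2
  }
}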
/**
 * Get the top few clauses from this searcher, cutting off at the given minimum probability.
 *
 * @param thresholdProbability The threshold under which to stop returning clauses. This should be
 *     between 0 and 1.
 * @return The resulting {@link edu.stanford.nlp.naturalli.SentenceFragment} objects, representing
 *     the top clauses of the sentence.
 */
public List<SentenceFragment> topClauses(double thresholdProbability) {
  List<SentenceFragment> results = new ArrayList<>();
  search(triple -> {
    assert triple.first <= 0.0; // search scores are log-probabilities, hence non-positive
    double prob = Math.exp(triple.first);
    assert prob <= 1.0;
    assert prob >= 0.0;
    assert !Double.isNaN(prob);
    if (prob >= thresholdProbability) {
      SentenceFragment fragment = triple.third.get();
      fragment.score = prob;
      results.add(fragment);
      return true;  // keep accepting clauses
    } else {
      return false; // below the threshold: stop the search
    }
  });
  return results;
}
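// A hedged sketch (not from the original source) of the cutoff logic inside topClauses():
// scores arrive as log-probabilities (hence <= 0), are exponentiated into [0, 1], and the
// search continues only while the probability clears the caller's threshold. A caller might
// then do something like `for (SentenceFragment c : searcher.topClauses(0.5)) { ... }`,
// where the searcher variable is illustrative.
final class ThresholdSketch {
  static boolean keepSearching(double logProb, double thresholdProbability) {
    double prob = Math.exp(logProb); // logProb <= 0.0, so prob lands in (0, 1]
    return prob >= thresholdProbability;
  }
}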