Ejemplo n.º 1
0
 /* A builds PCFG using the observed counts of binary and unary
  * productions in the training trees to estimate the probabilities
  * for those rules.
  */
 public Grammar(List<Tree<String>> trainTrees) {
   Counter<UnaryRule> unaryRuleCounter = new Counter<UnaryRule>();
   Counter<BinaryRule> binaryRuleCounter = new Counter<BinaryRule>();
   Counter<String> symbolCounter = new Counter<String>();
   for (Tree<String> trainTree : trainTrees) {
     tallyTree(trainTree, symbolCounter, unaryRuleCounter, binaryRuleCounter);
   }
   for (UnaryRule unaryRule : unaryRuleCounter.keySet()) {
     double unaryProbability =
         unaryRuleCounter.getCount(unaryRule) / symbolCounter.getCount(unaryRule.getParent());
     unaryRule.setScore(unaryProbability);
     addUnary(unaryRule);
   }
   for (BinaryRule binaryRule : binaryRuleCounter.keySet()) {
     double binaryProbability =
         binaryRuleCounter.getCount(binaryRule) / symbolCounter.getCount(binaryRule.getParent());
     binaryRule.setScore(binaryProbability);
     addBinary(binaryRule);
   }
 }
Ejemplo n.º 2
0
 public boolean isKnown(String word) {
   return wordCounter.keySet().contains(word);
 }
Ejemplo n.º 3
0
 public Set<String> getAllTags() {
   return tagCounter.keySet();
 }