/**
 * Builds a PCFG from the training trees, using the observed counts of unary
 * and binary productions to estimate the probability of each rule.
 *
 * <p>Every tree is handed to {@code tallyTree} together with the symbol,
 * unary-rule and binary-rule counters. Each rule found in a rule counter is
 * then scored with (count of the rule) / (count of the rule's parent symbol)
 * and registered through {@code addUnary} or {@code addBinary}.
 *
 * @param trainTrees the training trees whose productions are counted
 */
public Grammar(List<Tree<String>> trainTrees) {
    Counter<String> symbolCounts = new Counter<String>();
    Counter<UnaryRule> unaryCounts = new Counter<UnaryRule>();
    Counter<BinaryRule> binaryCounts = new Counter<BinaryRule>();

    // Pass 1: accumulate symbol and rule counts over the whole treebank.
    for (Tree<String> tree : trainTrees) {
        tallyTree(tree, symbolCounts, unaryCounts, binaryCounts);
    }

    // Pass 2: relative-frequency score for every observed unary rule.
    // NOTE(review): setScore mutates objects that are keys of the counter;
    // this presumably relies on rule equality/hashing ignoring the score — confirm.
    for (UnaryRule rule : unaryCounts.keySet()) {
        double probability =
            unaryCounts.getCount(rule)
                / symbolCounts.getCount(rule.getParent());
        rule.setScore(probability);
        addUnary(rule);
    }

    // Pass 3: same estimate for every observed binary rule.
    for (BinaryRule rule : binaryCounts.keySet()) {
        double probability =
            binaryCounts.getCount(rule)
                / symbolCounts.getCount(rule.getParent());
        rule.setScore(probability);
        addBinary(rule);
    }
}
/**
 * Reports whether the given word is one of the keys of {@code wordCounter}.
 *
 * @param word the word to look up
 * @return {@code true} if the key set of {@code wordCounter} contains
 *         {@code word}, {@code false} otherwise
 */
public boolean isKnown(String word) {
    Set<String> knownWords = wordCounter.keySet();
    return knownWords.contains(word);
}
/**
 * Returns the set of keys held by {@code tagCounter}.
 *
 * <p>NOTE(review): the set is returned exactly as {@code keySet()} provides
 * it, with no copy made here; whether it is a live view of the counter
 * depends on the counter implementation — confirm before mutating it.
 *
 * @return the key set of {@code tagCounter}
 */
public Set<String> getAllTags() {
    Set<String> allTags = tagCounter.keySet();
    return allTags;
}