/**
 * Builds a new rule of the same arity as {@code rule} whose states carry history suffixes.
 *
 * <p>The parent state name is extended with the string form of {@code history} minus its
 * first element. Each child is handled as follows:
 * <ul>
 *   <li>a synthetic child receives the same suffix as the parent;</li>
 *   <li>a tag child is copied through unchanged;</li>
 *   <li>any other child receives the string form of the first {@code childDepth}
 *       entries of {@code history}.</li>
 * </ul>
 * Only the parent and child states are set on the returned rule. Any rule that is not a
 * {@code UnaryRule} is cast to {@code BinaryRule}.
 *
 * @param rule the unannotated rule to specialize
 * @param history the history entries; must be non-empty, since a sub-list starting at
 *     index 1 is taken
 * @param childDepth how many leading history entries annotate ordinary children; must not
 *     exceed {@code history.size()}
 * @return a freshly allocated rule over the history-annotated states
 */
protected Rule specifyRule(Rule rule, List history, int childDepth) {
  // Suffix shared by the parent and by synthetic children.
  final String parentSuffix = historyToString(history.subList(1, history.size()));
  // Suffix used for ordinary (non-synthetic, non-tag) children.
  final String childSuffix = historyToString(history.subList(0, childDepth));

  if (rule instanceof UnaryRule) {
    final UnaryRule specified = new UnaryRule();
    final UnaryRule source = (UnaryRule) rule;

    // The parent is numbered before the child so states are assigned in the same order.
    specified.parent = stateNumberer.number(stateNumberer.object(source.parent) + parentSuffix);

    final boolean synthetic = isSynthetic(source.child);
    if (!synthetic && isTag(source.child)) {
      specified.child = source.child;
    } else {
      final String suffix = synthetic ? parentSuffix : childSuffix;
      specified.child = stateNumberer.number(stateNumberer.object(source.child) + suffix);
    }
    return specified;
  }

  final BinaryRule specified = new BinaryRule();
  final BinaryRule source = (BinaryRule) rule;

  specified.parent = stateNumberer.number(stateNumberer.object(source.parent) + parentSuffix);

  // The left child is fully resolved before the right child is inspected.
  final boolean leftSynthetic = isSynthetic(source.leftChild);
  if (!leftSynthetic && isTag(source.leftChild)) {
    specified.leftChild = source.leftChild;
  } else {
    final String suffix = leftSynthetic ? parentSuffix : childSuffix;
    specified.leftChild = stateNumberer.number(stateNumberer.object(source.leftChild) + suffix);
  }

  final boolean rightSynthetic = isSynthetic(source.rightChild);
  if (!rightSynthetic && isTag(source.rightChild)) {
    specified.rightChild = source.rightChild;
  } else {
    final String suffix = rightSynthetic ? parentSuffix : childSuffix;
    specified.rightChild = stateNumberer.number(stateNumberer.object(source.rightChild) + suffix);
  }
  return specified;
}
public Object formResult() { Set brs = new HashSet(); Set urs = new HashSet(); // scan each rule / history pair int ruleCount = 0; for (Iterator pairI = rulePairs.keySet().iterator(); pairI.hasNext(); ) { if (ruleCount % 100 == 0) { System.err.println("Rules multiplied: " + ruleCount); } ruleCount++; Pair rulePair = (Pair) pairI.next(); Rule baseRule = (Rule) rulePair.first; String baseLabel = (String) ruleToLabel.get(baseRule); List history = (List) rulePair.second; double totalProb = 0; for (int depth = 1; depth <= HISTORY_DEPTH() && depth <= history.size(); depth++) { List subHistory = history.subList(0, depth); double c_label = labelPairs.getCount(new Pair(baseLabel, subHistory)); double c_rule = rulePairs.getCount(new Pair(baseRule, subHistory)); // System.out.println("Multiplying out "+baseRule+" with history "+subHistory); // System.out.println("Count of "+baseLabel+" with "+subHistory+" is "+c_label); // System.out.println("Count of "+baseRule+" with "+subHistory+" is "+c_rule ); double prob = (1.0 / HISTORY_DEPTH()) * (c_rule) / (c_label); totalProb += prob; for (int childDepth = 0; childDepth <= Math.min(HISTORY_DEPTH() - 1, depth); childDepth++) { Rule rule = specifyRule(baseRule, subHistory, childDepth); rule.score = (float) Math.log(totalProb); // System.out.println("Created "+rule+" with score "+rule.score); if (rule instanceof UnaryRule) { urs.add(rule); } else { brs.add(rule); } } } } System.out.println("Total states: " + stateNumberer.total()); BinaryGrammar bg = new BinaryGrammar(stateNumberer.total()); UnaryGrammar ug = new UnaryGrammar(stateNumberer.total()); for (Iterator brI = brs.iterator(); brI.hasNext(); ) { BinaryRule br = (BinaryRule) brI.next(); bg.addRule(br); } for (Iterator urI = urs.iterator(); urI.hasNext(); ) { UnaryRule ur = (UnaryRule) urI.next(); ug.addRule(ur); } return new Pair(ug, bg); }
protected void tallyInternalNode(Tree lt, List parents) { // form base rule String label = lt.label().value(); Rule baseR = ltToRule(lt); ruleToLabel.put(baseR, label); // act on each history depth for (int depth = 0, maxDepth = Math.min(HISTORY_DEPTH(), parents.size()); depth <= maxDepth; depth++) { List history = new ArrayList(parents.subList(0, depth)); // tally each history level / rewrite pair rulePairs.incrementCount(new Pair(baseR, history), 1); labelPairs.incrementCount(new Pair(label, history), 1); } }
/**
 * Appends an {@code "on"} token tagged {@code "IN"} to {@code newList}, followed by the
 * slice of {@code words} running from the start of match group 2 to the end of match
 * group 3.
 *
 * @param matches a matcher holding a successful match; the offsets of its groups 2 and 3
 *     are used directly as indices into {@code words}
 * @param words the original sequence of tagged words
 * @param newList the output list, appended to in place
 */
@Override
protected void appendNewOrder(Matcher matches, List<TaggedWord> words, List<TaggedWord> newList) {
  // The inserted token goes in first, before any matcher offsets are read.
  final TaggedWord inserted = new TaggedWord("on", "IN");
  newList.add(inserted);

  final int from = matches.start(2);
  final int to = matches.end(3);
  final List<TaggedWord> span = words.subList(from, to);
  newList.addAll(span);
}