/**
 * Recursively tallies symbols and grammar rules over {@code tree}.
 *
 * <p>Leaves and preterminals are ignored. For every other node, the node's label is counted once
 * in {@code symbolCounter}, and the rule built from the node is counted once in either
 * {@code unaryRuleCounter} (one child) or {@code binaryRuleCounter} (two children). The walk then
 * descends into each child.
 *
 * @param tree the subtree to tally
 * @param symbolCounter receives one count per visited node label
 * @param unaryRuleCounter receives one count per single-child node
 * @param binaryRuleCounter receives one count per two-child node
 * @throws RuntimeException if a visited node has a child count other than 1 or 2
 */
private void tallyTree(
    Tree<String> tree,
    Counter<String> symbolCounter,
    Counter<UnaryRule> unaryRuleCounter,
    Counter<BinaryRule> binaryRuleCounter) {
  // Nothing to tally at or below the preterminal level.
  if (tree.isLeaf() || tree.isPreTerminal()) {
    return;
  }

  int arity = tree.getChildren().size();
  switch (arity) {
    case 1: {
      UnaryRule rule = makeUnaryRule(tree);
      symbolCounter.incrementCount(tree.getLabel(), 1.0);
      unaryRuleCounter.incrementCount(rule, 1.0);
      break;
    }
    case 2: {
      BinaryRule rule = makeBinaryRule(tree);
      symbolCounter.incrementCount(tree.getLabel(), 1.0);
      binaryRuleCounter.incrementCount(rule, 1.0);
      break;
    }
    default:
      // Any other child count is rejected.
      throw new RuntimeException(
          "Attempted to construct a Grammar with an illegal tree: " + tree);
  }

  for (Tree<String> subtree : tree.getChildren()) {
    tallyTree(subtree, symbolCounter, unaryRuleCounter, binaryRuleCounter);
  }
}
/**
 * Computes the span length of {@code tree} and records, in {@code spanToCategories}, the span
 * length of every node beneath it that is neither a leaf nor a preterminal.
 *
 * <p>A leaf or preterminal has span 1 and is not recorded. Any other node's span is the sum of
 * its children's spans, and it is recorded under its label unless that label equals "ROOT".
 *
 * @param tree the subtree to measure
 * @param start offset at which this subtree begins; only forwarded to the recursive calls
 * @return the span length of {@code tree}
 */
private int tallySpans(Tree<String> tree, int start) {
  if (tree.isLeaf() || tree.isPreTerminal()) {
    return 1;
  }

  // Accumulate the width directly; each child starts where the previous one ended.
  int width = 0;
  for (Tree<String> subtree : tree.getChildren()) {
    width += tallySpans(subtree, start + width);
  }

  String category = tree.getLabel();
  boolean isRoot = category.equals("ROOT");
  if (!isRoot) {
    spanToCategories.incrementCount(width, category, 1.0);
  }
  return width;
}
private static Tree<String> markovizeTree(Tree<String> tree, String parentLabel) { String label = tree.getLabel(); // Tried using ^ but unannotate didn't remove it. Instead using - since that is properly // removed if (parentLabel != null) { label = label + "-" + parentLabel; } // If you're a preterminal, don't bother markovizing if (tree.isPreTerminal()) return tree; List<Tree<String>> children = new ArrayList<Tree<String>>(); for (Tree<String> child : tree.getChildren()) { children.add(markovizeTree(child, tree.getLabel())); } Tree<String> newTree = new Tree<String>(label, children); return newTree; }
private static Tree<String> binarizeTreeHelper( Tree<String> tree, int numChildrenGenerated, String intermediateLabel) { Tree<String> leftTree = tree.getChildren().get(numChildrenGenerated); List<Tree<String>> children = new ArrayList<Tree<String>>(); children.add(binarizeTree(leftTree)); // Wait! Don't binarize too much. The last child doesn't need to have a new node. // It can be paired with the 2nd to last child! if (numChildrenGenerated == tree.getChildren().size() - 2) { children.add(binarizeTree(tree.getChildren().get(numChildrenGenerated + 1))); } else if (numChildrenGenerated < tree.getChildren().size() - 1) { Tree<String> rightTree = binarizeTreeHelper( tree, numChildrenGenerated + 1, intermediateLabel + "_" + leftTree.getLabel()); children.add(rightTree); } return new Tree<String>(intermediateLabel, children); }
private static Tree<String> binarizeTree(Tree<String> tree) { String label = tree.getLabel(); if (tree.isLeaf()) return new Tree<String>(label); if (tree.getChildren().size() == 1) { return new Tree<String>( label, Collections.singletonList(binarizeTree(tree.getChildren().get(0)))); } // I think it tries to binarize a binary tree. This is silly. Just binarize the subtrees. if (tree.getChildren().size() == 2) { List<Tree<String>> children = new ArrayList<Tree<String>>(); children.add(binarizeTree(tree.getChildren().get(0))); children.add(binarizeTree(tree.getChildren().get(1))); return new Tree<String>(label, children); } // otherwise, it's a TERNARY-or-more local tree, // so decompose it into a sequence of binary and unary trees. String intermediateLabel = "@" + label + "->"; Tree<String> intermediateTree = binarizeTreeHelper(tree, 0, intermediateLabel); return new Tree<String>(label, intermediateTree.getChildren()); }
/**
 * Builds the binary rule for {@code tree} from its label and the labels of its first two
 * children.
 *
 * @param tree a node with at least two children
 * @return a rule of the form (tree label, first child label, second child label)
 */
private BinaryRule makeBinaryRule(Tree<String> tree) {
  String parentLabel = tree.getLabel();
  List<Tree<String>> kids = tree.getChildren();
  String leftLabel = kids.get(0).getLabel();
  String rightLabel = kids.get(1).getLabel();
  return new BinaryRule(parentLabel, leftLabel, rightLabel);
}