/** * Given a tree t, if this tree contains a QP of the form QP (RB IN CD|DT ...) well over, more * than QP (JJR IN CD|DT ...) fewer than QP (IN JJS CD|DT ...) at least QP (... CC ...) between 5 * and 10 it will transform it */ private static void doTransform(Tree t) { if (t.value().startsWith("QP")) { // look at the children List<Tree> children = t.getChildrenAsList(); if (children.size() >= 3 && children.get(0).isPreTerminal()) { // go through the children and check if they match the structure we want String child1 = children.get(0).value(); String child2 = children.get(1).value(); String child3 = children.get(2).value(); if ((child3.startsWith("CD") || child3.startsWith("DT")) && (child1.startsWith("RB") || child1.startsWith("JJ") || child1.startsWith("IN")) && (child2.startsWith("IN") || child2.startsWith("JJ"))) { transformQP(t); children = t.getChildrenAsList(); } } // If the children include a CC, we split that into left and // right subtrees with the CC in the middle so the headfinders // have an easier time interpreting the tree later on if (children.size() >= 3) { boolean flat = true; for (int i = 0; i < children.size(); ++i) { if (!children.get(i).isPreTerminal()) { flat = false; break; } } if (flat) { for (int i = 1; i < children.size() - 1; ++i) { if (children.get(i).value().startsWith("CC")) { transformCC( t, children.subList(0, i), children.get(i), children.subList(i + 1, children.size())); break; } } } } /* --- to be written or deleted } else if (t.value().startsWith("NP")) { //look at the children List<Tree> children = t.getChildrenAsList(); if (children.size() >= 3) { } ---- */ } else if (t.isPhrasal()) { for (Tree child : t.children()) { doTransform(child); } } }
Tree prune(Tree tree, int start) { if (tree.isLeaf() || tree.isPreTerminal()) { return tree; } // check each node's children for deletion List<Tree> children = helper(tree.getChildrenAsList(), start); children = prune(children, tree.label(), start, start + tree.yield().size()); return tree.treeFactory().newTreeNode(tree.label(), children); }
List<Tree> prune(List<Tree> treeList, Label label, int start, int end) { // get reference tree if (treeList.size() == 1) { return treeList; } Tree testTree = treeList.get(0).treeFactory().newTreeNode(label, treeList); int goal = Numberer.getGlobalNumberer("states").number(label.value()); Tree tempTree = parser.extractBestParse(goal, start, end); // parser.restoreUnaries(tempTree); Tree pcfgTree = debinarizer.transformTree(tempTree); Set<Constituent> pcfgConstituents = pcfgTree.constituents(new LabeledScoredConstituentFactory()); // delete child labels that are not in reference but do not cross reference List<Tree> prunedChildren = new ArrayList<Tree>(); int childStart = 0; for (int c = 0, numCh = testTree.numChildren(); c < numCh; c++) { Tree child = testTree.getChild(c); boolean isExtra = true; int childEnd = childStart + child.yield().size(); Constituent childConstituent = new LabeledScoredConstituent(childStart, childEnd, child.label(), 0); if (pcfgConstituents.contains(childConstituent)) { isExtra = false; } if (childConstituent.crosses(pcfgConstituents)) { isExtra = false; } if (child.isLeaf() || child.isPreTerminal()) { isExtra = false; } if (pcfgTree.yield().size() != testTree.yield().size()) { isExtra = false; } if (!label.value().startsWith("NP^NP")) { isExtra = false; } if (isExtra) { System.err.println( "Pruning: " + child.label() + " from " + (childStart + start) + " to " + (childEnd + start)); System.err.println("Was: " + testTree + " vs " + pcfgTree); prunedChildren.addAll(child.getChildrenAsList()); } else { prunedChildren.add(child); } childStart = childEnd; } return prunedChildren; }
private static void transformQP(Tree t) { List<Tree> children = t.getChildrenAsList(); TreeFactory tf = t.treeFactory(); LabelFactory lf = t.label().labelFactory(); // create the new XS having the first two children of the QP Tree left = tf.newTreeNode(lf.newLabel("XS"), null); for (int i = 0; i < 2; i++) { left.addChild(children.get(i)); } // remove all the two first children of t before for (int i = 0; i < 2; i++) { t.removeChild(0); } // add XS as the first child t.addChild(0, left); }