/** * Given a tree t, if this tree contains a QP of the form QP (RB IN CD|DT ...) well over, more * than QP (JJR IN CD|DT ...) fewer than QP (IN JJS CD|DT ...) at least QP (... CC ...) between 5 * and 10 it will transform it */ private static void doTransform(Tree t) { if (t.value().startsWith("QP")) { // look at the children List<Tree> children = t.getChildrenAsList(); if (children.size() >= 3 && children.get(0).isPreTerminal()) { // go through the children and check if they match the structure we want String child1 = children.get(0).value(); String child2 = children.get(1).value(); String child3 = children.get(2).value(); if ((child3.startsWith("CD") || child3.startsWith("DT")) && (child1.startsWith("RB") || child1.startsWith("JJ") || child1.startsWith("IN")) && (child2.startsWith("IN") || child2.startsWith("JJ"))) { transformQP(t); children = t.getChildrenAsList(); } } // If the children include a CC, we split that into left and // right subtrees with the CC in the middle so the headfinders // have an easier time interpreting the tree later on if (children.size() >= 3) { boolean flat = true; for (int i = 0; i < children.size(); ++i) { if (!children.get(i).isPreTerminal()) { flat = false; break; } } if (flat) { for (int i = 1; i < children.size() - 1; ++i) { if (children.get(i).value().startsWith("CC")) { transformCC( t, children.subList(0, i), children.get(i), children.subList(i + 1, children.size())); break; } } } } /* --- to be written or deleted } else if (t.value().startsWith("NP")) { //look at the children List<Tree> children = t.getChildrenAsList(); if (children.size() >= 3) { } ---- */ } else if (t.isPhrasal()) { for (Tree child : t.children()) { doTransform(child); } } }
// now overally complex so it deals with coordinations. Maybe change this class to use tregrex? private boolean hasPassiveProgressiveAuxiliary(Tree[] kids, HashSet<String> verbalSet) { if (DEBUG) { System.err.println("Checking for passive/progressive auxiliary"); } boolean foundPassiveVP = false; boolean foundPassiveAux = false; for (Tree kid : kids) { if (DEBUG) { System.err.println(" checking in " + kid); } if (kid.isPreTerminal()) { Label kidLabel = kid.label(); String tag = null; if (kidLabel instanceof HasTag) { tag = ((HasTag) kidLabel).tag(); } if (tag == null) { tag = kid.value(); } Label wordLabel = kid.firstChild().label(); String word = null; if (wordLabel instanceof HasWord) { word = ((HasWord) wordLabel).word(); } if (word == null) { word = wordLabel.value(); } if (DEBUG) { System.err.println("Checking " + kid.value() + " head is " + word + '/' + tag); } String lcWord = word.toLowerCase(); if (verbalTags.contains(tag) && verbalSet.contains(lcWord)) { if (DEBUG) { System.err.println("hasPassiveProgressiveAuxiliary found passive aux"); } foundPassiveAux = true; } } else if (kid.isPhrasal()) { Label kidLabel = kid.label(); String cat = null; if (kidLabel instanceof HasCategory) { cat = ((HasCategory) kidLabel).category(); } if (cat == null) { cat = kid.value(); } if (!cat.startsWith("VP")) { continue; } if (DEBUG) { System.err.println("hasPassiveProgressiveAuxiliary found VP"); } Tree[] kidkids = kid.children(); boolean foundParticipleInVp = false; for (Tree kidkid : kidkids) { if (DEBUG) { System.err.println(" hasPassiveProgressiveAuxiliary examining " + kidkid); } if (kidkid.isPreTerminal()) { Label kidkidLabel = kidkid.label(); String tag = null; if (kidkidLabel instanceof HasTag) { tag = ((HasTag) kidkidLabel).tag(); } if (tag == null) { tag = kidkid.value(); } // we allow in VBD because of frequent tagging mistakes if ("VBN".equals(tag) || "VBG".equals(tag) || "VBD".equals(tag)) { foundPassiveVP = true; if (DEBUG) { System.err.println("hasPassiveAuxiliary found VBN/VBG/VBD VP"); } break; } else if ("CC".equals(tag) && foundParticipleInVp) { foundPassiveVP = true; if (DEBUG) { System.err.println( "hasPassiveAuxiliary [coordination] found (VP (VP[VBN/VBG/VBD] CC"); } break; } } else if (kidkid.isPhrasal()) { String catcat = null; if (kidLabel instanceof HasCategory) { catcat = ((HasCategory) kidLabel).category(); } if (catcat == null) { catcat = kid.value(); } if ("VP".equals(catcat)) { if (DEBUG) { System.err.println("hasPassiveAuxiliary found (VP (VP)), recursing"); } foundParticipleInVp = vpContainsParticiple(kidkid); } else if (("CONJP".equals(catcat) || "PRN".equals(catcat)) && foundParticipleInVp) { // occasionally get PRN in CONJ-like structures foundPassiveVP = true; if (DEBUG) { System.err.println( "hasPassiveAuxiliary [coordination] found (VP (VP[VBN/VBG/VBD] CONJP"); } break; } } } } if (foundPassiveAux && foundPassiveVP) { break; } } // end for (Tree kid : kids) if (DEBUG) { System.err.println( "hasPassiveProgressiveAuxiliary returns " + (foundPassiveAux && foundPassiveVP)); } return foundPassiveAux && foundPassiveVP; }
/** * Normalize a whole tree -- one can assume that this is the root. This implementation deletes * empty elements (ones with nonterminal tag label '-NONE-') from the tree. */ @Override public Tree normalizeWholeTree(Tree tree, TreeFactory tf) { TreeTransformer transformer1 = new TreeTransformer() { @Override public Tree transformTree(Tree t) { if (doSGappedStuff) { String lab = t.label().value(); if (lab.equals("S") && includesEmptyNPSubj(t)) { LabelFactory lf = t.label().labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! t.setLabel(lf.newLabel(t.label().value() + "-G")); } } return t; } }; Filter<Tree> subtreeFilter = new Filter<Tree>() { private static final long serialVersionUID = -7250433816896327901L; @Override public boolean accept(Tree t) { Tree[] kids = t.children(); Label l = t.label(); // The special Switchboard non-terminals clause. // Note that it deletes IP which other Treebanks might use! if ("RS".equals(t.label().value()) || "RM".equals(t.label().value()) || "IP".equals(t.label().value()) || "CODE".equals(t.label().value())) { return false; } if ((l != null) && l.value() != null && (l.value().equals("-NONE-")) && !t.isLeaf() && kids.length == 1 && kids[0].isLeaf()) { // Delete empty/trace nodes (ones marked '-NONE-') return false; } return true; } }; Filter<Tree> nodeFilter = new Filter<Tree>() { private static final long serialVersionUID = 9000955019205336311L; @Override public boolean accept(Tree t) { if (t.isLeaf() || t.isPreTerminal()) { return true; } // The special switchboard non-terminals clause. Try keeping EDITED for now.... // if ("EDITED".equals(t.label().value())) { // return false; // } if (t.numChildren() != 1) { return true; } if (t.label() != null && t.label().value() != null && t.label().value().equals(t.children()[0].label().value())) { return false; } return true; } }; TreeTransformer transformer2 = new TreeTransformer() { @Override public Tree transformTree(Tree t) { if (temporalAnnotation == TEMPORAL_ANY_TMP_PERCOLATED) { String lab = t.label().value(); if (TmpPattern.matcher(lab).matches()) { Tree oldT = t; Tree ht; do { ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.label().value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } LabelFactory lf = ht.label().labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); oldT = ht; } while (!ht.isPreTerminal()); if (lab.startsWith("PP")) { ht = headFinder.determineHead(t); // look to right int j = t.objectIndexOf(ht); int sz = t.children().length; if (j + 1 < sz) { ht = t.getChild(j + 1); } if (ht.label().value().startsWith("NP")) { while (!ht.isLeaf()) { LabelFactory lf = ht.label().labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); ht = headFinder.determineHead(ht); } } } } } else if (temporalAnnotation == TEMPORAL_ALL_TERMINALS) { String lab = t.label().value(); if (NPTmpPattern.matcher(lab).matches()) { Tree ht; ht = headFinder.determineHead(t); if (ht.isPreTerminal()) { // change all tags to -TMP LabelFactory lf = ht.label().labelFactory(); Tree[] kids = t.children(); for (Tree kid : kids) { if (kid.isPreTerminal()) { // Note: this changes the tree label, rather // than creating a new tree node. Beware! kid.setLabel(lf.newLabel(kid.value() + "-TMP")); } } } else { Tree oldT = t; do { ht = headFinder.determineHead(oldT); oldT = ht; } while (!ht.isPreTerminal()); LabelFactory lf = ht.label().labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); } } } else if (temporalAnnotation == TEMPORAL_ALL_NP) { String lab = t.label().value(); if (NPTmpPattern.matcher(lab).matches()) { Tree oldT = t; Tree ht; do { ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.label().value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } if (ht.isPreTerminal() || ht.value().startsWith("NP")) { LabelFactory lf = ht.labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); oldT = ht; } } while (ht.value().startsWith("NP")); } } else if (temporalAnnotation == TEMPORAL_ALL_NP_AND_PP || temporalAnnotation == TEMPORAL_NP_AND_PP_WITH_NP_HEAD || temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP) { // also allow chain to start with PP String lab = t.value(); if (NPTmpPattern.matcher(lab).matches() || PPTmpPattern.matcher(lab).matches()) { Tree oldT = t; do { Tree ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } else if ((temporalAnnotation == TEMPORAL_NP_AND_PP_WITH_NP_HEAD || temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP) && (ht.value().equals("IN") || ht.value().equals("TO"))) { // change the head to be NP if possible Tree[] kidlets = oldT.children(); for (int k = kidlets.length - 1; k > 0; k--) { if (kidlets[k].value().startsWith("NP")) { ht = kidlets[k]; } } } LabelFactory lf = ht.labelFactory(); // Note: this next bit changes the tree label, rather // than creating a new tree node. Beware! if (ht.isPreTerminal() || ht.value().startsWith("NP")) { ht.setLabel(lf.newLabel(ht.value() + "-TMP")); } if (temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP && oldT.value().startsWith("PP")) { oldT.setLabel(lf.newLabel(tlp.basicCategory(oldT.value()))); } oldT = ht; } while (oldT.value().startsWith("NP") || oldT.value().startsWith("PP")); } } else if (temporalAnnotation == TEMPORAL_ALL_NP_PP_ADVP) { // also allow chain to start with PP or ADVP String lab = t.value(); if (NPTmpPattern.matcher(lab).matches() || PPTmpPattern.matcher(lab).matches() || ADVPTmpPattern.matcher(lab).matches()) { Tree oldT = t; do { Tree ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } // Note: this next bit changes the tree label, rather // than creating a new tree node. Beware! if (ht.isPreTerminal() || ht.value().startsWith("NP")) { LabelFactory lf = ht.labelFactory(); ht.setLabel(lf.newLabel(ht.value() + "-TMP")); } oldT = ht; } while (oldT.value().startsWith("NP")); } } else if (temporalAnnotation == TEMPORAL_9) { // also allow chain to start with PP or ADVP String lab = t.value(); if (NPTmpPattern.matcher(lab).matches() || PPTmpPattern.matcher(lab).matches() || ADVPTmpPattern.matcher(lab).matches()) { // System.err.println("TMP: Annotating " + t); addTMP9(t); } } else if (temporalAnnotation == TEMPORAL_ACL03PCFG) { String lab = t.label().value(); if (lab != null && NPTmpPattern.matcher(lab).matches()) { Tree oldT = t; Tree ht; do { ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.label().value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } oldT = ht; } while (!ht.isPreTerminal()); if (!onlyTagAnnotateNstar || ht.label().value().startsWith("N")) { LabelFactory lf = ht.label().labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); } } } if (doAdverbialNP) { String lab = t.value(); if (NPAdvPattern.matcher(lab).matches()) { Tree oldT = t; Tree ht; do { ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.label().value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } if (ht.isPreTerminal() || ht.value().startsWith("NP")) { LabelFactory lf = ht.labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-ADV")); oldT = ht; } } while (ht.value().startsWith("NP")); } } return t; } }; // if there wasn't an empty nonterminal at the top, but an S, wrap it. if (tree.label().value().equals("S")) { tree = tf.newTreeNode("ROOT", Collections.singletonList(tree)); } // repair for the phrasal VB in Switchboard (PTB version 3) that should be a VP for (Tree subtree : tree) { if (subtree.isPhrasal() && "VB".equals(subtree.label().value())) { subtree.setValue("VP"); } } tree = tree.transform(transformer1); if (tree == null) { return null; } tree = tree.prune(subtreeFilter, tf); if (tree == null) { return null; } tree = tree.spliceOut(nodeFilter, tf); if (tree == null) { return null; } return tree.transform(transformer2, tf); }