private static void transformCC(Tree t, List<Tree> left, Tree conj, List<Tree> right) { TreeFactory tf = t.treeFactory(); LabelFactory lf = t.label().labelFactory(); Tree leftQP = tf.newTreeNode(lf.newLabel("NP"), left); Tree rightQP = tf.newTreeNode(lf.newLabel("NP"), right); List<Tree> newChildren = new ArrayList<Tree>(); newChildren.add(leftQP); newChildren.add(conj); newChildren.add(rightQP); t.setChildren(newChildren); }
public Tree transformTree(Tree tree) { Label lab = tree.label(); if (tree.isLeaf()) { Tree leaf = tf.newLeaf(lab); leaf.setScore(tree.score()); return leaf; } String s = lab.value(); s = treebankLanguagePack().basicCategory(s); s = treebankLanguagePack().stripGF(s); int numKids = tree.numChildren(); List<Tree> children = new ArrayList<Tree>(numKids); for (int cNum = 0; cNum < numKids; cNum++) { Tree child = tree.getChild(cNum); Tree newChild = transformTree(child); children.add(newChild); } CategoryWordTag newLabel = new CategoryWordTag(lab); newLabel.setCategory(s); if (lab instanceof HasTag) { String tag = ((HasTag) lab).tag(); tag = treebankLanguagePack().basicCategory(tag); tag = treebankLanguagePack().stripGF(tag); newLabel.setTag(tag); } Tree node = tf.newTreeNode(newLabel, children); node.setScore(tree.score()); return node; }
public Tree transformTree(Tree tree) { Label lab = tree.label(); if (tree.isLeaf()) { Tree leaf = tf.newLeaf(lab); leaf.setScore(tree.score()); return leaf; } String s = lab.value(); s = treebankLanguagePack().basicCategory(s); int numKids = tree.numChildren(); List<Tree> children = new ArrayList<Tree>(numKids); for (int cNum = 0; cNum < numKids; cNum++) { Tree child = tree.getChild(cNum); Tree newChild = transformTree(child); // cdm 2007: for just subcategory stripping, null shouldn't happen // if (newChild != null) { children.add(newChild); // } } // if (children.isEmpty()) { // return null; // } CategoryWordTag newLabel = new CategoryWordTag(lab); newLabel.setCategory(s); if (lab instanceof HasTag) { String tag = ((HasTag) lab).tag(); tag = treebankLanguagePack().basicCategory(tag); newLabel.setTag(tag); } Tree node = tf.newTreeNode(newLabel, children); node.setScore(tree.score()); return node; }
public Tree transformTree(Tree tree) { TreeFactory tf = tree.treeFactory(); String tag = tree.label().value(); if (tree.isPreTerminal()) { String word = tree.firstChild().label().value(); List<Tree> newPreterms = new ArrayList<>(); for (int i = 0, size = word.length(); i < size; i++) { String singleCharLabel = new String(new char[] {word.charAt(i)}); Tree newLeaf = tf.newLeaf(singleCharLabel); String suffix; if (useTwoCharTags) { if (word.length() == 1 || i == 0) { suffix = "_S"; } else { suffix = "_M"; } } else { if (word.length() == 1) { suffix = "_S"; } else if (i == 0) { suffix = "_B"; } else if (i == word.length() - 1) { suffix = "_E"; } else { suffix = "_M"; } } newPreterms.add(tf.newTreeNode(tag + suffix, Collections.<Tree>singletonList(newLeaf))); } return tf.newTreeNode(tag, newPreterms); } else { List<Tree> newChildren = new ArrayList<>(); for (int i = 0; i < tree.children().length; i++) { Tree child = tree.children()[i]; newChildren.add(transformTree(child)); } return tf.newTreeNode(tag, newChildren); } }
public static Tree copyHelper(Tree t, Map<Tree, Tree> newToOld, Map<Tree, Tree> oldToNew) { Tree[] kids = t.children(); Tree[] newKids = new Tree[kids.length]; for (int i = 0, n = kids.length; i < n; i++) { newKids[i] = copyHelper(kids[i], newToOld, oldToNew); } TreeFactory tf = t.treeFactory(); if (kids.length == 0) { Tree newLeaf = tf.newLeaf(t.label()); newToOld.put(newLeaf, t); oldToNew.put(newLeaf, t); return newLeaf; } Tree newNode = tf.newTreeNode(t.label(), Arrays.asList(newKids)); newToOld.put(newNode, t); oldToNew.put(t, newNode); return newNode; }
private static void transformQP(Tree t) { List<Tree> children = t.getChildrenAsList(); TreeFactory tf = t.treeFactory(); LabelFactory lf = t.label().labelFactory(); // create the new XS having the first two children of the QP Tree left = tf.newTreeNode(lf.newLabel("XS"), null); for (int i = 0; i < 2; i++) { left.addChild(children.get(i)); } // remove all the two first children of t before for (int i = 0; i < 2; i++) { t.removeChild(0); } // add XS as the first child t.addChild(0, left); }
/** * Normalize a whole tree -- one can assume that this is the root. This implementation deletes * empty elements (ones with nonterminal tag label '-NONE-') from the tree. */ @Override public Tree normalizeWholeTree(Tree tree, TreeFactory tf) { TreeTransformer transformer1 = new TreeTransformer() { @Override public Tree transformTree(Tree t) { if (doSGappedStuff) { String lab = t.label().value(); if (lab.equals("S") && includesEmptyNPSubj(t)) { LabelFactory lf = t.label().labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! t.setLabel(lf.newLabel(t.label().value() + "-G")); } } return t; } }; Filter<Tree> subtreeFilter = new Filter<Tree>() { private static final long serialVersionUID = -7250433816896327901L; @Override public boolean accept(Tree t) { Tree[] kids = t.children(); Label l = t.label(); // The special Switchboard non-terminals clause. // Note that it deletes IP which other Treebanks might use! if ("RS".equals(t.label().value()) || "RM".equals(t.label().value()) || "IP".equals(t.label().value()) || "CODE".equals(t.label().value())) { return false; } if ((l != null) && l.value() != null && (l.value().equals("-NONE-")) && !t.isLeaf() && kids.length == 1 && kids[0].isLeaf()) { // Delete empty/trace nodes (ones marked '-NONE-') return false; } return true; } }; Filter<Tree> nodeFilter = new Filter<Tree>() { private static final long serialVersionUID = 9000955019205336311L; @Override public boolean accept(Tree t) { if (t.isLeaf() || t.isPreTerminal()) { return true; } // The special switchboard non-terminals clause. Try keeping EDITED for now.... // if ("EDITED".equals(t.label().value())) { // return false; // } if (t.numChildren() != 1) { return true; } if (t.label() != null && t.label().value() != null && t.label().value().equals(t.children()[0].label().value())) { return false; } return true; } }; TreeTransformer transformer2 = new TreeTransformer() { @Override public Tree transformTree(Tree t) { if (temporalAnnotation == TEMPORAL_ANY_TMP_PERCOLATED) { String lab = t.label().value(); if (TmpPattern.matcher(lab).matches()) { Tree oldT = t; Tree ht; do { ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.label().value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } LabelFactory lf = ht.label().labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); oldT = ht; } while (!ht.isPreTerminal()); if (lab.startsWith("PP")) { ht = headFinder.determineHead(t); // look to right int j = t.objectIndexOf(ht); int sz = t.children().length; if (j + 1 < sz) { ht = t.getChild(j + 1); } if (ht.label().value().startsWith("NP")) { while (!ht.isLeaf()) { LabelFactory lf = ht.label().labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); ht = headFinder.determineHead(ht); } } } } } else if (temporalAnnotation == TEMPORAL_ALL_TERMINALS) { String lab = t.label().value(); if (NPTmpPattern.matcher(lab).matches()) { Tree ht; ht = headFinder.determineHead(t); if (ht.isPreTerminal()) { // change all tags to -TMP LabelFactory lf = ht.label().labelFactory(); Tree[] kids = t.children(); for (Tree kid : kids) { if (kid.isPreTerminal()) { // Note: this changes the tree label, rather // than creating a new tree node. Beware! kid.setLabel(lf.newLabel(kid.value() + "-TMP")); } } } else { Tree oldT = t; do { ht = headFinder.determineHead(oldT); oldT = ht; } while (!ht.isPreTerminal()); LabelFactory lf = ht.label().labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); } } } else if (temporalAnnotation == TEMPORAL_ALL_NP) { String lab = t.label().value(); if (NPTmpPattern.matcher(lab).matches()) { Tree oldT = t; Tree ht; do { ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.label().value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } if (ht.isPreTerminal() || ht.value().startsWith("NP")) { LabelFactory lf = ht.labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); oldT = ht; } } while (ht.value().startsWith("NP")); } } else if (temporalAnnotation == TEMPORAL_ALL_NP_AND_PP || temporalAnnotation == TEMPORAL_NP_AND_PP_WITH_NP_HEAD || temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP) { // also allow chain to start with PP String lab = t.value(); if (NPTmpPattern.matcher(lab).matches() || PPTmpPattern.matcher(lab).matches()) { Tree oldT = t; do { Tree ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } else if ((temporalAnnotation == TEMPORAL_NP_AND_PP_WITH_NP_HEAD || temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP) && (ht.value().equals("IN") || ht.value().equals("TO"))) { // change the head to be NP if possible Tree[] kidlets = oldT.children(); for (int k = kidlets.length - 1; k > 0; k--) { if (kidlets[k].value().startsWith("NP")) { ht = kidlets[k]; } } } LabelFactory lf = ht.labelFactory(); // Note: this next bit changes the tree label, rather // than creating a new tree node. Beware! if (ht.isPreTerminal() || ht.value().startsWith("NP")) { ht.setLabel(lf.newLabel(ht.value() + "-TMP")); } if (temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP && oldT.value().startsWith("PP")) { oldT.setLabel(lf.newLabel(tlp.basicCategory(oldT.value()))); } oldT = ht; } while (oldT.value().startsWith("NP") || oldT.value().startsWith("PP")); } } else if (temporalAnnotation == TEMPORAL_ALL_NP_PP_ADVP) { // also allow chain to start with PP or ADVP String lab = t.value(); if (NPTmpPattern.matcher(lab).matches() || PPTmpPattern.matcher(lab).matches() || ADVPTmpPattern.matcher(lab).matches()) { Tree oldT = t; do { Tree ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } // Note: this next bit changes the tree label, rather // than creating a new tree node. Beware! if (ht.isPreTerminal() || ht.value().startsWith("NP")) { LabelFactory lf = ht.labelFactory(); ht.setLabel(lf.newLabel(ht.value() + "-TMP")); } oldT = ht; } while (oldT.value().startsWith("NP")); } } else if (temporalAnnotation == TEMPORAL_9) { // also allow chain to start with PP or ADVP String lab = t.value(); if (NPTmpPattern.matcher(lab).matches() || PPTmpPattern.matcher(lab).matches() || ADVPTmpPattern.matcher(lab).matches()) { // System.err.println("TMP: Annotating " + t); addTMP9(t); } } else if (temporalAnnotation == TEMPORAL_ACL03PCFG) { String lab = t.label().value(); if (lab != null && NPTmpPattern.matcher(lab).matches()) { Tree oldT = t; Tree ht; do { ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.label().value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } oldT = ht; } while (!ht.isPreTerminal()); if (!onlyTagAnnotateNstar || ht.label().value().startsWith("N")) { LabelFactory lf = ht.label().labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-TMP")); } } } if (doAdverbialNP) { String lab = t.value(); if (NPAdvPattern.matcher(lab).matches()) { Tree oldT = t; Tree ht; do { ht = headFinder.determineHead(oldT); // special fix for possessives! -- make noun before head if (ht.label().value().equals("POS")) { int j = oldT.objectIndexOf(ht); if (j > 0) { ht = oldT.getChild(j - 1); } } if (ht.isPreTerminal() || ht.value().startsWith("NP")) { LabelFactory lf = ht.labelFactory(); // Note: this changes the tree label, rather than // creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.label().value() + "-ADV")); oldT = ht; } } while (ht.value().startsWith("NP")); } } return t; } }; // if there wasn't an empty nonterminal at the top, but an S, wrap it. if (tree.label().value().equals("S")) { tree = tf.newTreeNode("ROOT", Collections.singletonList(tree)); } // repair for the phrasal VB in Switchboard (PTB version 3) that should be a VP for (Tree subtree : tree) { if (subtree.isPhrasal() && "VB".equals(subtree.label().value())) { subtree.setValue("VP"); } } tree = tree.transform(transformer1); if (tree == null) { return null; } tree = tree.prune(subtreeFilter, tf); if (tree == null) { return null; } tree = tree.spliceOut(nodeFilter, tf); if (tree == null) { return null; } return tree.transform(transformer2, tf); }