/**
 * Tests whether {@code pattern} lines up with the top of {@code target}, node by node.
 *
 * <p>An empty pattern matches any target. A non-empty pattern never matches an empty target.
 * Otherwise the two root values must agree and the left and right subtrees must match
 * recursively.
 *
 * @param target Tree being inspected
 * @param pattern Tree that must overlay the top of {@code target}
 * @return {@code true} if every node of {@code pattern} has an agreeing node at the same
 *     position in {@code target}
 */
static boolean prefixMatches(Tree target, Tree pattern) {
  if (pattern.isEmpty()) {
    return true;
  }
  // NOTE(review): values are compared with ==. That is only value equality if value() returns a
  // primitive; for a reference type it is an identity test -- confirm against Tree.value().
  final boolean rootsAgree = !target.isEmpty() && target.value() == pattern.value();
  return rootsAgree
      && prefixMatches(target.left(), pattern.left())
      && prefixMatches(target.right(), pattern.right());
}
/**
 * Builds an unlabeled dependency as a two-element list: head word first, dependent word second.
 *
 * @param head Constituent whose head terminal supplies the first element
 * @param dep Constituent whose head terminal supplies the second element
 * @param root Not consulted by this implementation
 * @return A new list holding the head terminal's value followed by the dependent terminal's
 *     value
 */
public List<String> makeDependency(Tree head, Tree dep, Tree root) {
  final Tree governor = head.headTerminal(hf);
  final Tree dependent = dep.headTerminal(hf);

  final List<String> pair = new ArrayList<String>(3);
  pair.add(governor.value());
  pair.add(dependent.value());
  return pair;
}
/**
 * Builds a directed dependency as a three-element list: head word, dependent word, and a
 * direction marker.
 *
 * <p>The marker is {@code leftHeaded} when the head terminal's left character edge within
 * {@code root} is strictly smaller than the dependent terminal's, and {@code rightHeaded}
 * otherwise.
 *
 * @param head Constituent whose head terminal supplies the first element
 * @param dep Constituent whose head terminal supplies the second element
 * @param root Tree in which both character edges are measured
 * @return A new list of head value, dependent value and direction marker, in that order
 */
public List<String> makeDependency(Tree head, Tree dep, Tree root) {
  final Tree governor = head.headTerminal(hf);
  final Tree dependent = dep.headTerminal(hf);
  final String direction =
      root.leftCharEdge(governor) < root.leftCharEdge(dependent) ? leftHeaded : rightHeaded;

  final List<String> triple = new ArrayList<String>(3);
  triple.add(governor.value());
  triple.add(dependent.value());
  triple.add(direction);
  return triple;
}
/**
 * Walks down from a scratch node following the sign of {@code compareTo}, stores {@code value}
 * in the node where the walk stops, and increments {@code length} when the number of descent
 * steps exceeds it.
 *
 * <p>Nothing is stored when {@code isEmpty()} is true.
 *
 * <p>NOTE(review): the walk starts at a freshly constructed {@code Tree}, not at this node, and
 * the comparison is {@code this.compareTo(temp)} rather than anything involving {@code value}.
 * Both look unintended but cannot be corrected safely without seeing the rest of the class --
 * confirm before relying on this method.
 *
 * @param value The value to store
 */
public void add(int value) {
  Tree temp = new Tree();
  Tree temp2 = new Tree();
  int i = 0;
  if (!isEmpty()) {
    while (temp != null) {
      // Only the sign of a compareTo result is specified, so test the sign rather than == 1 / == -1.
      final int cmp = compareTo(temp);
      if (cmp > 0) {
        if (temp.right == null) {
          break;
        }
        temp2 = temp;
        temp = temp.right;
      } else if (cmp < 0) {
        if (temp.left == null) {
          break;
        }
        temp2 = temp;
        temp = temp.left;
      } else {
        // Equal: there is no direction to move in. Without this break the loop never terminates.
        break;
      }
      // Exactly one descent step was taken in this iteration.
      i++;
    }
    temp.value = value;
    temp.parent = temp2;
  }
  if (i > length) {
    length++;
  }
}
/* Checks whether the tree t is an existential constituent * There are two cases: * -- affirmative sentences in which "there" is a left sister of the VP * -- questions in which "there" is a daughter of the SQ. * */ private boolean isExistential(Tree t, Tree parent) { if (DEBUG) { System.err.println("isExistential: " + t + ' ' + parent); } boolean toReturn = false; String motherCat = tlp.basicCategory(t.label().value()); // affirmative case if (motherCat.equals("VP") && parent != null) { // take t and the sisters Tree[] kids = parent.children(); // iterate over the sisters before t and checks if existential for (Tree kid : kids) { if (!kid.value().equals("VP")) { List<Label> tags = kid.preTerminalYield(); for (Label tag : tags) { if (tag.value().equals("EX")) { toReturn = true; } } } else { break; } } } // question case else if (motherCat.startsWith("SQ") && parent != null) { // take the daughters Tree[] kids = parent.children(); // iterate over the daughters and checks if existential for (Tree kid : kids) { if (!kid.value().startsWith("VB")) { // not necessary to look into the verb List<Label> tags = kid.preTerminalYield(); for (Label tag : tags) { if (tag.value().equals("EX")) { toReturn = true; } } } } } if (DEBUG) { System.err.println("decision " + toReturn); } return toReturn; }
/**
 * Looks for a preterminal child whose tag is in {@code verbalTags} and whose lower-cased word is
 * in {@code verbalSet} (in practice the auxiliary or copula verbs). Only preterminal children are
 * inspected; phrasal daughters are deliberately not searched.
 *
 * <p>The tag is taken from the child's label when it is a {@code HasTag} with a non-null tag,
 * otherwise from the child's value. The word is taken from the first grandchild's label when it
 * is a {@code HasWord} with a non-null word, otherwise from that label's value.
 *
 * @param kids The child trees
 * @param verbalSet The set of (lower-case) words
 * @return {@code true} if one of the child trees is a preterminal verb headed by a word in
 *     {@code verbalSet}
 */
private boolean hasVerbalAuxiliary(Tree[] kids, HashSet<String> verbalSet) {
  if (DEBUG) {
    System.err.println("Checking for verbal auxiliary");
  }
  for (Tree kid : kids) {
    if (DEBUG) {
      System.err.println(" checking in " + kid);
    }
    if (!kid.isPreTerminal()) {
      continue;
    }

    Label kidLabel = kid.label();
    String tag = null;
    if (kidLabel instanceof HasTag) {
      tag = ((HasTag) kidLabel).tag();
    }
    if (tag == null) {
      tag = kid.value();
    }

    Label wordLabel = kid.firstChild().label();
    String word = null;
    if (wordLabel instanceof HasWord) {
      word = ((HasWord) wordLabel).word();
    }
    if (word == null) {
      word = wordLabel.value();
    }

    if (DEBUG) {
      System.err.println("Checking " + kid.value() + " head is " + word + '/' + tag);
    }
    // Lower-case with a fixed locale: the default-locale overload maps 'I' to a dotless i under
    // e.g. a Turkish locale, which would make "IS" miss "is" in the set.
    String lcWord = word.toLowerCase(java.util.Locale.ENGLISH);
    if (verbalTags.contains(tag) && verbalSet.contains(lcWord)) {
      if (DEBUG) {
        System.err.println("hasVerbalAuxiliary returns true");
      }
      return true;
    }
  }
  if (DEBUG) {
    System.err.println("hasVerbalAuxiliary returns false");
  }
  return false;
}
/**
 * Writes a parenthesised rendering of {@code tree} to {@code ps}.
 *
 * <p>Each node is written as {@code (value (children))}, where every child rendering is followed
 * by a comma (including the last one).
 *
 * @param tree Tree to render
 * @param ps Stream that receives the text
 */
public static <A> void printTree(Tree<A> tree, PrintStream ps) {
  ps.format("(%s (", tree.value());
  for (Tree<A> child : tree.children()) {
    printTree(child, ps);
    ps.print(',');
  }
  ps.print("))");
}
/** * Given a tree t, if this tree contains a QP of the form QP (RB IN CD|DT ...) well over, more * than QP (JJR IN CD|DT ...) fewer than QP (IN JJS CD|DT ...) at least QP (... CC ...) between 5 * and 10 it will transform it */ private static void doTransform(Tree t) { if (t.value().startsWith("QP")) { // look at the children List<Tree> children = t.getChildrenAsList(); if (children.size() >= 3 && children.get(0).isPreTerminal()) { // go through the children and check if they match the structure we want String child1 = children.get(0).value(); String child2 = children.get(1).value(); String child3 = children.get(2).value(); if ((child3.startsWith("CD") || child3.startsWith("DT")) && (child1.startsWith("RB") || child1.startsWith("JJ") || child1.startsWith("IN")) && (child2.startsWith("IN") || child2.startsWith("JJ"))) { transformQP(t); children = t.getChildrenAsList(); } } // If the children include a CC, we split that into left and // right subtrees with the CC in the middle so the headfinders // have an easier time interpreting the tree later on if (children.size() >= 3) { boolean flat = true; for (int i = 0; i < children.size(); ++i) { if (!children.get(i).isPreTerminal()) { flat = false; break; } } if (flat) { for (int i = 1; i < children.size() - 1; ++i) { if (children.get(i).value().startsWith("CC")) { transformCC( t, children.subList(0, i), children.get(i), children.subList(i + 1, children.size())); break; } } } } /* --- to be written or deleted } else if (t.value().startsWith("NP")) { //look at the children List<Tree> children = t.getChildrenAsList(); if (children.size() >= 3) { } ---- */ } else if (t.isPhrasal()) { for (Tree child : t.children()) { doTransform(child); } } }
/* Is the tree t a WH-question? * At present this is only true if the tree t is a SQ having a WH.* sister * and headed by a SBARQ. * (It was changed to looser definition in Feb 2006.) * */ private static boolean isWHQ(Tree t, Tree parent) { if (t == null) return false; boolean toReturn = false; if (t.value().startsWith("SQ")) { if (parent != null && parent.value().equals("SBARQ")) { Tree[] kids = parent.children(); for (Tree kid : kids) { // looks for a WH.* if (kid.value().startsWith("WH")) { toReturn = true; } } } } if (DEBUG) { System.err.println("in isWH, decision: " + toReturn + " for node " + t); } return toReturn; }
// TODO all this is very memory inefficient and will lead to stack overflows for very deep trees public static ObjectNode toJsonTree(ObjectMapper objectMapper, Tree<? extends JsonNode> tree) { final ObjectNode node = objectMapper.createObjectNode(); node.put("_value", tree.value()); final ArrayNode children = objectMapper.createArrayNode(); for (Tree<? extends JsonNode> child : tree.children()) children.add(toJsonTree(objectMapper, child)); node.put("_children", children); return node; }
/**
 * Produces a recursive copy of this tree.
 *
 * <p>A new tree is constructed with this tree's breadth and depth, given this tree's value, and
 * its child list is emptied. When {@code depth} is positive, the first {@code breadth} children
 * of this tree are copied recursively and appended in order.
 *
 * @return The copied tree
 */
public Tree inneficientCopy() {
  final Tree duplicate = new Tree(this.breadth, this.depth);
  duplicate.value = this.value;
  // Discard whatever children the constructor created; they are replaced by copies below.
  duplicate.children.clear();
  if (depth <= 0) {
    return duplicate;
  }
  int index = 0;
  while (index < breadth) {
    duplicate.children.add(this.children.get(index).inneficientCopy());
    index++;
  }
  return duplicate;
}
/**
 * Overrides the postOperationFix method so that in a coordination the first conjunct becomes the
 * head: for "a, b and c" we want "a".
 *
 * <p>If the daughter just before the proposed head is a CC or CONJP, the candidate moves two
 * positions to the left. It then steps further left past punctuation preterminals, and hops two
 * positions at a time while the daughter immediately before it is punctuation. The original
 * index is kept when no candidate at a non-negative index remains.
 *
 * @param headIdx Index of the head proposed so far
 * @param daughterTrees The daughters of the node being headed
 * @return The possibly corrected head index
 */
protected int postOperationFix(int headIdx, Tree[] daughterTrees) {
  if (headIdx < 2) {
    return headIdx;
  }
  String prevLab = tlp.basicCategory(daughterTrees[headIdx - 1].value());
  if (!prevLab.equals("CC") && !prevLab.equals("CONJP")) {
    return headIdx;
  }

  int newHeadIdx = headIdx - 2;
  // Re-read the daughter on every step. Testing only the daughter at the starting index made
  // the loop run down to -1 whenever that daughter was punctuation, discarding the fix.
  while (newHeadIdx >= 0
      && daughterTrees[newHeadIdx].isPreTerminal()
      && tlp.isPunctuationTag(daughterTrees[newHeadIdx].value())) {
    newHeadIdx--;
  }
  // Hop over "conjunct, punctuation" pairs to reach the leftmost conjunct.
  while (newHeadIdx >= 2 && tlp.isPunctuationTag(daughterTrees[newHeadIdx - 1].value())) {
    newHeadIdx = newHeadIdx - 2;
  }
  return newHeadIdx >= 0 ? newHeadIdx : headIdx;
}
/**
 * Applies a function to every value in a tree, producing a tree of the results with the same
 * structure.
 *
 * @param fn Function applied to each node's value
 * @param tree Tree of {@code A}'s
 * @return Tree of {@code B}'s
 */
public static <A,B> Tree<B> map(final Function<A,B> fn, Tree<A> tree) {
  final B mappedValue = fn.apply(tree.value());
  if (!isLeaf(tree)) {
    // Maps one subtree by recursing into this method.
    final Function<Tree<A>,Tree<B>> recurse = new Function<Tree<A>,Tree<B>>() {
      public Tree<B> apply(Tree<A> subtree) {
        return map(fn, subtree);
      }
    };
    final Iterable<Tree<B>> mappedChildren = Iterables.transform(tree.children(), recurse);
    return new ImmutableTree<B>(mappedValue, mappedChildren);
  }
  return new ImmutableTree<B>(mappedValue);
}
private static boolean includesEmptyNPSubj(Tree t) { if (t == null) { return false; } Tree[] kids = t.children(); if (kids == null) { return false; } boolean foundNullSubj = false; for (Tree kid : kids) { Tree[] kidkids = kid.children(); if (NPSbjPattern.matcher(kid.value()).matches()) { kid.setValue("NP"); if (kidkids != null && kidkids.length == 1 && kidkids[0].value().equals("-NONE-")) { // only set flag, since there are 2 a couple of times (errors) foundNullSubj = true; } } } return foundNullSubj; }
/** * Determine which daughter of the current parse tree is the head. * * @param t The parse tree to examine the daughters of. If this is a leaf, <code>null</code> is * returned * @param parent The parent of t * @return The daughter parse tree that is the head of <code>t</code>. Returns null for leaf * nodes. * @see Tree#percolateHeads(HeadFinder) for a routine to call this and spread heads throughout a * tree */ public Tree determineHead(Tree t, Tree parent) { if (nonTerminalInfo == null) { throw new RuntimeException( "Classes derived from AbstractCollinsHeadFinder must" + " create and fill HashMap nonTerminalInfo."); } if (DEBUG) { System.err.println("determineHead for " + t.value()); } if (t.isLeaf()) { return null; } Tree[] kids = t.children(); Tree theHead; // first check if subclass found explicitly marked head if ((theHead = findMarkedHead(t)) != null) { if (DEBUG) { System.err.println( "Find marked head method returned " + theHead.label() + " as head of " + t.label()); } return theHead; } // if the node is a unary, then that kid must be the head // it used to special case preterminal and ROOT/TOP case // but that seemed bad (especially hardcoding string "ROOT") if (kids.length == 1) { if (DEBUG) { System.err.println( "Only one child determines " + kids[0].label() + " as head of " + t.label()); } return kids[0]; } return determineNonTrivialHead(t, parent); }
/**
 * Reports whether any preterminal child of {@code t} is tagged VBN, VBG or VBD.
 *
 * <p>The tag is read from the child's label when it is a {@code HasTag} with a non-null tag and
 * from the child's value otherwise. Phrasal children are not searched.
 *
 * @param t The tree whose immediate children are examined
 * @return {@code true} if such a preterminal child exists
 */
private static boolean vpContainsParticiple(Tree t) {
  for (Tree child : t.children()) {
    if (DEBUG) {
      System.err.println("vpContainsParticiple examining " + child);
    }
    if (!child.isPreTerminal()) {
      continue;
    }
    final Label childLabel = child.label();
    String tag = (childLabel instanceof HasTag) ? ((HasTag) childLabel).tag() : null;
    if (tag == null) {
      tag = child.value();
    }
    final boolean participleLike = "VBN".equals(tag) || "VBG".equals(tag) || "VBD".equals(tag);
    if (participleLike) {
      if (DEBUG) {
        System.err.println("vpContainsParticiple found VBN/VBG/VBD VP");
      }
      return true;
    }
  }
  return false;
}
/** * Add -TMP when not present within an NP * * @param tree The tree to add temporal info to. */ private void addTMP9(final Tree tree) { // do the head chain under it Tree ht = headFinder.determineHead(tree); // special fix for possessives! -- make noun before head if (ht.value().equals("POS")) { int j = tree.objectIndexOf(ht); if (j > 0) { ht = tree.getChild(j - 1); } } // Note: this next bit changes the tree label, rather // than creating a new tree node. Beware! if (ht.isPreTerminal() || ht.value().startsWith("NP") || ht.value().startsWith("PP") || ht.value().startsWith("ADVP")) { if (!TmpPattern.matcher(ht.value()).matches()) { LabelFactory lf = ht.labelFactory(); // System.err.println("TMP: Changing " + ht.value() + " to " + // ht.value() + "-TMP"); ht.setLabel(lf.newLabel(ht.value() + "-TMP")); } if (ht.value().startsWith("NP") || ht.value().startsWith("PP") || ht.value().startsWith("ADVP")) { addTMP9(ht); } } // do the NPs under it (which may or may not be the head chain Tree[] kidlets = tree.children(); for (int k = 0; k < kidlets.length; k++) { ht = kidlets[k]; LabelFactory lf; if (tree.isPrePreTerminal() && !TmpPattern.matcher(ht.value()).matches()) { // System.err.println("TMP: Changing " + ht.value() + " to " + // ht.value() + "-TMP"); lf = ht.labelFactory(); // Note: this next bit changes the tree label, rather // than creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.value() + "-TMP")); } else if (ht.value().startsWith("NP")) { // don't add -TMP twice! if (!TmpPattern.matcher(ht.value()).matches()) { lf = ht.labelFactory(); // System.err.println("TMP: Changing " + ht.value() + " to " + // ht.value() + "-TMP"); // Note: this next bit changes the tree label, rather // than creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.value() + "-TMP")); } addTMP9(ht); } } }
// now overally complex so it deals with coordinations. Maybe change this class to use tregrex? private boolean hasPassiveProgressiveAuxiliary(Tree[] kids, HashSet<String> verbalSet) { if (DEBUG) { System.err.println("Checking for passive/progressive auxiliary"); } boolean foundPassiveVP = false; boolean foundPassiveAux = false; for (Tree kid : kids) { if (DEBUG) { System.err.println(" checking in " + kid); } if (kid.isPreTerminal()) { Label kidLabel = kid.label(); String tag = null; if (kidLabel instanceof HasTag) { tag = ((HasTag) kidLabel).tag(); } if (tag == null) { tag = kid.value(); } Label wordLabel = kid.firstChild().label(); String word = null; if (wordLabel instanceof HasWord) { word = ((HasWord) wordLabel).word(); } if (word == null) { word = wordLabel.value(); } if (DEBUG) { System.err.println("Checking " + kid.value() + " head is " + word + '/' + tag); } String lcWord = word.toLowerCase(); if (verbalTags.contains(tag) && verbalSet.contains(lcWord)) { if (DEBUG) { System.err.println("hasPassiveProgressiveAuxiliary found passive aux"); } foundPassiveAux = true; } } else if (kid.isPhrasal()) { Label kidLabel = kid.label(); String cat = null; if (kidLabel instanceof HasCategory) { cat = ((HasCategory) kidLabel).category(); } if (cat == null) { cat = kid.value(); } if (!cat.startsWith("VP")) { continue; } if (DEBUG) { System.err.println("hasPassiveProgressiveAuxiliary found VP"); } Tree[] kidkids = kid.children(); boolean foundParticipleInVp = false; for (Tree kidkid : kidkids) { if (DEBUG) { System.err.println(" hasPassiveProgressiveAuxiliary examining " + kidkid); } if (kidkid.isPreTerminal()) { Label kidkidLabel = kidkid.label(); String tag = null; if (kidkidLabel instanceof HasTag) { tag = ((HasTag) kidkidLabel).tag(); } if (tag == null) { tag = kidkid.value(); } // we allow in VBD because of frequent tagging mistakes if ("VBN".equals(tag) || "VBG".equals(tag) || "VBD".equals(tag)) { foundPassiveVP = true; if (DEBUG) { 
System.err.println("hasPassiveAuxiliary found VBN/VBG/VBD VP"); } break; } else if ("CC".equals(tag) && foundParticipleInVp) { foundPassiveVP = true; if (DEBUG) { System.err.println( "hasPassiveAuxiliary [coordination] found (VP (VP[VBN/VBG/VBD] CC"); } break; } } else if (kidkid.isPhrasal()) { String catcat = null; if (kidLabel instanceof HasCategory) { catcat = ((HasCategory) kidLabel).category(); } if (catcat == null) { catcat = kid.value(); } if ("VP".equals(catcat)) { if (DEBUG) { System.err.println("hasPassiveAuxiliary found (VP (VP)), recursing"); } foundParticipleInVp = vpContainsParticiple(kidkid); } else if (("CONJP".equals(catcat) || "PRN".equals(catcat)) && foundParticipleInVp) { // occasionally get PRN in CONJ-like structures foundPassiveVP = true; if (DEBUG) { System.err.println( "hasPassiveAuxiliary [coordination] found (VP (VP[VBN/VBG/VBD] CONJP"); } break; } } } } if (foundPassiveAux && foundPassiveVP) { break; } } // end for (Tree kid : kids) if (DEBUG) { System.err.println( "hasPassiveProgressiveAuxiliary returns " + (foundPassiveAux && foundPassiveVP)); } return foundPassiveAux && foundPassiveVP; }
/** * Determine which daughter of the current parse tree is the head. It assumes that the daughters * already have had their heads determined. Uses special rule for VP heads * * @param t The parse tree to examine the daughters of. This is assumed to never be a leaf * @return The parse tree that is the head */ @Override protected Tree determineNonTrivialHead(Tree t, Tree parent) { String motherCat = tlp.basicCategory(t.label().value()); if (DEBUG) { System.err.println("At " + motherCat + ", my parent is " + parent); } // do VPs with auxiliary as special case if ((motherCat.equals("VP") || motherCat.equals("SQ") || motherCat.equals("SINV"))) { Tree[] kids = t.children(); // try to find if there is an auxiliary verb if (DEBUG) { System.err.println("Semantic head finder: at VP"); System.err.println("Class is " + t.getClass().getName()); t.pennPrint(System.err); // System.err.println("hasVerbalAuxiliary = " + hasVerbalAuxiliary(kids, // verbalAuxiliaries)); } // looks for auxiliaries if (hasVerbalAuxiliary(kids, verbalAuxiliaries) || hasPassiveProgressiveAuxiliary(kids, passiveAuxiliaries)) { // String[] how = new String[] {"left", "VP", "ADJP", "NP"}; // Including NP etc seems okay for copular sentences but is // problematic for other auxiliaries, like 'he has an answer' // But maybe doing ADJP is fine! 
String[] how = {"left", "VP", "ADJP"}; Tree pti = traverseLocate(kids, how, false); if (DEBUG) { System.err.println("Determined head (case 1) for " + t.value() + " is: " + pti); } if (pti != null) { return pti; } else { // System.err.println("------"); // System.err.println("SemanticHeadFinder failed to reassign head for"); // t.pennPrint(System.err); // System.err.println("------"); } } // looks for copular verbs if (hasVerbalAuxiliary(kids, copulars) && !isExistential(t, parent) && !isWHQ(t, parent)) { String[] how; if (motherCat.equals("SQ")) { how = new String[] {"right", "VP", "ADJP", "NP", "WHADJP", "WHNP"}; } else { how = new String[] {"left", "VP", "ADJP", "NP", "WHADJP", "WHNP"}; } Tree pti = traverseLocate(kids, how, false); // don't allow a temporal to become head if (pti != null && pti.label() != null && pti.label().value().contains("-TMP")) { pti = null; } // In SQ, only allow an NP to become head if there is another one to the left (then it's // probably predicative) if (motherCat.equals("SQ") && pti != null && pti.label() != null && pti.label().value().startsWith("NP")) { boolean foundAnotherNp = false; for (Tree kid : kids) { if (kid == pti) { break; } else if (kid.label() != null && kid.label().value().startsWith("NP")) { foundAnotherNp = true; break; } } if (!foundAnotherNp) { pti = null; } } if (DEBUG) { System.err.println("Determined head (case 2) for " + t.value() + " is: " + pti); } if (pti != null) { return pti; } else { if (DEBUG) { System.err.println("------"); System.err.println("SemanticHeadFinder failed to reassign head for"); t.pennPrint(System.err); System.err.println("------"); } } } } Tree hd = super.determineNonTrivialHead(t, parent); if (DEBUG) { System.err.println("Determined head (case 3) for " + t.value() + " is: " + hd); } return hd; }