/**
 * Describes a local tree as a list of category strings.
 *
 * @param t the local tree whose label and daughters' labels are read
 * @return a new list holding the label value of {@code t} first, followed by the label value of
 *     each of its children in order
 */
public static List<String> localTreeAsCatList(Tree t) {
  Tree[] daughters = t.children();
  List<String> categories = new ArrayList<String>(daughters.length + 1);
  categories.add(t.label().value());
  for (Tree daughter : daughters) {
    categories.add(daughter.label().value());
  }
  return categories;
}
/**
 * Finds the position of {@code daughter} among the children of {@code parent}, comparing by
 * reference ({@code ==}) rather than by {@code equals}.
 *
 * @param parent the tree whose children are searched
 * @param daughter the node to look for
 * @return the index of the first child that is the same object as {@code daughter}, or -1 if
 *     there is none
 */
public static int objectEqualityIndexOf(Tree parent, Tree daughter) {
  int position = 0;
  while (position < parent.children().length) {
    if (parent.children()[position] == daughter) {
      return position;
    }
    position++;
  }
  return -1;
}
/** * Called by determineHead and may be overridden in subclasses if special treatment is necessary * for particular categories. */ protected Tree determineNonTrivialHead(Tree t, Tree parent) { Tree theHead = null; String motherCat = tlp.basicCategory(t.label().value()); if (DEBUG) { System.err.println( "Looking for head of " + t.label() + "; value is |" + t.label().value() + "|, " + " baseCat is |" + motherCat + '|'); } // We know we have nonterminals underneath // (a bit of a Penn Treebank assumption, but). // Look at label. // a total special case.... // first look for POS tag at end // this appears to be redundant in the Collins case since the rule already would do that // Tree lastDtr = t.lastChild(); // if (tlp.basicCategory(lastDtr.label().value()).equals("POS")) { // theHead = lastDtr; // } else { String[][] how = nonTerminalInfo.get(motherCat); if (how == null) { if (DEBUG) { System.err.println( "Warning: No rule found for " + motherCat + " (first char: " + motherCat.charAt(0) + ')'); System.err.println("Known nonterms are: " + nonTerminalInfo.keySet()); } if (defaultRule != null) { if (DEBUG) { System.err.println(" Using defaultRule"); } return traverseLocate(t.children(), defaultRule, true); } else { return null; } } for (int i = 0; i < how.length; i++) { boolean deflt = (i == how.length - 1); theHead = traverseLocate(t.children(), how[i], deflt); if (theHead != null) { break; } } if (DEBUG) { System.err.println(" Chose " + theHead.label()); } return theHead; }
/**
 * Looks up the binary transform matrix for a node with exactly two children, keyed by the basic
 * categories of the left and right child values.
 *
 * @param node the node whose transform matrix is wanted
 * @return the entry of {@code binaryTransform} for the (left, right) basic categories
 * @throws AssertionError if {@code node} does not have exactly two children
 */
public FloatMatrix getWForNode(Tree node) {
  int arity = node.children().size();
  if (arity == 1) {
    throw new AssertionError("No unary transform matrices, only unary classification");
  }
  if (arity != 2) {
    throw new AssertionError("Unexpected tree children size of " + arity);
  }
  String leftBasic = basicCategory(node.children().get(0).value());
  String rightBasic = basicCategory(node.children().get(1).value());
  return binaryTransform.get(leftBasic, rightBasic);
}
/**
 * Converts a local tree into a grammar rule over numbered states.
 *
 * <p>A tree with exactly one child yields a {@code UnaryRule}; any other tree is treated as
 * binary and its first two children are read. States are numbered through
 * {@code stateNumberer} in the order parent, then children left to right.
 *
 * @param lt the local tree to convert
 * @return the unary or binary rule built from the labels of {@code lt} and its children
 */
protected Rule ltToRule(Tree lt) {
  if (lt.children().length != 1) {
    BinaryRule binary = new BinaryRule();
    binary.parent = stateNumberer.number(lt.label().value());
    binary.leftChild = stateNumberer.number(lt.children()[0].label().value());
    binary.rightChild = stateNumberer.number(lt.children()[1].label().value());
    return binary;
  }
  UnaryRule unary = new UnaryRule();
  unary.parent = stateNumberer.number(lt.label().value());
  unary.child = stateNumberer.number(lt.children()[0].label().value());
  return unary;
}
/* Checks whether the tree t is an existential constituent * There are two cases: * -- affirmative sentences in which "there" is a left sister of the VP * -- questions in which "there" is a daughter of the SQ. * */ private boolean isExistential(Tree t, Tree parent) { if (DEBUG) { System.err.println("isExistential: " + t + ' ' + parent); } boolean toReturn = false; String motherCat = tlp.basicCategory(t.label().value()); // affirmative case if (motherCat.equals("VP") && parent != null) { // take t and the sisters Tree[] kids = parent.children(); // iterate over the sisters before t and checks if existential for (Tree kid : kids) { if (!kid.value().equals("VP")) { List<Label> tags = kid.preTerminalYield(); for (Label tag : tags) { if (tag.value().equals("EX")) { toReturn = true; } } } else { break; } } } // question case else if (motherCat.startsWith("SQ") && parent != null) { // take the daughters Tree[] kids = parent.children(); // iterate over the daughters and checks if existential for (Tree kid : kids) { if (!kid.value().startsWith("VB")) { // not necessary to look into the verb List<Label> tags = kid.preTerminalYield(); for (Label tag : tags) { if (tag.value().equals("EX")) { toReturn = true; } } } } } if (DEBUG) { System.err.println("decision " + toReturn); } return toReturn; }
/**
 * Appends the labels of all leaves under {@code t} to {@code l}, visiting children left to
 * right.
 *
 * @param t the (sub)tree to traverse
 * @param l the list that receives the leaf labels
 */
private static void leafLabels(Tree t, List<Label> l) {
  if (t.isLeaf()) {
    l.add(t.label());
    return;
  }
  for (Tree kid : t.children()) {
    leafLabels(kid, l);
  }
}
/**
 * Appends every preterminal node under {@code t} to {@code l}, visiting children left to right.
 * Recursion stops at a preterminal; its children are not visited.
 *
 * @param t the (sub)tree to traverse
 * @param l the list that receives the preterminal nodes
 */
private static void preTerminals(Tree t, List<Tree> l) {
  if (t.isPreTerminal()) {
    l.add(t);
    return;
  }
  for (Tree kid : t.children()) {
    preTerminals(kid, l);
  }
}
/**
 * Writes a parenthesised rendering of {@code tree} to {@code ps}.
 *
 * <p>Each node is written as {@code (value (child,child,))}: the node value, then every child
 * rendered recursively and followed by a comma, all wrapped in parentheses.
 *
 * @param tree the tree to print
 * @param ps the stream that receives the output
 */
public static <A> void printTree(Tree<A> tree, PrintStream ps) {
  ps.format("(%s (", tree.value());
  for (Tree<A> subtree : tree.children()) {
    printTree(subtree, ps);
    ps.print(",");
  }
  ps.print("))");
}
/** * Given a tree t, if this tree contains a QP of the form QP (RB IN CD|DT ...) well over, more * than QP (JJR IN CD|DT ...) fewer than QP (IN JJS CD|DT ...) at least QP (... CC ...) between 5 * and 10 it will transform it */ private static void doTransform(Tree t) { if (t.value().startsWith("QP")) { // look at the children List<Tree> children = t.getChildrenAsList(); if (children.size() >= 3 && children.get(0).isPreTerminal()) { // go through the children and check if they match the structure we want String child1 = children.get(0).value(); String child2 = children.get(1).value(); String child3 = children.get(2).value(); if ((child3.startsWith("CD") || child3.startsWith("DT")) && (child1.startsWith("RB") || child1.startsWith("JJ") || child1.startsWith("IN")) && (child2.startsWith("IN") || child2.startsWith("JJ"))) { transformQP(t); children = t.getChildrenAsList(); } } // If the children include a CC, we split that into left and // right subtrees with the CC in the middle so the headfinders // have an easier time interpreting the tree later on if (children.size() >= 3) { boolean flat = true; for (int i = 0; i < children.size(); ++i) { if (!children.get(i).isPreTerminal()) { flat = false; break; } } if (flat) { for (int i = 1; i < children.size() - 1; ++i) { if (children.get(i).value().startsWith("CC")) { transformCC( t, children.subList(0, i), children.get(i), children.subList(i + 1, children.size())); break; } } } } /* --- to be written or deleted } else if (t.value().startsWith("NP")) { //look at the children List<Tree> children = t.getChildrenAsList(); if (children.size() >= 3) { } ---- */ } else if (t.isPhrasal()) { for (Tree child : t.children()) { doTransform(child); } } }
/** * Add -TMP when not present within an NP * * @param tree The tree to add temporal info to. */ private void addTMP9(final Tree tree) { // do the head chain under it Tree ht = headFinder.determineHead(tree); // special fix for possessives! -- make noun before head if (ht.value().equals("POS")) { int j = tree.objectIndexOf(ht); if (j > 0) { ht = tree.getChild(j - 1); } } // Note: this next bit changes the tree label, rather // than creating a new tree node. Beware! if (ht.isPreTerminal() || ht.value().startsWith("NP") || ht.value().startsWith("PP") || ht.value().startsWith("ADVP")) { if (!TmpPattern.matcher(ht.value()).matches()) { LabelFactory lf = ht.labelFactory(); // System.err.println("TMP: Changing " + ht.value() + " to " + // ht.value() + "-TMP"); ht.setLabel(lf.newLabel(ht.value() + "-TMP")); } if (ht.value().startsWith("NP") || ht.value().startsWith("PP") || ht.value().startsWith("ADVP")) { addTMP9(ht); } } // do the NPs under it (which may or may not be the head chain Tree[] kidlets = tree.children(); for (int k = 0; k < kidlets.length; k++) { ht = kidlets[k]; LabelFactory lf; if (tree.isPrePreTerminal() && !TmpPattern.matcher(ht.value()).matches()) { // System.err.println("TMP: Changing " + ht.value() + " to " + // ht.value() + "-TMP"); lf = ht.labelFactory(); // Note: this next bit changes the tree label, rather // than creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.value() + "-TMP")); } else if (ht.value().startsWith("NP")) { // don't add -TMP twice! if (!TmpPattern.matcher(ht.value()).matches()) { lf = ht.labelFactory(); // System.err.println("TMP: Changing " + ht.value() + " to " + // ht.value() + "-TMP"); // Note: this next bit changes the tree label, rather // than creating a new tree node. Beware! ht.setLabel(lf.newLabel(ht.value() + "-TMP")); } addTMP9(ht); } } }
/**
 * Collapses per-character preterminals back into a single word leaf, in place.
 *
 * <p>At a pre-preterminal node whose first child's label value matches {@code .*_.} (a tag
 * followed by an underscore and one character), the words under all children are concatenated
 * and the node's children are replaced by one new leaf holding that string. Other
 * pre-preterminal nodes are left untouched; all remaining nodes are processed recursively.
 *
 * @param tree the tree to modify
 * @return the same {@code tree} instance that was passed in
 */
public static Tree untransformTree(Tree tree) {
  TreeFactory tf = tree.treeFactory();
  if (!tree.isPrePreTerminal()) {
    for (Tree child : tree.children()) {
      untransformTree(child);
    }
    return tree;
  }
  if (tree.firstChild().label().value().matches(".*_.")) {
    StringBuilder word = new StringBuilder();
    for (Tree charNode : tree.children()) {
      word.append(charNode.firstChild().label().value());
    }
    Tree mergedLeaf = tf.newLeaf(word.toString());
    tree.setChildren(Collections.singletonList(mergedLeaf));
  }
  return tree;
}
/**
 * Collects the labels of the words under {@code t}, annotating each with its tag label.
 *
 * <p>For every preterminal, the label of its first child is cast to {@code CoreLabel}, the
 * preterminal's own label is stored on it under {@code TagLabelAnnotation}, and it is appended
 * to {@code l}. Note that the leaf labels themselves are modified.
 *
 * @param t the (sub)tree to traverse
 * @param l the list that receives the annotated leaf labels
 */
private static void taggedLeafLabels(Tree t, List<CoreLabel> l) {
  if (!t.isPreTerminal()) {
    for (Tree kid : t.children()) {
      taggedLeafLabels(kid, l);
    }
    return;
  }
  CoreLabel wordLabel = (CoreLabel) t.getChild(0).label();
  wordLabel.set(TagLabelAnnotation.class, t.label());
  l.add(wordLabel);
}
private static boolean includesEmptyNPSubj(Tree t) { if (t == null) { return false; } Tree[] kids = t.children(); if (kids == null) { return false; } boolean foundNullSubj = false; for (Tree kid : kids) { Tree[] kidkids = kid.children(); if (NPSbjPattern.matcher(kid.value()).matches()) { kid.setValue("NP"); if (kidkids != null && kidkids.length == 1 && kidkids[0].value().equals("-NONE-")) { // only set flag, since there are 2 a couple of times (errors) foundNullSubj = true; } } } return foundNullSubj; }
/** * This is the method to call for assigning labels and node vectors to the Tree. After calling * this, each of the non-leaf nodes will have the node vector and the predictions of their classes * assigned to that subtree's node. */ public void forwardPropagateTree(Tree tree) { FloatMatrix nodeVector; FloatMatrix classification; if (tree.isLeaf()) { // We do nothing for the leaves. The preterminals will // calculate the classification for this word/tag. In fact, the // recursion should not have gotten here (unless there are // degenerate trees of just one leaf) throw new AssertionError("We should not have reached leaves in forwardPropagate"); } else if (tree.isPreTerminal()) { classification = getUnaryClassification(tree.label()); String word = tree.children().get(0).value(); FloatMatrix wordVector = getFeatureVector(word); if (wordVector == null) { wordVector = featureVectors.get(UNKNOWN_FEATURE); } nodeVector = activationFunction.apply(wordVector); } else if (tree.children().size() == 1) { throw new AssertionError( "Non-preterminal nodes of size 1 should have already been collapsed"); } else if (tree.children().size() == 2) { Tree left = tree.firstChild(), right = tree.lastChild(); forwardPropagateTree(left); forwardPropagateTree(right); String leftCategory = tree.children().get(0).label(); String rightCategory = tree.children().get(1).label(); FloatMatrix W = getBinaryTransform(leftCategory, rightCategory); classification = getBinaryClassification(leftCategory, rightCategory); FloatMatrix leftVector = tree.children().get(0).vector(); FloatMatrix rightVector = tree.children().get(1).vector(); FloatMatrix childrenVector = appendBias(leftVector, rightVector); if (useFloatTensors) { FloatTensor floatT = getBinaryFloatTensor(leftCategory, rightCategory); FloatMatrix floatTensorIn = FloatMatrix.concatHorizontally(leftVector, rightVector); FloatMatrix floatTensorOut = floatT.bilinearProducts(floatTensorIn); nodeVector = 
activationFunction.apply(W.mmul(childrenVector).add(floatTensorOut)); } else nodeVector = activationFunction.apply(W.mmul(childrenVector)); } else { throw new AssertionError("Tree not correctly binarized"); } FloatMatrix inputWithBias = appendBias(nodeVector); FloatMatrix preAct = classification.mmul(inputWithBias); FloatMatrix predictions = outputActivation.apply(preAct); tree.setPrediction(predictions); tree.setVector(nodeVector); }
/**
 * Builds a copy of {@code tree} in which every word is split into one preterminal per
 * character.
 *
 * <p>Each preterminal {@code (TAG word)} becomes {@code (TAG (TAG_x c1) (TAG_x c2) ...)}. The
 * suffix encodes the character's position: with {@code useTwoCharTags} it is "_S" for the first
 * character and "_M" for the rest; otherwise it is "_S" for a one-character word, "_B" for the
 * first, "_E" for the last and "_M" for any character in between.
 *
 * <p>Characters are taken as Unicode code points, so a supplementary-plane character encoded
 * as a surrogate pair stays together in one leaf instead of being split into two invalid
 * halves.
 *
 * @param tree the tree to transform; it is not modified
 * @return a newly built tree created with the tree factory of {@code tree}
 */
public Tree transformTree(Tree tree) {
  TreeFactory tf = tree.treeFactory();
  String tag = tree.label().value();

  if (!tree.isPreTerminal()) {
    List<Tree> newChildren = new ArrayList<>();
    for (Tree child : tree.children()) {
      newChildren.add(transformTree(child));
    }
    return tf.newTreeNode(tag, newChildren);
  }

  String word = tree.firstChild().label().value();
  int charCount = word.codePointCount(0, word.length());
  List<Tree> newPreterms = new ArrayList<>(charCount);
  int offset = 0;
  for (int position = 0; position < charCount; position++) {
    // advance by one or two UTF-16 units, depending on the width of this code point
    int width = Character.charCount(word.codePointAt(offset));
    Tree newLeaf = tf.newLeaf(word.substring(offset, offset + width));
    offset += width;

    String suffix;
    if (useTwoCharTags) {
      suffix = (charCount == 1 || position == 0) ? "_S" : "_M";
    } else if (charCount == 1) {
      suffix = "_S";
    } else if (position == 0) {
      suffix = "_B";
    } else if (position == charCount - 1) {
      suffix = "_E";
    } else {
      suffix = "_M";
    }
    newPreterms.add(tf.newTreeNode(tag + suffix, Collections.<Tree>singletonList(newLeaf)));
  }
  return tf.newTreeNode(tag, newPreterms);
}
/**
 * Emits the LaTeX node and connection commands for {@code t} and its descendants.
 *
 * <p>Node definitions ({@code \tnode} for leaves, {@code \ntnode} otherwise) are appended to
 * {@code h}, nested inside braces and indented by depth; one {@code \nodeconnect} line per
 * parent/child edge is appended to {@code c}.
 *
 * @param t the subtree to render
 * @param c receives the node-connection commands
 * @param h receives the nested node definitions
 * @param n the number used for this node's name ({@code z<n>})
 * @param nextN the next unused node number
 * @param indent the number of indentation units written before this node
 * @return the next unused node number after this subtree has been rendered
 */
private static int treeToLatexHelper(
    Tree t, StringBuilder c, StringBuilder h, int n, int nextN, int indent) {
  StringBuilder indentation = new StringBuilder();
  for (int level = 0; level < indent; level++) {
    indentation.append(" ");
  }
  boolean leaf = t.isLeaf();

  h.append('\n').append(indentation);
  h.append("{\\").append(leaf ? "" : "n").append("tnode{z");
  h.append(n).append("}{").append(t.label()).append('}');

  if (!leaf) {
    for (Tree kid : t.children()) {
      h.append(", ");
      c.append("\\nodeconnect{z").append(n).append("}{z").append(nextN).append("}\n");
      nextN = treeToLatexHelper(kid, c, h, nextN, nextN + 1, indent + 1);
    }
  }
  h.append('}');
  return nextN;
}
protected void tallyTree(Tree t, LinkedList<String> parents) { // traverse tree, building parent list String str = t.label().value(); boolean strIsPassive = (str.indexOf('@') == -1); if (strIsPassive) { parents.addFirst(str); } if (!t.isLeaf()) { if (!t.children()[0].isLeaf()) { tallyInternalNode(t, parents); for (int c = 0; c < t.children().length; c++) { Tree child = t.children()[c]; tallyTree(child, parents); } } else { tagNumberer.number(t.label().value()); } } if (strIsPassive) { parents.removeFirst(); } }
// TODO all this is very memory inefficient and will lead to stack overflows for very deep trees public static ObjectNode toJsonTree(ObjectMapper objectMapper, Tree<? extends JsonNode> tree) { final ObjectNode node = objectMapper.createObjectNode(); node.put("_value", tree.value()); final ArrayNode children = objectMapper.createArrayNode(); for (Tree<? extends JsonNode> child : tree.children()) children.add(toJsonTree(objectMapper, child)); node.put("_children", children); return node; }
/**
 * Collects one dependency per non-head daughter for every phrasal node under {@code t}.
 *
 * <p>Leaves and preterminals contribute nothing. For any other node the head daughter is
 * determined with {@code hf}; each child is processed recursively first, and then, unless it
 * is the head daughter itself (compared by reference), a dependency made by {@code typer} from
 * (head daughter, child, root) is added to {@code c}.
 *
 * @param t the current subtree
 * @param root the root of the whole tree, passed through to {@code typer}
 * @param hf the head finder used to pick the head daughter
 * @param c the collection that receives the dependencies
 * @param typer builds the dependency objects
 */
private static <E> void dependencyObjectifyHelper(
    Tree t, Tree root, HeadFinder hf, Collection<E> c, DependencyTyper<E> typer) {
  if (!t.isLeaf() && !t.isPreTerminal()) {
    Tree headDaughter = hf.determineHead(t);
    for (Tree daughter : t.children()) {
      dependencyObjectifyHelper(daughter, root, hf, c, typer);
      boolean isHead = (daughter == headDaughter);
      if (!isHead) {
        c.add(typer.makeDependency(headDaughter, daughter, root));
      }
    }
  }
}
/**
 * Searches {@code tree} for the preterminal at which the running counter {@code i} equals
 * {@code n}.
 *
 * <p>Each preterminal passed over advances {@code i} by the size of its yield. Once {@code i}
 * equals {@code n} at some node, the leftmost preterminal below that node (or the node itself)
 * is returned.
 *
 * @param tree the subtree to search
 * @param i the running position counter; updated as preterminals are skipped
 * @param n the target position
 * @return the matching preterminal, or {@code null} if this subtree does not contain it
 */
static Tree getPreTerminal(Tree tree, MutableInteger i, int n) {
  if (tree.isPreTerminal()) {
    if (i.intValue() == n) {
      return tree;
    }
    i.set(i.intValue() + tree.yield().size());
    return null;
  }

  if (i.intValue() == n) {
    // the target starts here: descend along the leftmost branch
    return getPreTerminal(tree.children()[0], i, n);
  }
  for (Tree kid : tree.children()) {
    Tree found = getPreTerminal(kid, i, n);
    if (found != null) {
      return found;
    }
  }
  return null;
}
/**
 * Restores the empty type and trace target of every {@code EmptyTreeLeaf} under {@code t}.
 *
 * <p>Each such leaf takes the empty type of its original (looked up in {@code newToOld}). Its
 * trace target is the new counterpart of the original's target when {@code oldToNew} has one,
 * i.e. when the target is contained in the tree; otherwise the old target is kept.
 *
 * @param t the copied (sub)tree to fix up; children are processed before {@code t} itself
 * @param newToOld maps copied nodes to the nodes they were copied from
 * @param oldToNew maps original nodes to their copies
 */
public static void fixEmptyTreeLeafs(Tree t, Map<Tree, Tree> newToOld, Map<Tree, Tree> oldToNew) {
  for (Tree kid : t.children()) {
    fixEmptyTreeLeafs(kid, newToOld, oldToNew);
  }
  if (!(t instanceof EmptyTreeLeaf)) {
    return;
  }

  EmptyTreeLeaf original = (EmptyTreeLeaf) newToOld.get(t);
  EmptyTreeLeaf copy = (EmptyTreeLeaf) t;
  copy.setEmptyType(original.emptyType());

  Tree originalTarget = original.traceTo();
  Tree copiedTarget = oldToNew.get(originalTarget);
  copy.setTraceTo(copiedTarget != null ? copiedTarget : originalTarget);
}
/**
 * Replaces all instances (by ==) of {@code node} below {@code t} with {@code node1}. The node
 * {@code t} itself is not affected, and the search does not descend into a replaced subtree.
 *
 * @param node the node to look for
 * @param node1 the node to put in its place
 * @param t the tree whose descendants are rewritten; every non-leaf node visited gets a fresh
 *     child list
 */
public static void replaceNode(Tree node, Tree node1, Tree t) {
  if (t.isLeaf()) {
    return;
  }
  Tree[] kids = t.children();
  List<Tree> rewritten = new ArrayList<Tree>(kids.length);
  for (Tree kid : kids) {
    if (kid == node) {
      rewritten.add(node1);
      continue;
    }
    rewritten.add(kid);
    replaceNode(node, node1, kid);
  }
  t.setChildren(rewritten);
}
/**
 * Emits the LaTeX node and connection commands for {@code t} and its descendants, padding
 * shallow leaves so that all of them end up at the same depth.
 *
 * <p>A node of depth 0 that sits above {@code maxDepth} is wrapped in
 * {@code maxDepth - curDepth} empty {@code \ntnode{pad}} nodes. Node definitions go to
 * {@code h}; one {@code \nodeconnect} line per parent/child edge goes to {@code c}.
 *
 * @param t the subtree to render
 * @param c receives the node-connection commands
 * @param h receives the nested node definitions
 * @param n the number used for this node's name ({@code z<n>})
 * @param nextN the next unused node number
 * @param indent the number of indentation units written before this node
 * @param curDepth the depth of {@code t} below the root
 * @param maxDepth the depth at which leaves should be placed
 * @return the next unused node number after this subtree has been rendered
 */
private static int treeToLatexEvenHelper(
    Tree t,
    StringBuilder c,
    StringBuilder h,
    int n,
    int nextN,
    int indent,
    int curDepth,
    int maxDepth) {
  StringBuilder indentation = new StringBuilder();
  for (int level = 0; level < indent; level++) {
    indentation.append(" ");
  }
  h.append('\n').append(indentation);

  int tDepth = t.depth();
  boolean needsPadding = tDepth == 0 && tDepth + curDepth < maxDepth;
  int padCount = needsPadding ? maxDepth - tDepth - curDepth : 0;

  for (int pad = 0; pad < padCount; pad++) {
    h.append("{\\ntnode{pad}{}, ");
  }
  h.append("{\\ntnode{z").append(n).append("}{").append(t.label()).append('}');

  if (!t.isLeaf()) {
    for (Tree kid : t.children()) {
      h.append(", ");
      c.append("\\nodeconnect{z").append(n).append("}{z").append(nextN).append("}\n");
      nextN =
          treeToLatexEvenHelper(kid, c, h, nextN, nextN + 1, indent + 1, curDepth + 1, maxDepth);
    }
  }

  // close the padding nodes opened above, then this node itself
  for (int pad = 0; pad < padCount; pad++) {
    h.append('}');
  }
  h.append('}');
  return nextN;
}
public static ArrayList<ArrayList<TaggedWord>> getPhrases(Tree parse, int phraseSizeLimit) { ArrayList<ArrayList<TaggedWord>> newList = new ArrayList<ArrayList<TaggedWord>>(); List<Tree> leaves = parse.getLeaves(); if (leaves.size() <= phraseSizeLimit) { // ArrayList<TaggedWord> phraseElements = PreprocessPhrase(parse.taggedYield()); ArrayList<TaggedWord> phraseElements = Preprocess(parse.taggedYield()); if (phraseElements.size() > 0) newList.add(phraseElements); } else { Tree[] childrenNodes = parse.children(); for (int i = 0; i < childrenNodes.length; i++) { Tree currentParse = childrenNodes[i]; newList.addAll(getPhrases(currentParse, phraseSizeLimit)); } } return newList; }
static boolean rightEdge(Tree t, Tree t1, MutableInteger i) { if (t == t1) { return true; } else if (t1.isLeaf()) { int j = t1.yield().size(); // so that empties don't add size i.set(i.intValue() - j); return false; } else { Tree[] kids = t1.children(); for (int j = kids.length - 1; j >= 0; j--) { if (rightEdge(t, kids[j], i)) { return true; } } return false; } }
/**
 * Recursively copies {@code t}, recording the correspondence between original and copied nodes
 * in both directions.
 *
 * <p>Children are copied first. A node without children is recreated with
 * {@code newLeaf}, any other node with {@code newTreeNode}, both from the tree factory of
 * {@code t} and reusing the original label object.
 *
 * @param t the tree to copy
 * @param newToOld receives an entry (copy -> original) for every node
 * @param oldToNew receives an entry (original -> copy) for every node, leaves included
 * @return the copy of {@code t}
 */
public static Tree copyHelper(Tree t, Map<Tree, Tree> newToOld, Map<Tree, Tree> oldToNew) {
  Tree[] kids = t.children();
  Tree[] newKids = new Tree[kids.length];
  for (int i = 0; i < kids.length; i++) {
    newKids[i] = copyHelper(kids[i], newToOld, oldToNew);
  }

  TreeFactory tf = t.treeFactory();
  Tree copy =
      (kids.length == 0)
          ? tf.newLeaf(t.label())
          : tf.newTreeNode(t.label(), Arrays.asList(newKids));

  // Both maps are filled the same way for leaves and internal nodes: the leaf case used to put
  // (copy -> original) into oldToNew, so lookups of an original leaf never found its copy.
  newToOld.put(copy, t);
  oldToNew.put(t, copy);
  return copy;
}
/**
 * Map a function over a tree.
 *
 * <p>The value of the root is converted immediately. The children are converted through
 * {@code Iterables.transform}, applying this method recursively to each child.
 * NOTE(review): if this is Guava's {@code Iterables.transform}, the child conversion is lazy -
 * confirm.
 *
 * @param fn Function
 * @param tree Tree of {@code A}'s
 * @return Tree of {@code B}'s
 */
public static <A,B> Tree<B> map(final Function<A,B> fn, Tree<A> tree) {
  final B mappedValue = fn.apply(tree.value());
  if (isLeaf(tree)) {
    return new ImmutableTree<B>(mappedValue);
  }
  final Function<Tree<A>,Tree<B>> mapSubtree =
      new Function<Tree<A>,Tree<B>>() {
        public Tree<B> apply(Tree<A> subtree) {
          return map(fn, subtree);
        }
      };
  final Iterable<Tree<B>> mappedChildren = Iterables.transform(tree.children(), mapSubtree);
  return new ImmutableTree<B>(mappedValue, mappedChildren);
}
/* Is the tree t a WH-question? * At present this is only true if the tree t is a SQ having a WH.* sister * and headed by a SBARQ. * (It was changed to looser definition in Feb 2006.) * */ private static boolean isWHQ(Tree t, Tree parent) { if (t == null) return false; boolean toReturn = false; if (t.value().startsWith("SQ")) { if (parent != null && parent.value().equals("SBARQ")) { Tree[] kids = parent.children(); for (Tree kid : kids) { // looks for a WH.* if (kid.value().startsWith("WH")) { toReturn = true; } } } } if (DEBUG) { System.err.println("in isWH, decision: " + toReturn + " for node " + t); } return toReturn; }
/**
 * Looks up the classification matrix for {@code node}.
 *
 * <p>With {@code combineClassification} set, the single unary entry stored under the empty
 * string is returned for every node. Otherwise the matrix is selected by the basic categories
 * of the node's children: the binary table for two children, the unary table for one.
 *
 * @param node the node whose classification matrix is wanted
 * @return the matching classification matrix
 * @throws AssertionError if classification is not combined and {@code node} has neither one
 *     nor two children
 */
public FloatMatrix getClassWForNode(Tree node) {
  if (combineClassification) {
    return unaryClassification.get("");
  }
  int arity = node.children().size();
  if (arity == 2) {
    String leftBasic = basicCategory(node.children().get(0).value());
    String rightBasic = basicCategory(node.children().get(1).value());
    return binaryClassification.get(leftBasic, rightBasic);
  }
  if (arity == 1) {
    String unaryBasic = basicCategory(node.children().get(0).value());
    return unaryClassification.get(unaryBasic);
  }
  throw new AssertionError("Unexpected tree children size of " + arity);
}