/**
 * Describes a local tree as a list of category strings: the label value of the mother {@code t}
 * comes first, followed by the label value of each daughter in left-to-right order.
 *
 * @param t the mother node of the local tree
 * @return a new list holding the mother's category followed by the daughters' categories
 */
public static List<String> localTreeAsCatList(Tree t) {
  Tree[] daughters = t.children();
  List<String> categories = new ArrayList<String>(daughters.length + 1);
  categories.add(t.label().value());
  for (Tree daughter : daughters) {
    categories.add(daughter.label().value());
  }
  return categories;
}
/**
 * Finds the position of <code>daughter</code> among the children of <code>parent</code>, comparing
 * by object identity (<code>==</code>) rather than <code>equals</code>.
 *
 * @param parent the node whose children are searched
 * @param daughter the child node to look for
 * @return the index of the first child that is the very same object as <code>daughter</code>, or
 *     -1 if there is none
 */
public static int objectEqualityIndexOf(Tree parent, Tree daughter) {
  Tree[] kids = parent.children();
  int position = 0;
  while (position < kids.length) {
    if (kids[position] == daughter) {
      return position;
    }
    position++;
  }
  return -1;
}
/**
 * Prunes every tree in {@code treeList}, passing each one the running word offset at which it
 * starts. The offset begins at {@code start} and advances by the size of each tree's yield,
 * measured before that tree is pruned.
 *
 * @param treeList the sibling trees to prune, in order
 * @param start the offset of the first tree in the list
 * @return a new list with the pruned counterpart of each input tree, in the same order
 */
private List<Tree> helper(List<Tree> treeList, int start) {
  List<Tree> pruned = new ArrayList<Tree>(treeList.size());
  int offset = start;
  for (Tree subtree : treeList) {
    // measure the span before pruning so the next sibling's offset is based on the original tree
    int width = subtree.yield().size();
    pruned.add(prune(subtree, offset));
    offset += width;
  }
  return pruned;
}
/**
 * Appends the label of every leaf under {@code t} to {@code l}, visiting children left to right.
 *
 * @param t the subtree to walk
 * @param l the accumulator that receives the leaf labels
 */
private static void leafLabels(Tree t, List<Label> l) {
  if (t.isLeaf()) {
    l.add(t.label());
    return;
  }
  for (Tree child : t.children()) {
    leafLabels(child, l);
  }
}
/**
 * Appends every preterminal node under {@code t} to {@code l}, visiting children left to right.
 * The walk stops descending as soon as a preterminal is reached.
 *
 * @param t the subtree to walk
 * @param l the accumulator that receives the preterminal nodes
 */
private static void preTerminals(Tree t, List<Tree> l) {
  if (t.isPreTerminal()) {
    l.add(t);
    return;
  }
  for (Tree child : t.children()) {
    preTerminals(child, l);
  }
}
/**
 * Tokenizes {@code sentence} with a PTB tokenizer, parses the tokens with {@code lp}, and returns
 * the tagged yield of the resulting parse tree.
 *
 * @param sentence the raw text to tokenize and parse
 * @param lp the parser applied to the token list
 * @return the tagged words of the parse
 */
public static ArrayList<TaggedWord> StanfordParse(String sentence, LexicalizedParser lp) {
  TokenizerFactory<CoreLabel> factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
  List<CoreLabel> tokens = factory.getTokenizer(new StringReader(sentence)).tokenize();
  return lp.apply(tokens).taggedYield();
}
/**
 * Gets the <i>i</i>th leaf of a tree from the left. The leftmost leaf is numbered 0.
 *
 * <p>The index is only compared when the node being visited is itself a leaf, so a non-leaf node
 * is never returned. In particular a negative <code>i</code> yields <code>null</code> rather than
 * the first node of the iteration.
 *
 * @param tree the tree whose nodes are iterated
 * @param i the zero-based index of the wanted leaf
 * @return The <i>i</i><sup>th</sup> leaf as a Tree, or <code>null</code> if there is no such
 *     leaf.
 */
public static Tree getLeaf(Tree tree, int i) {
  int count = -1;
  for (Tree next : tree) {
    if (next.isLeaf()) {
      count++;
      // compare only on leaves: the counter changes nowhere else, and an internal node must
      // never be handed back as a "leaf"
      if (count == i) {
        return next;
      }
    }
  }
  return null;
}
/**
 * Collects the labels of the words under {@code t}, annotating each with its tag. For every
 * preterminal, the label of its first child is cast to {@link CoreLabel}, the preterminal's own
 * label is stored on it under {@code TagLabelAnnotation}, and the label is appended to {@code l}.
 *
 * @param t the subtree to walk
 * @param l the accumulator that receives the annotated word labels
 */
private static void taggedLeafLabels(Tree t, List<CoreLabel> l) {
  if (!t.isPreTerminal()) {
    for (Tree child : t.children()) {
      taggedLeafLabels(child, l);
    }
    return;
  }
  CoreLabel wordLabel = (CoreLabel) t.getChild(0).label();
  // note: this mutates the label object held by the tree itself
  wordLabel.set(TagLabelAnnotation.class, t.label());
  l.add(wordLabel);
}
/**
 * Replaces every occurrence (by ==) of {@code node} strictly below {@code t} with {@code node1}.
 * The node {@code t} itself is never replaced, and the search does not descend into an inserted
 * {@code node1}.
 *
 * @param node the node to look for, compared by identity
 * @param node1 the node substituted for each occurrence
 * @param t the tree whose descendants are rewritten in place
 */
public static void replaceNode(Tree node, Tree node1, Tree t) {
  if (t.isLeaf()) {
    return;
  }
  Tree[] oldChildren = t.children();
  List<Tree> updated = new ArrayList<Tree>(oldChildren.length);
  for (Tree child : oldChildren) {
    if (child == node) {
      updated.add(node1);
    } else {
      updated.add(child);
      replaceNode(node, node1, child);
    }
  }
  t.setChildren(updated);
}
/**
 * Returns the maximal projection of <code>head</code> in <code>root</code> given a {@link
 * HeadFinder}. Starting at <code>head</code>, the search climbs to the parent for as long as the
 * head finder picks the current node (by ==) as the head of that parent, and stops at
 * <code>root</code> at the latest.
 *
 * @param head the node whose projection is sought
 * @param root the tree within which parents are looked up
 * @param hf the head finder deciding which child heads each parent
 * @return the highest node reached, or <code>root</code> if the climb reaches it
 */
public static Tree maximalProjection(Tree head, Tree root, HeadFinder hf) {
  Tree current = head;
  while (current != root) {
    Tree parent = current.parent(root);
    if (hf.determineHead(parent) != current) {
      return current;
    }
    current = parent;
  }
  return root;
}
/**
 * Splits {@code text} into sentences with a {@code DocumentPreprocessor}, parses each one with
 * the {@code lp} field, and returns the tagged yield of the last sentence parsed. Tagged words
 * of earlier sentences are overwritten, and an empty list is returned when no sentence is
 * produced. Prints a progress message to standard output.
 *
 * @param text the raw text to parse
 * @return the tagged words of the last parsed sentence, or an empty list
 */
private ArrayList<TaggedWord> parseSentenceTD(String text) {
  System.out.println("Parsing sentence...");
  ArrayList<TaggedWord> tagged = new ArrayList<TaggedWord>();
  DocumentPreprocessor sentences = new DocumentPreprocessor(new StringReader(text));
  for (List<HasWord> sentence : sentences) {
    // each iteration replaces the previous result
    tagged = lp.apply(sentence).taggedYield();
  }
  return tagged;
}
/**
 * Returns true iff <code>head</code> (transitively) heads <code>node</code>.
 *
 * <p>The chain of head children is followed downward from <code>node</code> using
 * <code>hf</code>; the result is true as soon as one of those head children is
 * <code>head</code> itself (compared by ==). A node is not considered to head itself. The
 * previous implementation never compared anything against <code>head</code> and therefore could
 * not return true.
 *
 * @param head the candidate head node
 * @param node the node whose head chain is inspected
 * @param hf the head finder used to pick the head child at each level
 * @return true if <code>head</code> is reached along the head chain below <code>node</code>
 */
public static boolean heads(Tree head, Tree node, HeadFinder hf) {
  Tree current = node;
  // stop on a leaf (nothing below it) or if the head finder yields no head child
  while (current != null && !current.isLeaf()) {
    Tree headChild = hf.determineHead(current);
    if (headChild == head) {
      return true;
    }
    current = headChild;
  }
  return false;
}
/**
 * Renders {@code t} as LaTeX text: the string {@code \tree}, then the node hierarchy built by
 * {@code treeToLatexEvenHelper}, a newline, the node connections, and a final newline. The
 * tree's depth is passed to the helper as the maximum depth.
 *
 * @param t the tree to render
 * @return the LaTeX source for the tree
 */
public static String treeToLatexEven(Tree t) {
  StringBuilder edges = new StringBuilder();
  StringBuilder nodes = new StringBuilder();
  treeToLatexEvenHelper(t, edges, nodes, 0, 1, 0, 0, t.depth());
  StringBuilder latex = new StringBuilder("\\tree");
  latex.append(nodes).append('\n').append(edges).append('\n');
  return latex.toString();
}
static boolean rightEdge(Tree t, Tree t1, MutableInteger i) { if (t == t1) { return true; } else if (t1.isLeaf()) { int j = t1.yield().size(); // so that empties don't add size i.set(i.intValue() - j); return false; } else { Tree[] kids = t1.children(); for (int j = kids.length - 1; j >= 0; j--) { if (rightEdge(t, kids[j], i)) { return true; } } return false; } }
public static ArrayList<ArrayList<TaggedWord>> getPhrases(Tree parse, int phraseSizeLimit) { ArrayList<ArrayList<TaggedWord>> newList = new ArrayList<ArrayList<TaggedWord>>(); List<Tree> leaves = parse.getLeaves(); if (leaves.size() <= phraseSizeLimit) { // ArrayList<TaggedWord> phraseElements = PreprocessPhrase(parse.taggedYield()); ArrayList<TaggedWord> phraseElements = Preprocess(parse.taggedYield()); if (phraseElements.size() > 0) newList.add(phraseElements); } else { Tree[] childrenNodes = parse.children(); for (int i = 0; i < childrenNodes.length; i++) { Tree currentParse = childrenNodes[i]; newList.addAll(getPhrases(currentParse, phraseSizeLimit)); } } return newList; }
/**
 * Return information about the objects in this Tree: the short class names of the root, its tree
 * factory, its label and its label factory, plus any differing class names found on the nodes
 * iterated within the tree.
 *
 * @param t The tree to examine.
 * @return A human-readable String
 */
public static String toDebugStructureString(Tree t) {
  String[] rootNames = {
    StringUtils.getShortClassName(t),
    StringUtils.getShortClassName(t.treeFactory()),
    StringUtils.getShortClassName(t.label()),
    StringUtils.getShortClassName(t.label().labelFactory())
  };

  Set<String> differing = new HashSet<String>();
  for (Tree sub : t) {
    String[] subNames = {
      StringUtils.getShortClassName(sub),
      StringUtils.getShortClassName(sub.treeFactory()),
      StringUtils.getShortClassName(sub.label()),
      StringUtils.getShortClassName(sub.label().labelFactory())
    };
    // compare slot by slot: tree class, tree factory, label class, label factory
    for (int slot = 0; slot < rootNames.length; slot++) {
      if (!rootNames[slot].equals(subNames[slot])) {
        differing.add(subNames[slot]);
      }
    }
  }

  String description =
      "Tree with root of class " + rootNames[0] + " and factory " + rootNames[1]
          + " with label class " + rootNames[2] + " and factory " + rootNames[3];
  if (!differing.isEmpty()) {
    description += " with the following classes also found within the tree: " + differing;
  }
  return description;
}
/** * Returns the positional index of the right edge of a tree <i>t</i> within a given root, as * defined by the size of the yield of all material preceding <i>t</i> plus all the material * contained in <i>t</i>. */ public static int rightEdge(Tree t, Tree root) { MutableInteger i = new MutableInteger(root.yield().size()); if (rightEdge(t, root, i)) { return i.intValue(); } else { throw new RuntimeException("Tree is not a descendent of root."); // return root.yield().size() + 1; } }
/**
 * Recursively copies {@code t} using its own tree factory and records the correspondence
 * between original and copied nodes in both directions.
 *
 * <p>Children are copied first. A node without children is recreated with {@code newLeaf}, any
 * other node with {@code newTreeNode}; the original label object is reused in both cases. For
 * every node, leaves included, {@code newToOld} maps copy to original and {@code oldToNew} maps
 * original to copy. (The leaf case previously stored the {@code oldToNew} entry with key and
 * value swapped.)
 *
 * @param t the tree to copy
 * @param newToOld filled with copy-to-original entries
 * @param oldToNew filled with original-to-copy entries
 * @return the copy of {@code t}
 */
public static Tree copyHelper(Tree t, Map<Tree, Tree> newToOld, Map<Tree, Tree> oldToNew) {
  Tree[] kids = t.children();
  Tree[] newKids = new Tree[kids.length];
  for (int i = 0, n = kids.length; i < n; i++) {
    newKids[i] = copyHelper(kids[i], newToOld, oldToNew);
  }
  TreeFactory tf = t.treeFactory();
  Tree copy;
  if (kids.length == 0) {
    copy = tf.newLeaf(t.label());
  } else {
    copy = tf.newTreeNode(t.label(), Arrays.asList(newKids));
  }
  // register both directions identically for leaves and internal nodes
  newToOld.put(copy, t);
  oldToNew.put(t, copy);
  return copy;
}
public static void main(String args[]) { // String sentence1 = "A large bird standing on a table picks up a plastic glass // containing liquid and places it in a bowl of something."; // String sentence2 = "A bird picks up a plastic cup containing a liquid with it's beak // and puts the cup into a bowl."; // LexicalizedParser lp = new LexicalizedParser("englishPCFG.ser.gz"); // LeskWSD tm = new LeskWSD(lp); // WordNetSimilarity ws = new WordNetSimilarity(); // // System.out.println(LexicalSimilarityScoreWordNet(sentence1, sentence2, tm, lp, ws)); String sentence = "The broader Standard & Poor's 500 Index <.SPX> shed 2.38 points, or 0.24 percent, at 995.10."; LexicalizedParser lp = new LexicalizedParser("englishPCFG.ser.gz"); Tree parse = lp.apply(sentence); ArrayList<TaggedWord> taggedWords = parse.taggedYield(); taggedWords = Preprocess(taggedWords); for (int i = 0; i < taggedWords.size(); i++) System.out.println(taggedWords.get(i).word()); }
Tree prune(Tree tree, int start) { if (tree.isLeaf() || tree.isPreTerminal()) { return tree; } // check each node's children for deletion List<Tree> children = helper(tree.getChildrenAsList(), start); children = prune(children, tree.label(), start, start + tree.yield().size()); return tree.treeFactory().newTreeNode(tree.label(), children); }
/**
 * Applies a TreeVisitor to all projections (including the node itself) of a node in a Tree. The
 * climb continues to the parent for as long as the head finder picks the current node (by ==)
 * as the head of that parent, and stops at <code>root</code> at the latest. Does nothing if
 * head is not in root, i.e. it has no parent within root and is not root itself.
 *
 * @param v the visitor invoked on each projection, lowest first
 * @param head the node whose projections are visited
 * @param root the tree within which parents are looked up
 * @param hf the head finder deciding which child heads each parent
 * @return the maximal projection of head in root, or <code>null</code> if head is not in root
 */
public static Tree applyToProjections(TreeVisitor v, Tree head, Tree root, HeadFinder hf) {
  Tree current = head;
  Tree parent = current.parent(root);
  if (parent == null && current != root) {
    return null;
  }
  v.visitTree(current);
  while (current != root && hf.determineHead(parent) == current) {
    current = parent;
    v.visitTree(current);
    if (current != root) {
      parent = current.parent(root);
    }
  }
  return current;
}
/** * returns list of tree nodes to root from t. Includes root and t. Returns null if tree not found * dominated by root */ public static List<Tree> pathFromRoot(Tree t, Tree root) { if (t == root) { // if (t.equals(root)) { List<Tree> l = new ArrayList<Tree>(1); l.add(t); return l; } else if (root == null) { return null; } return root.dominationPath(t); }
/**
 * Recursive worker that emits LaTeX for the subtree {@code t}. A node declaration is appended to
 * {@code h} on a new, indented line ({@code \tnode} for leaves, {@code \ntnode} otherwise), and
 * one {@code \nodeconnect} line per child is appended to {@code c}. Nodes are named {@code z}
 * followed by their number.
 *
 * @param t the subtree to render
 * @param c receives the node connections
 * @param h receives the node hierarchy
 * @param n the number assigned to {@code t}
 * @param nextN the next unused node number
 * @param indent the number of indentation units written before the node
 * @return the next unused node number after rendering this subtree
 */
private static int treeToLatexHelper(
    Tree t, StringBuilder c, StringBuilder h, int n, int nextN, int indent) {
  h.append('\n');
  for (int i = 0; i < indent; i++) {
    h.append(" ");
  }
  String macroPrefix = t.isLeaf() ? "" : "n";
  h.append("{\\").append(macroPrefix).append("tnode{z");
  h.append(n).append("}{").append(t.label()).append('}');

  int next = nextN;
  if (!t.isLeaf()) {
    for (Tree kid : t.children()) {
      h.append(", ");
      c.append("\\nodeconnect{z").append(n).append("}{z").append(next).append("}\n");
      // the child takes number `next`; the call returns the first number still free afterwards
      next = treeToLatexHelper(kid, c, h, next, next + 1, indent + 1);
    }
  }
  h.append('}');
  return next;
}
/**
 * Searches {@code tree} left to right for the preterminal that starts at position {@code n}.
 * The counter {@code i} holds the position reached so far and is advanced by the yield size of
 * every preterminal that is passed over. Once the counter equals {@code n}, the search follows
 * first children down to a preterminal.
 *
 * @param tree the subtree to search
 * @param i running position counter, updated in place
 * @param n the target position
 * @return the matching preterminal, or null if none is found in this subtree
 */
static Tree getPreTerminal(Tree tree, MutableInteger i, int n) {
  boolean atTarget = i.intValue() == n;
  if (tree.isPreTerminal()) {
    if (atTarget) {
      return tree;
    }
    i.set(i.intValue() + tree.yield().size());
    return null;
  }
  if (atTarget) {
    return getPreTerminal(tree.children()[0], i, n);
  }
  for (Tree kid : tree.children()) {
    Tree found = getPreTerminal(kid, i, n);
    if (found != null) {
      return found;
    }
  }
  return null;
}
/**
 * Restores the empty type and trace target of every {@code EmptyTreeLeaf} in a copied tree.
 * traceTo() values that are contained in the tree (i.e. have an entry in {@code oldToNew}) are
 * redirected to the corresponding new objects; traceTo() values that are not contained in the
 * tree keep the old value.
 *
 * @param t the copied tree to fix up, processed children first
 * @param newToOld maps each copied node to the node it was copied from
 * @param oldToNew maps each original node to its copy
 */
public static void fixEmptyTreeLeafs(Tree t, Map<Tree, Tree> newToOld, Map<Tree, Tree> oldToNew) {
  for (Tree kid : t.children()) {
    fixEmptyTreeLeafs(kid, newToOld, oldToNew);
  }
  if (!(t instanceof EmptyTreeLeaf)) {
    return;
  }
  EmptyTreeLeaf copy = (EmptyTreeLeaf) t;
  EmptyTreeLeaf original = (EmptyTreeLeaf) newToOld.get(t);
  copy.setEmptyType(original.emptyType());
  Tree oldTarget = original.traceTo();
  Tree newTarget = oldToNew.get(oldTarget);
  if (newTarget == null) {
    copy.setTraceTo(oldTarget);
  } else {
    copy.setTraceTo(newTarget);
  }
}
protected void tallyInternalNode(Tree lt, List parents) { // form base rule String label = lt.label().value(); Rule baseR = ltToRule(lt); ruleToLabel.put(baseR, label); // act on each history depth for (int depth = 0, maxDepth = Math.min(HISTORY_DEPTH(), parents.size()); depth <= maxDepth; depth++) { List history = new ArrayList(parents.subList(0, depth)); // tally each history level / rewrite pair rulePairs.incrementCount(new Pair(baseR, history), 1); labelPairs.incrementCount(new Pair(label, history), 1); } }
/**
 * Recursive worker that emits LaTeX for the subtree {@code t}, padding shallow leaves. A node of
 * depth 0 that sits above {@code maxDepth} is wrapped in {@code maxDepth - curDepth} empty
 * {@code pad} nodes. Node declarations are appended to {@code h} and one {@code \nodeconnect}
 * line per child is appended to {@code c}.
 *
 * @param t the subtree to render
 * @param c receives the node connections
 * @param h receives the node hierarchy
 * @param n the number assigned to {@code t}
 * @param nextN the next unused node number
 * @param indent the number of indentation units written before the node
 * @param curDepth the depth of {@code t} below the rendered root
 * @param maxDepth the depth of the whole tree being rendered
 * @return the next unused node number after rendering this subtree
 */
private static int treeToLatexEvenHelper(
    Tree t,
    StringBuilder c,
    StringBuilder h,
    int n,
    int nextN,
    int indent,
    int curDepth,
    int maxDepth) {
  h.append('\n');
  for (int i = 0; i < indent; i++) {
    h.append(" ");
  }

  int tDepth = t.depth();
  // number of padding wrappers: only for depth-0 nodes that have not reached maxDepth
  int padding = 0;
  if (tDepth == 0 && tDepth + curDepth < maxDepth) {
    padding = maxDepth - tDepth - curDepth;
  }
  for (int pad = 0; pad < padding; pad++) {
    h.append("{\\ntnode{pad}{}, ");
  }

  h.append("{\\ntnode{z").append(n).append("}{").append(t.label()).append('}');

  int next = nextN;
  if (!t.isLeaf()) {
    for (Tree kid : t.children()) {
      h.append(", ");
      c.append("\\nodeconnect{z").append(n).append("}{z").append(next).append("}\n");
      next =
          treeToLatexEvenHelper(kid, c, h, next, next + 1, indent + 1, curDepth + 1, maxDepth);
    }
  }

  // close the padding wrappers opened above, then the node itself
  for (int pad = 0; pad < padding; pad++) {
    h.append('}');
  }
  h.append('}');
  return next;
}
/**
 * Normalizes a tree in place and then as a whole. The label value of every node iterated in
 * {@code tree} is rewritten with {@code normalizeTerminal} (leaves) or
 * {@code normalizeNonterminal} (all other nodes); the tree is then handed to
 * {@code normalizeWholeTree}.
 *
 * @param tree the tree whose labels are rewritten
 * @param tn the normalizer supplying the per-label and whole-tree normalization
 * @param tf the tree factory passed on to {@code normalizeWholeTree}
 * @return the result of {@code tn.normalizeWholeTree(tree, tf)}
 */
public static Tree normalizeTree(Tree tree, TreeNormalizer tn, TreeFactory tf) {
  for (Tree node : tree) {
    boolean terminal = node.isLeaf();
    String raw = node.label().value();
    String normalized = terminal ? tn.normalizeTerminal(raw) : tn.normalizeNonterminal(raw);
    node.label().setValue(normalized);
  }
  return tn.normalizeWholeTree(tree, tf);
}
/** * returns a list of categories that is the path from Tree from to Tree to within Tree root. If * either from or to is not in root, returns null. Otherwise includes both from and to in the * list. */ public static List<String> pathNodeToNode(Tree from, Tree to, Tree root) { List<Tree> fromPath = pathFromRoot(from, root); // System.out.println(treeListToCatList(fromPath)); if (fromPath == null) return null; List<Tree> toPath = pathFromRoot(to, root); // System.out.println(treeListToCatList(toPath)); if (toPath == null) return null; // System.out.println(treeListToCatList(fromPath)); // System.out.println(treeListToCatList(toPath)); int last = 0; int min = fromPath.size() <= toPath.size() ? fromPath.size() : toPath.size(); Tree lastNode = null; // while((! (fromPath.isEmpty() || toPath.isEmpty())) && // fromPath.get(0).equals(toPath.get(0))) { // lastNode = (Tree) fromPath.remove(0); // toPath.remove(0); // } while (last < min && fromPath.get(last).equals(toPath.get(last))) { lastNode = fromPath.get(last); last++; } // System.out.println(treeListToCatList(fromPath)); // System.out.println(treeListToCatList(toPath)); List<String> totalPath = new ArrayList<String>(); for (int i = fromPath.size() - 1; i >= last; i--) { Tree t = fromPath.get(i); totalPath.add("up-" + t.label().value()); } if (lastNode != null) totalPath.add("up-" + lastNode.label().value()); for (Tree t : toPath) totalPath.add("down-" + t.label().value()); // for(ListIterator i = fromPath.listIterator(fromPath.size()); i.hasPrevious(); ){ // Tree t = (Tree) i.previous(); // totalPath.add("up-" + t.label().value()); // } // if(lastNode != null) // totalPath.add("up-" + lastNode.label().value()); // for(ListIterator j = toPath.listIterator(); j.hasNext(); ){ // Tree t = (Tree) j.next(); // totalPath.add("down-" + t.label().value()); // } return totalPath; }
/**
 * Converts a local tree into a grammar rule over state numbers obtained from
 * {@code stateNumberer}. A node with exactly one child yields a {@code UnaryRule}; any other
 * node yields a {@code BinaryRule} built from its first two children.
 *
 * @param lt the local tree to convert
 * @return the unary or binary rule for {@code lt}
 */
protected Rule ltToRule(Tree lt) {
  Tree[] kids = lt.children();
  String parentCategory = lt.label().value();
  if (kids.length != 1) {
    // NOTE(review): this branch reads kids[0] and kids[1], so it expects at least two children
    BinaryRule binary = new BinaryRule();
    binary.parent = stateNumberer.number(parentCategory);
    binary.leftChild = stateNumberer.number(kids[0].label().value());
    binary.rightChild = stateNumberer.number(kids[1].label().value());
    return binary;
  }
  UnaryRule unary = new UnaryRule();
  unary.parent = stateNumberer.number(parentCategory);
  unary.child = stateNumberer.number(kids[0].label().value());
  return unary;
}