/** * A little utility function to make sure a SemanticGraph is a tree. * * @param tree The tree to check. * @return True if this {@link edu.stanford.nlp.semgraph.SemanticGraph} is a tree (versus a DAG, * or Graph). */ public static boolean isTree(SemanticGraph tree) { for (IndexedWord vertex : tree.vertexSet()) { // Check one and only one incoming edge if (tree.getRoots().contains(vertex)) { if (tree.incomingEdgeIterator(vertex).hasNext()) { return false; } } else { Iterator<SemanticGraphEdge> iter = tree.incomingEdgeIterator(vertex); if (!iter.hasNext()) { return false; } iter.next(); if (iter.hasNext()) { return false; } } // Check incoming and outgoing edges match for (SemanticGraphEdge edge : tree.outgoingEdgeIterable(vertex)) { boolean foundReverse = false; for (SemanticGraphEdge reverse : tree.incomingEdgeIterable(edge.getDependent())) { if (reverse == edge) { foundReverse = true; } } if (!foundReverse) { return false; } } for (SemanticGraphEdge edge : tree.incomingEdgeIterable(vertex)) { boolean foundReverse = false; for (SemanticGraphEdge reverse : tree.outgoingEdgeIterable(edge.getGovernor())) { if (reverse == edge) { foundReverse = true; } } if (!foundReverse) { return false; } } } // Check for cycles if (isCyclic(tree)) { return false; } // Check topological sort -- sometimes fails? // try { // tree.topologicalSort(); // } catch (Exception e) { // e.printStackTrace(); // return false; // } return true; }
/** * The basic method for splitting off a clause of a tree. This modifies the tree in place. This * method addtionally follows ref edges. * * @param tree The tree to split a clause from. * @param toKeep The edge representing the clause to keep. */ @SuppressWarnings("unchecked") private void simpleClause(SemanticGraph tree, SemanticGraphEdge toKeep) { splitToChildOfEdge(tree, toKeep); // Follow 'ref' edges Map<IndexedWord, IndexedWord> refReplaceMap = new HashMap<>(); // (find replacements) for (IndexedWord vertex : tree.vertexSet()) { for (SemanticGraphEdge edge : extraEdgesByDependent.get(vertex)) { if ("ref".equals(edge.getRelation().toString()) && // it's a ref edge... !tree.containsVertex( edge.getGovernor())) { // ...that doesn't already exist in the tree. refReplaceMap.put(vertex, edge.getGovernor()); } } } // (do replacements) for (Map.Entry<IndexedWord, IndexedWord> entry : refReplaceMap.entrySet()) { Iterator<SemanticGraphEdge> iter = tree.incomingEdgeIterator(entry.getKey()); if (!iter.hasNext()) { continue; } SemanticGraphEdge incomingEdge = iter.next(); IndexedWord governor = incomingEdge.getGovernor(); tree.removeVertex(entry.getKey()); addSubtree( tree, governor, incomingEdge.getRelation().toString(), this.tree, entry.getValue(), this.tree.incomingEdgeList(tree.getFirstRoot())); } }
/**
 * Determine if a tree is cyclic.
 *
 * <p>Starting from every non-root vertex, this walks upward along the first incoming edge of
 * each node and reports a cycle as soon as a node is reached twice on the same walk. Only the
 * first incoming edge of each node is followed.
 *
 * <p>A non-root vertex with no incoming edge has no ancestors to walk, so it is skipped rather
 * than causing a {@link java.util.NoSuchElementException}.
 *
 * @param tree The tree to check.
 * @return True if the tree has at least one cycle in it.
 */
public static boolean isCyclic(SemanticGraph tree) {
  for (IndexedWord vertex : tree.vertexSet()) {
    if (tree.getRoots().contains(vertex)) {
      continue;
    }
    Iterator<SemanticGraphEdge> parentEdges = tree.incomingEdgeIterator(vertex);
    if (!parentEdges.hasNext()) {
      // Dangling vertex (neither a root nor attached to anything): nothing to walk.
      continue;
    }

    Set<IndexedWord> seen = new HashSet<>();
    seen.add(vertex);
    IndexedWord node = parentEdges.next().getGovernor();
    while (node != null) {
      // Set.add returns false when the node was already visited on this walk.
      if (!seen.add(node)) {
        return true;
      }
      Iterator<SemanticGraphEdge> upward = tree.incomingEdgeIterator(node);
      node = upward.hasNext() ? upward.next().getGovernor() : null;
    }
  }
  return false;
}
/** * A helper to add an entire subtree to a given dependency tree. * * @param toModify The tree to add the subtree to. * @param root The root of the tree where we should be adding the subtree. * @param rel The relation to add the subtree with. * @param originalTree The orignal tree (i.e., {@link ClauseSplitterSearchProblem#tree}). * @param subject The root of the clause to add. * @param ignoredEdges The edges to ignore adding when adding this subtree. */ private static void addSubtree( SemanticGraph toModify, IndexedWord root, String rel, SemanticGraph originalTree, IndexedWord subject, Collection<SemanticGraphEdge> ignoredEdges) { if (toModify.containsVertex(subject)) { return; // This subtree already exists. } Queue<IndexedWord> fringe = new LinkedList<>(); Collection<IndexedWord> wordsToAdd = new ArrayList<>(); Collection<SemanticGraphEdge> edgesToAdd = new ArrayList<>(); // Search for subtree to add for (SemanticGraphEdge edge : originalTree.outgoingEdgeIterable(subject)) { if (!ignoredEdges.contains(edge)) { if (toModify.containsVertex(edge.getDependent())) { // Case: we're adding a subtree that's not disjoint from toModify. This is bad news. return; } edgesToAdd.add(edge); fringe.add(edge.getDependent()); } } while (!fringe.isEmpty()) { IndexedWord node = fringe.poll(); wordsToAdd.add(node); for (SemanticGraphEdge edge : originalTree.outgoingEdgeIterable(node)) { if (!ignoredEdges.contains(edge)) { if (toModify.containsVertex(edge.getDependent())) { // Case: we're adding a subtree that's not disjoint from toModify. This is bad news. 
return; } edgesToAdd.add(edge); fringe.add(edge.getDependent()); } } } // Add subtree // (add subject) toModify.addVertex(subject); toModify.addEdge( root, subject, GrammaticalRelation.valueOf(Language.English, rel), Double.NEGATIVE_INFINITY, false); // (add nodes) wordsToAdd.forEach(toModify::addVertex); // (add edges) for (SemanticGraphEdge edge : edgesToAdd) { assert !toModify.incomingEdgeIterator(edge.getDependent()).hasNext(); toModify.addEdge( edge.getGovernor(), edge.getDependent(), edge.getRelation(), edge.getWeight(), edge.isExtra()); } }
/** * Fix some bizarre peculiarities with certain trees. So far, these include: * * <ul> * <li>Sometimes there's a node from a word to itself. This seems wrong. * </ul> * * @param tree The tree to clean (in place!). * @return A list of extra edges, which are valid but were removed. */ public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) { // assert !isCyclic(tree); // Clean nodes List<IndexedWord> toDelete = new ArrayList<>(); for (IndexedWord vertex : tree.vertexSet()) { // Clean punctuation if (vertex.tag() == null) { continue; } char tag = vertex.backingLabel().tag().charAt(0); if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':') { if (!tree.outgoingEdgeIterator(vertex) .hasNext()) { // This should really never happen, but it does. toDelete.add(vertex); } } } toDelete.forEach(tree::removeVertex); // Clean edges Iterator<SemanticGraphEdge> iter = tree.edgeIterable().iterator(); while (iter.hasNext()) { SemanticGraphEdge edge = iter.next(); if (edge.getDependent().index() == edge.getGovernor().index()) { // Clean self-edges iter.remove(); } else if (edge.getRelation().toString().equals("punct")) { // Clean punctuation (again) if (!tree.outgoingEdgeIterator(edge.getDependent()) .hasNext()) { // This should really never happen, but it does. 
iter.remove(); } } } // Remove extra edges List<SemanticGraphEdge> extraEdges = new ArrayList<>(); for (SemanticGraphEdge edge : tree.edgeIterable()) { if (edge.isExtra()) { if (tree.incomingEdgeList(edge.getDependent()).size() > 1) { extraEdges.add(edge); } } } extraEdges.forEach(tree::removeEdge); // Add apposition edges (simple coref) for (SemanticGraphEdge extraEdge : new ArrayList<>(extraEdges)) { // note[gabor] prevent concurrent modification exception for (SemanticGraphEdge candidateAppos : tree.incomingEdgeIterable(extraEdge.getDependent())) { if (candidateAppos.getRelation().toString().equals("appos")) { extraEdges.add( new SemanticGraphEdge( extraEdge.getGovernor(), candidateAppos.getGovernor(), extraEdge.getRelation(), extraEdge.getWeight(), extraEdge.isExtra())); } } for (SemanticGraphEdge candidateAppos : tree.outgoingEdgeIterable(extraEdge.getDependent())) { if (candidateAppos.getRelation().toString().equals("appos")) { extraEdges.add( new SemanticGraphEdge( extraEdge.getGovernor(), candidateAppos.getDependent(), extraEdge.getRelation(), extraEdge.getWeight(), extraEdge.isExtra())); } } } // Brute force ensure tree // Remove incoming edges from roots List<SemanticGraphEdge> rootIncomingEdges = new ArrayList<>(); for (IndexedWord root : tree.getRoots()) { for (SemanticGraphEdge incomingEdge : tree.incomingEdgeIterable(root)) { rootIncomingEdges.add(incomingEdge); } } rootIncomingEdges.forEach(tree::removeEdge); // Loop until it becomes a tree. boolean changed = true; while (changed) { // I just want trees to be trees; is that so much to ask!? 
changed = false; List<IndexedWord> danglingNodes = new ArrayList<>(); List<SemanticGraphEdge> invalidEdges = new ArrayList<>(); for (IndexedWord vertex : tree.vertexSet()) { // Collect statistics Iterator<SemanticGraphEdge> incomingIter = tree.incomingEdgeIterator(vertex); boolean hasIncoming = incomingIter.hasNext(); boolean hasMultipleIncoming = false; if (hasIncoming) { incomingIter.next(); hasMultipleIncoming = incomingIter.hasNext(); } // Register actions if (!hasIncoming && !tree.getRoots().contains(vertex)) { danglingNodes.add(vertex); } else { if (hasMultipleIncoming) { for (SemanticGraphEdge edge : new IterableIterator<>(incomingIter)) { invalidEdges.add(edge); } } } } // Perform actions for (IndexedWord vertex : danglingNodes) { tree.removeVertex(vertex); changed = true; } for (SemanticGraphEdge edge : invalidEdges) { tree.removeEdge(edge); changed = true; } } // Return assert isTree(tree); return extraEdges; }