@Override
public Tuple parse(
    IndexedWord gov, IndexedWord dep, SemanticGraph depGraph, Tuple t, Set<IndexedWord> visited) {
  getPOSString(gov, dep); // result unused here
  Tuple t1;
  // Check for a leaf node: only recurse if the dependent has children of its own.
  if (depGraph.getChildren(dep).size() > 0) {
    t1 = parse(dep, depGraph, visited);
  } else {
    Entity e = new Entity(dep.word(), EntityType.Notion);
    t1 = new Tuple(e);
  }
  String s = depGraph.getEdge(gov, dep).getRelation().getSpecific();
  Relation r = new Relation(s, RelationType.One2One);
  if (t == null) {
    Entity e1 = new Entity(gov.word(), EntityType.Object);
    Tuple t2 = new Tuple(e1);
    t = new Tuple(t1, r, t2);
  } else {
    t = new Tuple(t1, r, t);
  }
  logger.info(t.toString());
  return t;
}
public boolean isCopy(IndexedWord otherWord) {
  Integer myInd = get(CoreAnnotations.IndexAnnotation.class);
  Integer otherInd = otherWord.get(CoreAnnotations.IndexAnnotation.class);
  if (myInd == null) {
    if (otherInd != null) return false;
  } else if (!myInd.equals(otherInd)) {
    return false;
  }

  Integer mySentInd = get(CoreAnnotations.SentenceIndexAnnotation.class);
  Integer otherSentInd = otherWord.get(CoreAnnotations.SentenceIndexAnnotation.class);
  if (mySentInd == null) {
    if (otherSentInd != null) return false;
  } else if (!mySentInd.equals(otherSentInd)) {
    return false;
  }

  String myDocID = getString(CoreAnnotations.DocIDAnnotation.class);
  String otherDocID = otherWord.getString(CoreAnnotations.DocIDAnnotation.class);
  if (myDocID == null) {
    if (otherDocID != null) return false;
  } else if (!myDocID.equals(otherDocID)) {
    return false;
  }

  // This word must itself be a copy (nonzero copy count) of an original (zero copy count).
  if (copyCount() == 0 || otherWord.copyCount() != 0) {
    return false;
  }
  return true;
}
public Entity(IndexedWord... wrd) {
  this.name = "";
  for (IndexedWord w : wrd) {
    this.name = this.name + " " + w.word();
  }
  this.name = this.name.trim();
  this.type = EntityType.Unknown;
}
public void testGetCommonAncestor() {
  IndexedWord common = graph.getCommonAncestor(graph.getNodeByIndex(43), graph.getNodeByIndex(44));
  assertEquals(45, common.index());

  common = graph.getCommonAncestor(graph.getNodeByIndex(41), graph.getNodeByIndex(39));
  assertEquals(41, common.index());

  common = graph.getCommonAncestor(graph.getNodeByIndex(39), graph.getNodeByIndex(41));
  assertEquals(41, common.index());

  common = graph.getCommonAncestor(graph.getNodeByIndex(40), graph.getNodeByIndex(42));
  assertEquals(41, common.index());

  // too far for this method
  common = graph.getCommonAncestor(graph.getNodeByIndex(10), graph.getNodeByIndex(42));
  assertEquals(null, common);

  common = graph.getCommonAncestor(graph.getNodeByIndex(10), graph.getNodeByIndex(10));
  assertEquals(10, common.index());

  common = graph.getCommonAncestor(graph.getNodeByIndex(40), graph.getNodeByIndex(40));
  assertEquals(40, common.index());

  // a couple of tests at the top of the graph
  common = graph.getCommonAncestor(graph.getNodeByIndex(10), graph.getNodeByIndex(1));
  assertEquals(10, common.index());

  common = graph.getCommonAncestor(graph.getNodeByIndex(1), graph.getNodeByIndex(10));
  assertEquals(10, common.index());
}
public void testShortestPath() {
  graph.prettyPrint();
  IndexedWord word1 = graph.getNodeByIndex(10);
  IndexedWord word2 = graph.getNodeByIndex(14);
  System.out.println("word1: " + word1);
  System.out.println("word1: " + word1.hashCode());
  System.out.println("word2: " + word2);
  System.out.println("word2: " + word2.hashCode());
  System.out.println("word eq: " + word1.equals(word2));
  System.out.println("word eq: " + (word1.hashCode() == word2.hashCode()));
  System.out.println("word eq: " + (word1.toString().equals(word2.toString())));

  List<SemanticGraphEdge> edges = graph.getShortestUndirectedPathEdges(word1, word2);
  System.out.println("path: " + edges);
  assertNotNull(edges);

  List<IndexedWord> nodes = graph.getShortestUndirectedPathNodes(word1, word2);
  System.out.println("path: " + nodes);
  assertNotNull(nodes);
  assertEquals(word1, nodes.get(0));
  assertEquals(word2, nodes.get(nodes.size() - 1));

  edges = graph.getShortestUndirectedPathEdges(word1, word1);
  System.out.println("path: " + edges);
  assertNotNull(edges);
  assertEquals(0, edges.size());

  nodes = graph.getShortestUndirectedPathNodes(word1, word1);
  System.out.println("path: " + nodes);
  assertNotNull(nodes);
  assertEquals(1, nodes.size());
  assertEquals(word1, nodes.get(0));
}
private static void verifySet(Collection<IndexedWord> nodes, int... expected) {
  Set<Integer> results = Generics.newTreeSet();
  for (IndexedWord node : nodes) {
    results.add(node.index());
  }
  Set<Integer> expectedIndices = Generics.newTreeSet();
  for (Integer index : expected) {
    expectedIndices.add(index);
  }
  assertEquals(expectedIndices, results);
}
public void testHasChildren() {
  SemanticGraph gr = SemanticGraph.valueOf("[ate subj>Bill dobj>[muffins compound>blueberry]]");
  List<IndexedWord> vertices = gr.vertexListSorted();
  for (IndexedWord word : vertices) {
    if (word.word().equals("ate") || word.word().equals("muffins")) {
      assertTrue(gr.hasChildren(word));
    } else {
      assertFalse(gr.hasChildren(word));
    }
  }
}
public static DependencyParse parse(String text) {
  if (pipeline == null) {
    loadModels();
  }
  DependencyParse parse = new DependencyParse();
  Annotation document = new Annotation(text);
  pipeline.annotate(document);
  List<CoreMap> sentences = document.get(SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    IndexedWord root = dependencies.getFirstRoot();
    parse.setHeadNode(root.index());
    List<SemanticGraphEdge> edges = dependencies.edgeListSorted();
    for (SemanticGraphEdge t : edges) {
      String dep = t.getDependent().originalText();
      int depIndex = t.getDependent().index();
      String depPOS = t.getDependent().tag();
      int depStart = t.getDependent().beginPosition();
      int depEnd = t.getDependent().endPosition();

      String gov = t.getGovernor().originalText();
      int govIndex = t.getGovernor().index();
      String govPOS = t.getGovernor().tag();
      int govStart = t.getGovernor().beginPosition();
      int govEnd = t.getGovernor().endPosition();

      parse.addNode(govIndex, gov, govPOS, govStart, govEnd);
      parse.addNode(depIndex, dep, depPOS, depStart, depEnd);
      parse.addEdge(depIndex, govIndex, t.getRelation().getShortName());
    }
  }
  return parse;
}
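// A minimal sketch of what the loadModels() helper used above might look like. The exact
// annotator list is an assumption (a parser must be included for the
// CollapsedCCProcessedDependenciesAnnotation read above to be populated), and "pipeline"
// is the static field the method lazily initializes.
private static void loadModels() {
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse");
  pipeline = new StanfordCoreNLP(props);
}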
public IndexedWord makeSoftCopy() {
  if (original != null) {
    return original.makeSoftCopy();
  } else {
    return makeSoftCopy(++numCopies);
  }
}
@SuppressWarnings("unchecked") public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean ignoreCase) { // System.out.println(node.word()); if (isRoot) return (negDesc ? !sg.getRoots().contains(node) : sg.getRoots().contains(node)); // System.out.println("not root"); if (isEmpty) return (negDesc ? !node.equals(IndexedWord.NO_WORD) : node.equals(IndexedWord.NO_WORD)); // System.err.println("Attributes are: " + attributes); for (Map.Entry<String, Pattern> attr : attributes.entrySet()) { String key = attr.getKey(); // System.out.println(key); String nodeValue; // if (key.equals("idx")) // nodeValue = Integer.toString(node.index()); // else { Class c = Env.lookupAnnotationKey(env, key); // find class for the key Object value = node.get(c); if (value == null) nodeValue = null; else nodeValue = value.toString(); // } // System.out.println(nodeValue); if (nodeValue == null) return negDesc; Pattern valuePattern = attr.getValue(); boolean matches = false; if (ignoreCase) { if (Pattern.compile(valuePattern.pattern(), Pattern.CASE_INSENSITIVE) .matcher(nodeValue) .matches()) matches = true; } else { if (nodeValue.matches(valuePattern.pattern())) matches = true; } if (!matches) { // System.out.println("doesn't match"); // System.out.println(""); return negDesc; } } // System.out.println("matches"); // System.out.println(""); return !negDesc; }
private void testParseTree() {
  try {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text into the text variable
    String text = "Give me a list of all bandleaders that play trumpet.";

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with
    // custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
      // this is the parse tree of the current sentence
      Tree tree = sentence.get(TreeAnnotation.class);

      // this is the Stanford dependency graph of the current sentence
      SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
      Set<IndexedWord> vertices = dependencies.vertexSet();
      List<SemanticGraphEdge> edges = dependencies.edgeListSorted();

      for (IndexedWord i : vertices) {
        System.out.println(i.toString());
      }
    }
  } catch (Exception e) {
    // swallowed: this smoke test only checks that the pipeline runs
  }
}
/**
 * This method attempts to resolve noun phrases which consist of more than one word. More
 * precisely, it looks for nn dependencies below {@code head} and creates an entity.
 *
 * @param head The head of the noun phrase
 * @param graph The sentence to look in.
 * @param words The words which make up the noun phrase
 * @return A distinct word
 */
public static String resolveNN(
    IndexedWord head, SemanticGraph graph, ArrayList<IndexedWord> words) {
  List<IndexedWord> nns =
      graph.getChildrenWithReln(head, EnglishGrammaticalRelations.NOUN_COMPOUND_MODIFIER);
  String name = "";
  // check for nulls. if there is nothing here, we have nothing to do.
  if (nns != null) {
    for (IndexedWord part : nns) {
      name += part.word();
      name += " ";
      words.add(part); // save this word as a part of the results
    }
    // append the head word ("starting" word)
    name += head.word();
    words.add(head); // save this word as a part of the results
    return name;
  } else {
    return null;
  }
}
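// A hypothetical call site for resolveNN above. In real use, "head" would be a noun found
// while walking the dependency graph; using the root here just keeps the sketch
// self-contained.
public static void printCompoundName(SemanticGraph graph) {
  IndexedWord head = graph.getFirstRoot(); // stand-in for a real noun-phrase head
  ArrayList<IndexedWord> parts = new ArrayList<>();
  String name = resolveNN(head, graph, parts);
  if (name != null) {
    System.out.println("resolved \"" + name.trim() + "\" from " + parts.size() + " token(s)");
  }
}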
public void testCommonAncestor() {
  IndexedWord word1 = graph.getNodeByIndex(43);
  IndexedWord word2 = graph.getNodeByIndex(44);
  IndexedWord common = graph.getCommonAncestor(word1, word2);
  System.out.println("word1: " + word1);
  System.out.println("word2: " + word2);
  System.out.println("common: " + common);
  System.out.println(
      "common ancestor between "
          + word1.value() + "-" + word1.index()
          + " and "
          + word2.value() + "-" + word2.index()
          + " is "
          + common.value() + "-" + common.index());
  assertEquals(45, common.index());
}
/**
 * This .equals is dependent only on docID, sentenceIndex, and index. It doesn't consider the
 * actual word value, but assumes that it is validly represented by token position. All
 * IndexedWords that lack these fields will be regarded as equal.
 */
@Override
public boolean equals(Object o) {
  if (this == o) return true;
  if (!(o instanceof IndexedWord)) return false;

  // now compare on appropriate keys
  final IndexedWord otherWord = (IndexedWord) o;
  Integer myInd = get(CoreAnnotations.IndexAnnotation.class);
  Integer otherInd = otherWord.get(CoreAnnotations.IndexAnnotation.class);
  if (myInd == null) {
    if (otherInd != null) return false;
  } else if (!myInd.equals(otherInd)) {
    return false;
  }

  Integer mySentInd = get(CoreAnnotations.SentenceIndexAnnotation.class);
  Integer otherSentInd = otherWord.get(CoreAnnotations.SentenceIndexAnnotation.class);
  if (mySentInd == null) {
    if (otherSentInd != null) return false;
  } else if (!mySentInd.equals(otherSentInd)) {
    return false;
  }

  String myDocID = getString(CoreAnnotations.DocIDAnnotation.class);
  String otherDocID = otherWord.getString(CoreAnnotations.DocIDAnnotation.class);
  if (myDocID == null) {
    if (otherDocID != null) return false;
  } else if (!myDocID.equals(otherDocID)) {
    return false;
  }

  if (copyCount() != otherWord.copyCount()) {
    return false;
  }
  // Compare pseudo-positions
  if ((!Double.isNaN(this.pseudoPosition) || !Double.isNaN(otherWord.pseudoPosition))
      && this.pseudoPosition != otherWord.pseudoPosition) {
    return false;
  }
  return true;
}
/**
 * NOTE: This compareTo is based on and made to be compatible with the one from
 * IndexedFeatureLabel. You <em>must</em> have a DocIDAnnotation, SentenceIndexAnnotation, and
 * IndexAnnotation for this to make sense and be guaranteed to work properly. Currently, it won't
 * error out and will try to return something sensible if these are not defined, but that really
 * isn't proper usage!
 *
 * <p>This compareTo method is based not on value elements like the word(), but on passage
 * position. It puts NO_WORD elements first, and then orders by document, sentence, and word
 * index. If these do not differ, it returns equal.
 *
 * @param w The IndexedWord to compare with
 * @return Whether this is less than w or not in the ordering
 */
@Override
public int compareTo(IndexedWord w) {
  if (this.equals(IndexedWord.NO_WORD)) {
    if (w.equals(IndexedWord.NO_WORD)) {
      return 0;
    } else {
      return -1;
    }
  }
  if (w.equals(IndexedWord.NO_WORD)) {
    return 1;
  }

  // Override the default comparator if pseudo-positions are set.
  // This is needed for splicing trees together awkwardly in OpenIE.
  if (!Double.isNaN(w.pseudoPosition) || !Double.isNaN(this.pseudoPosition)) {
    double val = this.pseudoPosition() - w.pseudoPosition();
    if (val < 0) {
      return -1;
    }
    if (val > 0) {
      return 1;
    } else {
      return 0;
    }
  }

  // Otherwise, compare using the normal doc/sentence/token index hierarchy
  String docID = this.getString(CoreAnnotations.DocIDAnnotation.class);
  int docComp = docID.compareTo(w.getString(CoreAnnotations.DocIDAnnotation.class));
  if (docComp != 0) return docComp;

  int sentComp = sentIndex() - w.sentIndex();
  if (sentComp != 0) return sentComp;

  int indexComp = index() - w.index();
  if (indexComp != 0) return indexComp;

  return copyCount() - w.copyCount();
}
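// A small illustration of the ordering contract documented above: comparison runs on
// document, then sentence, then token index, never on the word text. The
// IndexedWord(docID, sentenceIndex, index) constructor is standard CoreNLP API.
public static void orderingSketch() {
  IndexedWord a = new IndexedWord("doc1", 0, 3);
  IndexedWord b = new IndexedWord("doc1", 0, 5);
  IndexedWord c = new IndexedWord("doc1", 1, 1);
  List<IndexedWord> words = new ArrayList<>(Arrays.asList(c, b, a));
  Collections.sort(words); // yields [a, b, c]: index 3 before 5, sentence 0 before 1
}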
// when finished = false; break; is called, it means I successfully matched.
@SuppressWarnings("null")
private void goToNextNodeMatch() {
  decommitVariableGroups(); // make sure variable groups are free.
  decommitNamedNodes();
  decommitNamedRelations();
  finished = true;
  // note: m is never assigned in this method; it is dereferenced only when variableGroups
  // is non-empty, which the @SuppressWarnings("null") above acknowledges.
  Matcher m = null;
  while (nodeMatchCandidateIterator.hasNext()) {
    if (myNode.reln.getName() != null) {
      String foundReln = namesToRelations.get(myNode.reln.getName());
      nextMatchReln = ((GraphRelation.SearchNodeIterator) nodeMatchCandidateIterator).getReln();
      if ((foundReln != null) && (!nextMatchReln.equals(foundReln))) {
        nextMatch = nodeMatchCandidateIterator.next();
        continue;
      }
    }
    nextMatch = nodeMatchCandidateIterator.next();
    if (myNode.descString.equals("{}") && myNode.isLink) {
      // a bare "{}" backreference: succeed iff the candidate does (not) equal the named node
      IndexedWord otherNode = namesToNodes.get(myNode.name);
      if (otherNode != null) {
        if (otherNode.equals(nextMatch)) {
          if (!myNode.negDesc) {
            finished = false;
            break;
          }
        } else {
          if (myNode.negDesc) {
            finished = false;
            break;
          }
        }
      } else {
        boolean found = myNode.nodeAttrMatch(nextMatch, hyp ? sg : sg_aligned, ignoreCase);
        if (found) {
          for (Pair<Integer, String> varGroup : myNode.variableGroups) {
            // if variables have been captured from a regex, they
            // must match any previous matchings
            String thisVariable = varGroup.second();
            String thisVarString = variableStrings.getString(thisVariable);
            if (thisVarString != null && !thisVarString.equals(m.group(varGroup.first()))) {
              // failed to match a variable
              found = false;
              break;
            }
          }
          // nodeAttrMatch already checks negDesc, so no need to check for that here
          finished = false;
          break;
        }
      }
    } else {
      // try to match the description pattern.
      boolean found = myNode.nodeAttrMatch(nextMatch, hyp ? sg : sg_aligned, ignoreCase);
      if (found) {
        for (Pair<Integer, String> varGroup : myNode.variableGroups) {
          // if variables have been captured from a regex, they
          // must match any previous matchings
          String thisVariable = varGroup.second();
          String thisVarString = variableStrings.getString(thisVariable);
          if (thisVarString != null && !thisVarString.equals(m.group(varGroup.first()))) {
            // failed to match a variable
            found = false;
            break;
          }
        }
        // nodeAttrMatch already checks negDesc, so no need to check for that here
        finished = false;
        break;
      }
    }
  } // end while

  if (!finished) { // I successfully matched.
    resetChild();
    if (myNode.name != null) {
      // note: have to fill in the map as we go for backreferencing
      if (!namesToNodes.containsKey(myNode.name)) {
        namedFirst = true;
      }
      namesToNodes.put(myNode.name, nextMatch);
    }
    if (myNode.reln.getName() != null) {
      if (!namesToRelations.containsKey(myNode.reln.getName())) {
        relnNamedFirst = true;
      }
      namesToRelations.put(myNode.reln.getName(), nextMatchReln);
    }
    commitVariableGroups(m); // commit my variable groups.
  }
  // finished is false exiting this if and only if nextChild exists
  // and has a label or backreference that matches
  // (also it will just have been reset)
}
/**
 * This method searches for an indexed word in a sentence tree.
 *
 * @param wordToFind The word whose position in the tree should be located
 * @param treeToSearch The parse tree to search in
 * @param expectedPOS The expected POS tag for the result. If this is null, the method tries to
 *     find a phrase.
 * @param canGoUp If true the method will walk up the tree to find a phrase.
 * @param skip Set to 1 if you want to find the phrase for "in front of". Set to 0 otherwise.
 * @return The largest matching tree.
 */
public static Tree match(
    IndexedWord wordToFind, Tree treeToSearch, String expectedPOS, boolean canGoUp, int skip) {
  int end = wordToFind.get(EndIndexAnnotation.class);
  int begin = wordToFind.get(BeginIndexAnnotation.class);

  // first, find whatever is at the word's index
  for (Tree tree : treeToSearch) {
    CoreLabel lbl = ((CoreLabel) tree.label());
    if (lbl != null
        && lbl.get(EndIndexAnnotation.class) != null
        && lbl.get(EndIndexAnnotation.class) == end) {
      if (lbl.get(BeginIndexAnnotation.class) == begin) {
        // we found the first subtree at the word's index
        // now, check if the word here is our search word
        if (tree.getLeaves().get(0).label().value().equals(wordToFind.value())) {
          // we have found the label.
          Tree candidate = tree;
          if (expectedPOS != null) {
            // if we know our desired POS, just keep walking up the tree to find the first
            // instance of the expected POS
            while (!expectedPOS.equals(candidate.value())) {
              // if we don't have the right POS, just try our parent
              candidate = candidate.parent(treeToSearch);
              if (candidate == null) {
                return null;
              }
            }
            candidate = skip(candidate, treeToSearch, expectedPOS, skip);
          } else {
            // else walk up the tree again to find the corresponding phrase
            while (!candidate.isPhrasal()) {
              candidate = candidate.parent(treeToSearch); // Tree.parent(Tree root)
              if (candidate == null) {
                return null;
              }
            }
          }
          if (canGoUp) {
            // now keep walking as long as the phrase does not change. this should yield the
            // largest representative phrase for this word.
            String phrase = candidate.value();
            // check the parent for null before reading its value, so we don't NPE at the root
            Tree parent = candidate.parent(treeToSearch);
            while (parent != null && phrase.equals(parent.value())) {
              candidate = parent;
              parent = candidate.parent(treeToSearch);
            }
          }
          return candidate;
        }
      }
    }
  }
  return null;
}
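// Hypothetical usage of match(...) above: find the largest NP containing a dependency
// node, given the sentence's constituency tree. "NP" and skip=0 are illustrative values.
public static Tree largestNPFor(CoreMap sentence, IndexedWord word) {
  Tree tree = sentence.get(TreeAnnotation.class);
  return match(word, tree, "NP", true, 0);
}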
@Override
public double computeValue(IndexedWord label) {
  return Objects.equals(label.ner(), tag) ? 1.0 : 0.0;
}
@Override
public void handle(HttpExchange httpExchange) throws IOException {
  // Set common response headers
  httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

  Future<String> json =
      corenlpExecutor.submit(
          () -> {
            try {
              // Get the document
              Properties props =
                  new Properties() {
                    {
                      setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
                    }
                  };
              Annotation doc = getDocument(props, httpExchange);
              if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                pipeline.annotate(doc);
              }

              // Construct the matcher
              Map<String, String> params = getURLParams(httpExchange.getRequestURI());
              // (get the pattern)
              if (!params.containsKey("pattern")) {
                respondError("Missing required parameter 'pattern'", httpExchange);
                return "";
              }
              String pattern = params.get("pattern");
              // (get whether to filter / find)
              String filterStr = params.getOrDefault("filter", "false");
              final boolean filter =
                  filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr);
              // (create the matcher)
              final SemgrexPattern regex = SemgrexPattern.compile(pattern);

              // Run Semgrex
              return JSONOutputter.JSONWriter.objectToJSON(
                  (docWriter) -> {
                    if (filter) {
                      // Case: just filter sentences
                      docWriter.set(
                          "sentences",
                          doc.get(CoreAnnotations.SentencesAnnotation.class).stream()
                              .map(
                                  sentence ->
                                      regex
                                          .matcher(
                                              sentence.get(
                                                  SemanticGraphCoreAnnotations
                                                      .CollapsedCCProcessedDependenciesAnnotation
                                                      .class))
                                          .matches())
                              .collect(Collectors.toList()));
                    } else {
                      // Case: find matches
                      docWriter.set(
                          "sentences",
                          doc.get(CoreAnnotations.SentencesAnnotation.class).stream()
                              .map(
                                  sentence ->
                                      (Consumer<JSONOutputter.Writer>)
                                          (JSONOutputter.Writer sentWriter) -> {
                                            SemgrexMatcher matcher =
                                                regex.matcher(
                                                    sentence.get(
                                                        SemanticGraphCoreAnnotations
                                                            .CollapsedCCProcessedDependenciesAnnotation
                                                            .class));
                                            int i = 0;
                                            while (matcher.find()) {
                                              sentWriter.set(
                                                  Integer.toString(i),
                                                  (Consumer<JSONOutputter.Writer>)
                                                      (JSONOutputter.Writer matchWriter) -> {
                                                        IndexedWord match = matcher.getMatch();
                                                        matchWriter.set("text", match.word());
                                                        matchWriter.set(
                                                            "begin", match.index() - 1);
                                                        matchWriter.set("end", match.index());
                                                        for (String capture :
                                                            matcher.getNodeNames()) {
                                                          matchWriter.set(
                                                              "$" + capture,
                                                              (Consumer<JSONOutputter.Writer>)
                                                                  groupWriter -> {
                                                                    IndexedWord node =
                                                                        matcher.getNode(capture);
                                                                    groupWriter.set(
                                                                        "text", node.word());
                                                                    groupWriter.set(
                                                                        "begin",
                                                                        node.index() - 1);
                                                                    groupWriter.set(
                                                                        "end", node.index());
                                                                  });
                                                        }
                                                      });
                                              i += 1;
                                            }
                                            sentWriter.set("length", i);
                                          }));
                    }
                  });
            } catch (Exception e) {
              e.printStackTrace();
              try {
                respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
              } catch (IOException ignored) {
              }
            }
            return "";
          });

  // Send response
  byte[] response = new byte[0];
  try {
    response = json.get(5, TimeUnit.SECONDS).getBytes();
  } catch (InterruptedException | ExecutionException | TimeoutException e) {
    respondError("Timeout when executing Semgrex query", httpExchange);
  }
  if (response.length > 0) {
    httpExchange.getResponseHeaders().add("Content-Type", "text/json");
    httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
    httpExchange.sendResponseHeaders(HTTP_OK, response.length);
    httpExchange.getResponseBody().write(response);
    httpExchange.close();
  }
}
/**
 * Fix some bizarre peculiarities with certain trees. So far, these include:
 *
 * <ul>
 *   <li>Sometimes there's a node from a word to itself. This seems wrong.
 * </ul>
 *
 * @param tree The tree to clean (in place!).
 * @return A list of extra edges, which are valid but were removed.
 */
public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
  // assert !isCyclic(tree);

  // Clean nodes
  List<IndexedWord> toDelete = new ArrayList<>();
  for (IndexedWord vertex : tree.vertexSet()) {
    // Clean punctuation
    if (vertex.tag() == null) {
      continue;
    }
    char tag = vertex.backingLabel().tag().charAt(0);
    if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':') {
      if (!tree.outgoingEdgeIterator(vertex).hasNext()) {
        // This should really never happen, but it does.
        toDelete.add(vertex);
      }
    }
  }
  toDelete.forEach(tree::removeVertex);

  // Clean edges
  Iterator<SemanticGraphEdge> iter = tree.edgeIterable().iterator();
  while (iter.hasNext()) {
    SemanticGraphEdge edge = iter.next();
    if (edge.getDependent().index() == edge.getGovernor().index()) {
      // Clean self-edges
      iter.remove();
    } else if (edge.getRelation().toString().equals("punct")) {
      // Clean punctuation (again)
      if (!tree.outgoingEdgeIterator(edge.getDependent()).hasNext()) {
        // This should really never happen, but it does.
        iter.remove();
      }
    }
  }

  // Remove extra edges
  List<SemanticGraphEdge> extraEdges = new ArrayList<>();
  for (SemanticGraphEdge edge : tree.edgeIterable()) {
    if (edge.isExtra()) {
      if (tree.incomingEdgeList(edge.getDependent()).size() > 1) {
        extraEdges.add(edge);
      }
    }
  }
  extraEdges.forEach(tree::removeEdge);

  // Add apposition edges (simple coref)
  for (SemanticGraphEdge extraEdge :
      new ArrayList<>(extraEdges)) { // note[gabor]: copy prevents concurrent modification
    for (SemanticGraphEdge candidateAppos : tree.incomingEdgeIterable(extraEdge.getDependent())) {
      if (candidateAppos.getRelation().toString().equals("appos")) {
        extraEdges.add(
            new SemanticGraphEdge(
                extraEdge.getGovernor(),
                candidateAppos.getGovernor(),
                extraEdge.getRelation(),
                extraEdge.getWeight(),
                extraEdge.isExtra()));
      }
    }
    for (SemanticGraphEdge candidateAppos : tree.outgoingEdgeIterable(extraEdge.getDependent())) {
      if (candidateAppos.getRelation().toString().equals("appos")) {
        extraEdges.add(
            new SemanticGraphEdge(
                extraEdge.getGovernor(),
                candidateAppos.getDependent(),
                extraEdge.getRelation(),
                extraEdge.getWeight(),
                extraEdge.isExtra()));
      }
    }
  }

  // Brute force ensure tree
  // Remove incoming edges from roots
  List<SemanticGraphEdge> rootIncomingEdges = new ArrayList<>();
  for (IndexedWord root : tree.getRoots()) {
    for (SemanticGraphEdge incomingEdge : tree.incomingEdgeIterable(root)) {
      rootIncomingEdges.add(incomingEdge);
    }
  }
  rootIncomingEdges.forEach(tree::removeEdge);

  // Loop until it becomes a tree.
  boolean changed = true;
  while (changed) { // I just want trees to be trees; is that so much to ask!?
    changed = false;
    List<IndexedWord> danglingNodes = new ArrayList<>();
    List<SemanticGraphEdge> invalidEdges = new ArrayList<>();

    for (IndexedWord vertex : tree.vertexSet()) {
      // Collect statistics
      Iterator<SemanticGraphEdge> incomingIter = tree.incomingEdgeIterator(vertex);
      boolean hasIncoming = incomingIter.hasNext();
      boolean hasMultipleIncoming = false;
      if (hasIncoming) {
        incomingIter.next();
        hasMultipleIncoming = incomingIter.hasNext();
      }

      // Register actions
      if (!hasIncoming && !tree.getRoots().contains(vertex)) {
        danglingNodes.add(vertex);
      } else {
        if (hasMultipleIncoming) {
          for (SemanticGraphEdge edge : new IterableIterator<>(incomingIter)) {
            invalidEdges.add(edge);
          }
        }
      }
    }

    // Perform actions
    for (IndexedWord vertex : danglingNodes) {
      tree.removeVertex(vertex);
      changed = true;
    }
    for (SemanticGraphEdge edge : invalidEdges) {
      tree.removeEdge(edge);
      changed = true;
    }
  }

  // Return
  assert isTree(tree);
  return extraEdges;
}
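// Sketch of cleanTree in use, reusing the bracketed SemanticGraph.valueOf notation from
// testHasChildren above. cleanTree mutates its argument in place and returns any valid
// "extra" edges it removed while forcing the graph into tree shape.
public static void cleanTreeSketch() {
  SemanticGraph g = SemanticGraph.valueOf("[ate subj>Bill dobj>[muffins compound>blueberry]]");
  List<SemanticGraphEdge> removed = cleanTree(g);
  System.out.println("pruned " + removed.size() + " extra edge(s)");
}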
public IndexedWord makeCopy(int count) {
  CoreLabel labelCopy = new CoreLabel(label);
  IndexedWord copy = new IndexedWord(labelCopy);
  copy.setCopyCount(count);
  return copy;
}
public IndexedWord makeSoftCopy(int count) {
  // a soft copy shares the underlying CoreLabel and remembers its original
  IndexedWord copy = new IndexedWord(label);
  copy.setCopyCount(count);
  copy.original = this;
  return copy;
}
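// Illustration of the copy semantics above, following the equals() and isCopy() shown
// earlier: a copy carries a nonzero copyCount, so it compares unequal to its original even
// though docID, sentence index, and token index all still match.
public static void copySemanticsSketch(IndexedWord original) {
  IndexedWord copy = original.makeSoftCopy(1);
  boolean equal = original.equals(copy); // false: copyCount 0 vs. 1
  boolean isCopy = copy.isCopy(original); // true, per isCopy() above
  System.out.println("equal=" + equal + ", isCopy=" + isCopy);
}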
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  Annotation document = this.processor.process(jCas.getDocumentText());

  String lastNETag = "O";
  int lastNEBegin = -1;
  int lastNEEnd = -1;
  for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {
    // create the token annotation
    int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
    int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
    String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
    String lemma = tokenAnn.get(LemmaAnnotation.class);
    Token token = new Token(jCas, begin, end);
    token.setPos(pos);
    token.setLemma(lemma);
    token.addToIndexes();

    // hackery to convert token-level named entity tag into phrase-level tag
    String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
    if (neTag.equals("O") && !lastNETag.equals("O")) {
      NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
      ne.setMentionType(lastNETag);
      ne.addToIndexes();
    } else {
      if (lastNETag.equals("O")) {
        lastNEBegin = begin;
      } else if (lastNETag.equals(neTag)) {
        // do nothing - begin was already set
      } else {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
        lastNEBegin = begin;
      }
      lastNEEnd = end;
    }
    lastNETag = neTag;
  }
  if (!lastNETag.equals("O")) {
    NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
    ne.setMentionType(lastNETag);
    ne.addToIndexes();
  }

  // add sentences and trees
  for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {
    // add the sentence annotation
    int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
    int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
    Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
    sentence.addToIndexes();

    // add the syntactic tree annotation
    List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
    Tree tree = sentenceAnn.get(TreeAnnotation.class);
    if (tree.children().length != 1) {
      throw new RuntimeException("Expected single root node, found " + tree);
    }
    tree = tree.firstChild();
    tree.indexSpans(0);
    TopTreebankNode root = new TopTreebankNode(jCas);
    root.setTreebankParse(tree.toString());
    // TODO: root.setTerminals(v)
    this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

    // get the dependencies
    SemanticGraph dependencies =
        sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

    // convert Stanford nodes to UIMA annotations
    List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
    Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
    for (IndexedWord stanfordNode : dependencies.vertexSet()) {
      int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
      int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
      int tokenBegin = tokens.get(indexBegin).getBegin();
      int tokenEnd = tokens.get(indexEnd - 1).getEnd();
      DependencyNode node;
      if (dependencies.getRoots().contains(stanfordNode)) {
        node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
      } else {
        node = new DependencyNode(jCas, tokenBegin, tokenEnd);
      }
      stanfordToUima.put(stanfordNode, node);
    }

    // create relation annotations for each Stanford dependency
    ArrayListMultimap<DependencyNode, DependencyRelation> headRelations =
        ArrayListMultimap.create();
    ArrayListMultimap<DependencyNode, DependencyRelation> childRelations =
        ArrayListMultimap.create();
    for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
      DependencyRelation relation = new DependencyRelation(jCas);
      DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
      DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
      String relationType = stanfordEdge.getRelation().toString();
      if (head == null || child == null || relationType == null) {
        throw new RuntimeException(
            String.format(
                "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n",
                relation, child, head));
      }
      relation.setHead(head);
      relation.setChild(child);
      relation.setRelation(relationType);
      relation.addToIndexes();
      headRelations.put(child, relation);
      childRelations.put(head, relation);
    }

    // set the relations for each node annotation
    for (DependencyNode node : stanfordToUima.values()) {
      List<DependencyRelation> heads = headRelations.get(node);
      node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
      if (heads != null) {
        FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
      }
      List<DependencyRelation> children = childRelations.get(node);
      node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
      if (children != null) {
        FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
      }
      node.addToIndexes();
    }
  }

  // map from spans to named entity mentions
  Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
  for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
    spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
  }

  // add mentions for all entities identified by the coreference system
  List<NamedEntity> entities = new ArrayList<NamedEntity>();
  List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
  for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
    sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
  }
  Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
  for (CorefChain chain : corefChains.values()) {
    List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
    for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {
      // figure out the character span of the token
      List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
      int begin = tokens.get(corefMention.startIndex - 1).getBegin();
      int end = tokens.get(corefMention.endIndex - 2).getEnd();

      // use an existing named entity mention when possible; otherwise create a new one
      NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
      if (mention == null) {
        mention = new NamedEntityMention(jCas, begin, end);
        mention.addToIndexes();
      }
      mentions.add(mention);
    }

    // create an entity for the mentions
    Collections.sort(
        mentions,
        new Comparator<NamedEntityMention>() {
          @Override
          public int compare(NamedEntityMention m1, NamedEntityMention m2) {
            return m1.getBegin() - m2.getBegin();
          }
        });

    // create mentions and add them to the entity
    NamedEntity entity = new NamedEntity(jCas);
    entity.setMentions(new FSArray(jCas, mentions.size()));
    int index = 0;
    for (NamedEntityMention mention : mentions) {
      mention.setMentionedEntity(entity);
      entity.setMentions(index, mention);
      index += 1;
    }
    entities.add(entity);
  }

  // add singleton entities for any named entities not picked up by the coreference system
  for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
    if (mention.getMentionedEntity() == null) {
      NamedEntity entity = new NamedEntity(jCas);
      entity.setMentions(new FSArray(jCas, 1));
      entity.setMentions(0, mention);
      mention.setMentionedEntity(entity);
      entity.getMentions();
      entities.add(entity);
    }
  }

  // sort entities by document order
  Collections.sort(
      entities,
      new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
          return getFirstBegin(o1) - getFirstBegin(o2);
        }

        private int getFirstBegin(NamedEntity entity) {
          int min = Integer.MAX_VALUE;
          for (NamedEntityMention mention :
              JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
            if (mention.getBegin() < min) {
              min = mention.getBegin();
            }
          }
          return min;
        }
      });

  // add entities to document
  for (NamedEntity entity : entities) {
    entity.addToIndexes();
  }
}
/** {@inheritDoc} */
@Override
public LabelFactory labelFactory() {
  return IndexedWord.factory();
}