public void testGetCommonAncestor() {
  IndexedWord common = graph.getCommonAncestor(graph.getNodeByIndex(43), graph.getNodeByIndex(44));
  assertEquals(45, common.index());

  common = graph.getCommonAncestor(graph.getNodeByIndex(41), graph.getNodeByIndex(39));
  assertEquals(41, common.index());

  common = graph.getCommonAncestor(graph.getNodeByIndex(39), graph.getNodeByIndex(41));
  assertEquals(41, common.index());

  common = graph.getCommonAncestor(graph.getNodeByIndex(40), graph.getNodeByIndex(42));
  assertEquals(41, common.index());

  // too far for this method
  common = graph.getCommonAncestor(graph.getNodeByIndex(10), graph.getNodeByIndex(42));
  assertEquals(null, common);

  common = graph.getCommonAncestor(graph.getNodeByIndex(10), graph.getNodeByIndex(10));
  assertEquals(10, common.index());

  common = graph.getCommonAncestor(graph.getNodeByIndex(40), graph.getNodeByIndex(40));
  assertEquals(40, common.index());

  // a couple tests at the top of the graph
  common = graph.getCommonAncestor(graph.getNodeByIndex(10), graph.getNodeByIndex(1));
  assertEquals(10, common.index());

  common = graph.getCommonAncestor(graph.getNodeByIndex(1), graph.getNodeByIndex(10));
  assertEquals(10, common.index());
}
private static void verifySet(Collection<IndexedWord> nodes, int... expected) {
  Set<Integer> results = Generics.newTreeSet();
  for (IndexedWord node : nodes) {
    results.add(node.index());
  }
  Set<Integer> expectedIndices = Generics.newTreeSet();
  for (Integer index : expected) {
    expectedIndices.add(index);
  }
  assertEquals(expectedIndices, results);
}
public void testCommonAncestor() {
  IndexedWord word1 = graph.getNodeByIndex(43);
  IndexedWord word2 = graph.getNodeByIndex(44);
  IndexedWord common = graph.getCommonAncestor(word1, word2);
  System.out.println("word1: " + word1);
  System.out.println("word2: " + word2);
  System.out.println("common: " + common);
  System.out.println("common ancestor between " + word1.value() + "-" + word1.index()
      + " and " + word2.value() + "-" + word2.index()
      + " is " + common.value() + "-" + common.index());
  assertEquals(45, common.index());
}
public static DependencyParse parse(String text) {
  if (pipeline == null) {
    loadModels();
  }
  DependencyParse parse = new DependencyParse();
  Annotation document = new Annotation(text);
  pipeline.annotate(document);
  List<CoreMap> sentences = document.get(SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    IndexedWord root = dependencies.getFirstRoot();
    parse.setHeadNode(root.index());
    List<SemanticGraphEdge> edges = dependencies.edgeListSorted();
    // System.out.println(edges);
    for (SemanticGraphEdge t : edges) {
      String dep = t.getDependent().originalText();
      int depIndex = t.getDependent().index();
      String depPOS = t.getDependent().tag();
      int depStart = t.getDependent().beginPosition();
      int depEnd = t.getDependent().endPosition();

      String gov = t.getGovernor().originalText();
      int govIndex = t.getGovernor().index();
      String govPOS = t.getGovernor().tag();
      int govStart = t.getGovernor().beginPosition();
      int govEnd = t.getGovernor().endPosition();

      parse.addNode(govIndex, gov, govPOS, govStart, govEnd);
      parse.addNode(depIndex, dep, depPOS, depStart, depEnd);
      parse.addEdge(depIndex, govIndex, t.getRelation().getShortName());
    }
  }
  return parse;
}
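// A minimal usage sketch (a hypothetical driver, not part of the original code).
// parse() lazily loads the CoreNLP pipeline on first use, then converts each
// sentence's collapsed-CC-processed SemanticGraph into a DependencyParse by
// registering governor and dependent nodes and an edge labeled with the
// relation's short name. Printing the result assumes DependencyParse has a
// useful toString(), which is not shown above.
public static void main(String[] args) {
  DependencyParse parse = parse("Stanford is a university in California.");
  System.out.println(parse);
}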
/**
 * NOTE: This compareTo is based on and made to be compatible with the one from
 * IndexedFeatureLabel. You <em>must</em> have a DocIDAnnotation, SentenceIndexAnnotation, and
 * IndexAnnotation for this to make sense and be guaranteed to work properly. Currently, it won't
 * error out and will try to return something sensible if these are not defined, but that really
 * isn't proper usage!
 *
 * <p>This compareTo method orders not by value elements like word(), but by position in the
 * passage. It puts NO_WORD elements first, and then orders by document, sentence, word index,
 * and finally copy count. If none of these differ, the words compare as equal.
 *
 * @param w The IndexedWord to compare with
 * @return A negative, zero, or positive integer as this word precedes, equals, or follows w in
 *     the ordering
 */
@Override
public int compareTo(IndexedWord w) {
  if (this.equals(IndexedWord.NO_WORD)) {
    if (w.equals(IndexedWord.NO_WORD)) {
      return 0;
    } else {
      return -1;
    }
  }
  if (w.equals(IndexedWord.NO_WORD)) {
    return 1;
  }

  // Override the default comparator if pseudo-positions are set.
  // This is needed for splicing trees together awkwardly in OpenIE.
  if (!Double.isNaN(w.pseudoPosition) || !Double.isNaN(this.pseudoPosition)) {
    double val = this.pseudoPosition() - w.pseudoPosition();
    if (val < 0) {
      return -1;
    } else if (val > 0) {
      return 1;
    } else {
      return 0;
    }
  }

  // Otherwise, compare using the normal doc/sentence/token index hierarchy
  String docID = this.getString(CoreAnnotations.DocIDAnnotation.class);
  int docComp = docID.compareTo(w.getString(CoreAnnotations.DocIDAnnotation.class));
  if (docComp != 0) return docComp;

  int sentComp = sentIndex() - w.sentIndex();
  if (sentComp != 0) return sentComp;

  int indexComp = index() - w.index();
  if (indexComp != 0) return indexComp;

  return copyCount() - w.copyCount();
}
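// A minimal usage sketch of the ordering this compareTo induces (illustrative only;
// the 'graph' parameter and this helper are assumptions, not part of the original
// code). Sorting IndexedWords with the comparator above yields passage order --
// NO_WORD first, then document ID, sentence index, token index, and copy count --
// rather than alphabetical order by word().
private static List<IndexedWord> inPassageOrder(SemanticGraph graph) {
  List<IndexedWord> words = new ArrayList<>(graph.vertexSet());
  Collections.sort(words);  // uses IndexedWord.compareTo defined above
  return words;
}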
@Override
public void handle(HttpExchange httpExchange) throws IOException {
  // Set common response headers
  httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

  Future<String> json = corenlpExecutor.submit(() -> {
    try {
      // Get the document
      Properties props = new Properties() {{
        setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
      }};
      Annotation doc = getDocument(props, httpExchange);
      if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
        pipeline.annotate(doc);
      }

      // Construct the matcher
      Map<String, String> params = getURLParams(httpExchange.getRequestURI());
      // (get the pattern)
      if (!params.containsKey("pattern")) {
        respondError("Missing required parameter 'pattern'", httpExchange);
        return "";
      }
      String pattern = params.get("pattern");
      // (get whether to filter / find)
      String filterStr = params.getOrDefault("filter", "false");
      final boolean filter =
          filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr.toLowerCase());
      // (create the matcher)
      final SemgrexPattern regex = SemgrexPattern.compile(pattern);

      // Run Semgrex
      return JSONOutputter.JSONWriter.objectToJSON((docWriter) -> {
        if (filter) {
          // Case: just filter sentences
          docWriter.set("sentences",
              doc.get(CoreAnnotations.SentencesAnnotation.class).stream()
                  .map(sentence -> regex.matcher(
                          sentence.get(SemanticGraphCoreAnnotations
                              .CollapsedCCProcessedDependenciesAnnotation.class))
                      .matches())
                  .collect(Collectors.toList()));
        } else {
          // Case: find matches
          docWriter.set("sentences",
              doc.get(CoreAnnotations.SentencesAnnotation.class).stream()
                  .map(sentence -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> {
                    SemgrexMatcher matcher = regex.matcher(
                        sentence.get(SemanticGraphCoreAnnotations
                            .CollapsedCCProcessedDependenciesAnnotation.class));
                    int i = 0;
                    while (matcher.find()) {
                      sentWriter.set(Integer.toString(i),
                          (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer matchWriter) -> {
                            IndexedWord match = matcher.getMatch();
                            matchWriter.set("text", match.word());
                            matchWriter.set("begin", match.index() - 1);
                            matchWriter.set("end", match.index());
                            for (String capture : matcher.getNodeNames()) {
                              matchWriter.set("$" + capture,
                                  (Consumer<JSONOutputter.Writer>) groupWriter -> {
                                    IndexedWord node = matcher.getNode(capture);
                                    groupWriter.set("text", node.word());
                                    groupWriter.set("begin", node.index() - 1);
                                    groupWriter.set("end", node.index());
                                  });
                            }
                          });
                      i += 1;
                    }
                    sentWriter.set("length", i);
                  }));
        }
      });
    } catch (Exception e) {
      e.printStackTrace();
      try {
        respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
      } catch (IOException ignored) {
      }
    }
    return "";
  });

  // Send response
  byte[] response = new byte[0];
  try {
    response = json.get(5, TimeUnit.SECONDS).getBytes();
  } catch (InterruptedException | ExecutionException | TimeoutException e) {
    respondError("Timeout when executing Semgrex query", httpExchange);
  }
  if (response.length > 0) {
    httpExchange.getResponseHeaders().add("Content-Type", "text/json");
    httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
    httpExchange.sendResponseHeaders(HTTP_OK, response.length);
    httpExchange.getResponseBody().write(response);
    httpExchange.close();
  }
}
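// Example request for this handler (a sketch; the mount path "/semgrex" and the
// port are assumptions -- the handler above only defines the "pattern" and
// "filter" parameters it reads and the JSON shape it writes):
//
//   curl --data 'The quick brown fox jumped over the lazy dog.' \
//     'http://localhost:9000/semgrex?pattern={pos:/VB.*/}&filter=false'
//
// With filter=false the response lists each Semgrex match per sentence (text,
// begin, end, plus any named captures under "$name"); with filter=true it
// returns one boolean per sentence indicating whether the pattern matched.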