@Override
public Tuple parse(
    IndexedWord gov, IndexedWord dep, SemanticGraph depGraph, Tuple t, Set<IndexedWord> visited) {
  getPOSString(gov, dep); // note: the return value is not used here
  Tuple t1;

  // Check whether dep is a leaf node: recurse on inner nodes, otherwise wrap
  // the leaf word in a Notion entity.
  if (depGraph.hasChildren(dep)) {
    t1 = parse(dep, depGraph, visited);
  } else {
    Entity e = new Entity(dep.word(), EntityType.Notion);
    t1 = new Tuple(e);
  }

  // The specific of the grammatical relation on the gov->dep edge names the relation.
  String s = depGraph.getEdge(gov, dep).getRelation().getSpecific();
  Relation r = new Relation(s, RelationType.One2One);

  if (t == null) {
    // First call on this branch: the governor itself becomes an Object entity.
    Entity e1 = new Entity(gov.word(), EntityType.Object);
    Tuple t2 = new Tuple(e1);
    t = new Tuple(t1, r, t2);
  } else {
    t = new Tuple(t1, r, t);
  }

  logger.info(t.toString());
  return t;
}
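A minimal sketch of how this recursive parser might be driven. TupleParser is a hypothetical name for the enclosing class, and the three-argument parse overload is assumed to exist because this method itself calls it on inner nodes:

// Hypothetical driver: turn a parsed sentence into a Tuple tree.
SemanticGraph graph =
    SemanticGraph.valueOf("[ate nsubj>Bill dobj>[muffins compound>blueberry]]");
IndexedWord root = graph.getFirstRoot(); // "ate"
Tuple result = new TupleParser().parse(root, graph, new HashSet<>());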
public void testHasChildren() {
  SemanticGraph gr =
      SemanticGraph.valueOf("[ate subj>Bill dobj>[muffins compound>blueberry]]");
  List<IndexedWord> vertices = gr.vertexListSorted();
  for (IndexedWord word : vertices) {
    if (word.word().equals("ate") || word.word().equals("muffins")) {
      assertTrue(gr.hasChildren(word));
    } else {
      assertFalse(gr.hasChildren(word));
    }
  }
}
public Entity(IndexedWord... wrd) {
  // Join the surface forms of the given words with single spaces.
  StringBuilder name = new StringBuilder();
  for (IndexedWord w : wrd) {
    if (name.length() > 0) {
      name.append(' ');
    }
    name.append(w.word());
  }
  this.name = name.toString();
  this.type = EntityType.Unknown;
}
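For illustration, a small sketch of the varargs constructor in use; the two tokens are taken from a parsed SemanticGraph:

// Build an Entity named "blueberry muffins" from a compound noun phrase.
SemanticGraph g = SemanticGraph.valueOf("[muffins compound>blueberry]");
IndexedWord head = g.getFirstRoot();                // "muffins"
IndexedWord modifier = g.getChildList(head).get(0); // "blueberry"
Entity entity = new Entity(modifier, head);         // name = "blueberry muffins", type = Unknown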
/**
 * This method attempts to resolve noun phrases which consist of more than one word. More
 * precisely, it looks for nn (noun compound modifier) dependencies below {@code head} and
 * builds the full phrase from them.
 *
 * @param head The head of the noun phrase
 * @param graph The sentence to look in
 * @param words The words which make up the noun phrase; the parts found are added to this list
 * @return The resolved noun phrase as a single string, or {@code null} if no compound parts
 *     were found
 */
public static String resolveNN(
    IndexedWord head, SemanticGraph graph, ArrayList<IndexedWord> words) {
  Set<IndexedWord> nns =
      graph.getChildrenWithReln(head, EnglishGrammaticalRelations.NOUN_COMPOUND_MODIFIER);

  // Check for null/empty results. If there is nothing here, we have nothing to do.
  if (nns == null || nns.isEmpty()) {
    return null;
  }

  StringBuilder name = new StringBuilder();
  for (IndexedWord part : nns) {
    name.append(part.word()).append(' ');
    words.add(part); // save this word as a part of the results
  }

  // Append the head ("starting") word.
  name.append(head.word());
  words.add(head); // save this word as a part of the results
  return name.toString();
}
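A usage sketch for resolveNN. This assumes a graph whose edge carries the Stanford Dependencies nn relation that NOUN_COMPOUND_MODIFIER denotes; newer CoreNLP versions emit the UD compound relation instead, which this lookup would not match:

// Illustrative only: resolve "blueberry muffins" from an nn edge.
SemanticGraph g = SemanticGraph.valueOf("[muffins nn>blueberry]");
IndexedWord head = g.getFirstRoot();       // "muffins"
ArrayList<IndexedWord> parts = new ArrayList<>();
String phrase = resolveNN(head, g, parts); // "blueberry muffins"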
@Override
public void handle(HttpExchange httpExchange) throws IOException {
  // Set common response headers
  httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

  Future<String> json =
      corenlpExecutor.submit(
          () -> {
            try {
              // Get the document
              Properties props = new Properties();
              props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
              Annotation doc = getDocument(props, httpExchange);
              if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                pipeline.annotate(doc);
              }

              // Construct the matcher
              Map<String, String> params = getURLParams(httpExchange.getRequestURI());
              // (get the pattern)
              if (!params.containsKey("pattern")) {
                respondError("Missing required parameter 'pattern'", httpExchange);
                return "";
              }
              String pattern = params.get("pattern");
              // (get whether to filter / find)
              String filterStr = params.getOrDefault("filter", "false");
              final boolean filter =
                  filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr);
              // (create the matcher)
              final SemgrexPattern regex = SemgrexPattern.compile(pattern);

              // Run Semgrex
              return JSONOutputter.JSONWriter.objectToJSON(
                  (docWriter) -> {
                    if (filter) {
                      // Case: just filter sentences
                      docWriter.set(
                          "sentences",
                          doc.get(CoreAnnotations.SentencesAnnotation.class).stream()
                              .map(
                                  sentence ->
                                      regex
                                          .matcher(
                                              sentence.get(
                                                  SemanticGraphCoreAnnotations
                                                      .CollapsedCCProcessedDependenciesAnnotation
                                                      .class))
                                          .matches())
                              .collect(Collectors.toList()));
                    } else {
                      // Case: find matches
                      docWriter.set(
                          "sentences",
                          doc.get(CoreAnnotations.SentencesAnnotation.class).stream()
                              .map(
                                  sentence ->
                                      (Consumer<JSONOutputter.Writer>)
                                          (JSONOutputter.Writer sentWriter) -> {
                                            SemgrexMatcher matcher =
                                                regex.matcher(
                                                    sentence.get(
                                                        SemanticGraphCoreAnnotations
                                                            .CollapsedCCProcessedDependenciesAnnotation
                                                            .class));
                                            int i = 0;
                                            while (matcher.find()) {
                                              sentWriter.set(
                                                  Integer.toString(i),
                                                  (Consumer<JSONOutputter.Writer>)
                                                      (JSONOutputter.Writer matchWriter) -> {
                                                        IndexedWord match = matcher.getMatch();
                                                        matchWriter.set("text", match.word());
                                                        matchWriter.set(
                                                            "begin", match.index() - 1);
                                                        matchWriter.set("end", match.index());
                                                        for (String capture :
                                                            matcher.getNodeNames()) {
                                                          matchWriter.set(
                                                              "$" + capture,
                                                              (Consumer<JSONOutputter.Writer>)
                                                                  groupWriter -> {
                                                                    IndexedWord node =
                                                                        matcher.getNode(capture);
                                                                    groupWriter.set(
                                                                        "text", node.word());
                                                                    groupWriter.set(
                                                                        "begin",
                                                                        node.index() - 1);
                                                                    groupWriter.set(
                                                                        "end", node.index());
                                                                  });
                                                        }
                                                      });
                                              i += 1;
                                            }
                                            sentWriter.set("length", i);
                                          }));
                    }
                  });
            } catch (Exception e) {
              e.printStackTrace();
              try {
                respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
              } catch (IOException ignored) {
              }
            }
            return "";
          });

  // Send response
  byte[] response = new byte[0];
  try {
    response = json.get(5, TimeUnit.SECONDS).getBytes();
  } catch (InterruptedException | ExecutionException | TimeoutException e) {
    respondError("Timeout when executing Semgrex query", httpExchange);
  }
  if (response.length > 0) {
    httpExchange.getResponseHeaders().add("Content-Type", "text/json");
    httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
    httpExchange.sendResponseHeaders(HTTP_OK, response.length);
    httpExchange.getResponseBody().write(response);
    httpExchange.close();
  }
}
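For context, a hypothetical client for this handler. The /semgrex path and port 9000 are assumptions (they match the StanfordCoreNLPServer defaults, but the handler may be mounted elsewhere), and getDocument above is presumed to read the text to annotate from the request body:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

public class SemgrexClientExample {
  public static void main(String[] args) throws Exception {
    // "pattern" is the required URL parameter checked by the handler;
    // {pos:NN} is a minimal Semgrex pattern matching any singular noun.
    String pattern = URLEncoder.encode("{pos:NN}", "UTF-8");
    URL url = new URL("http://localhost:9000/semgrex?pattern=" + pattern + "&filter=false");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod("POST");
    conn.setDoOutput(true);
    // The text to annotate goes in the request body.
    conn.getOutputStream().write("Bill ate blueberry muffins.".getBytes("UTF-8"));
    try (BufferedReader in =
        new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
      in.lines().forEach(System.out::println); // the JSON produced by the handler
    }
  }
}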