/**
 * Command-line demo: builds a CoreNLP pipeline, annotates one document and pretty-prints the
 * annotation.
 *
 * <p>Positional arguments (all optional):
 *
 * <ol>
 *   <li>{@code args[0]} - file whose contents are annotated; a built-in two-sentence sample is
 *       used when absent.
 *   <li>{@code args[1]} - file the pretty-printed annotation is written to; standard output is
 *       used when absent.
 *   <li>{@code args[2]} - file opened for XML output. NOTE(review): nothing is written to this
 *       file by this method, so it ends up empty; presumably an XML dump was intended - confirm.
 * </ol>
 *
 * @param args the positional arguments described above.
 * @throws IOException if one of the output files cannot be opened.
 */
public static void main(String[] args) throws IOException {
  final boolean outIsFile = args.length > 1;
  PrintWriter out = outIsFile ? new PrintWriter(args[1]) : new PrintWriter(System.out);
  PrintWriter xmlOut = null;
  try {
    if (args.length > 2) {
      xmlOut = new PrintWriter(args[2]);
    }

    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner,parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation annotation;
    if (args.length > 0) {
      annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[0]));
    } else {
      annotation =
          new Annotation(
              "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.");
    }

    pipeline.annotate(annotation);
    pipeline.prettyPrint(annotation, out);
  } finally {
    // Always push buffered output through; only close writers that own a file so that
    // System.out stays usable for anything that runs after us.
    out.flush();
    if (outIsFile) {
      out.close();
    }
    if (xmlOut != null) {
      xmlOut.close();
    }
  }
}
/** * Parse the parameters of a connection into a CoreNLP properties file that can be passed into * {@link StanfordCoreNLP}, and used in the I/O stages. * * @param httpExchange The http exchange; effectively, the request information. * @return A {@link Properties} object corresponding to a combination of default and passed * properties. * @throws UnsupportedEncodingException Thrown if we could not decode the key/value pairs with * UTF-8. */ private Properties getProperties(HttpExchange httpExchange) throws UnsupportedEncodingException { // Load the default properties Properties props = new Properties(); defaultProps .entrySet() .stream() .forEach( entry -> props.setProperty(entry.getKey().toString(), entry.getValue().toString())); // Try to get more properties from query string. Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI()); if (urlParams.containsKey("properties")) { StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8")) .entrySet() .forEach(entry -> props.setProperty(entry.getKey(), entry.getValue())); } else if (urlParams.containsKey("props")) { StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8")) .entrySet() .forEach(entry -> props.setProperty(entry.getKey(), entry.getValue())); } // Make sure the properties compile props.setProperty( "annotators", StanfordCoreNLP.ensurePrerequisiteAnnotators( props.getProperty("annotators").split("[, \t]+"))); return props; }
@Override public void handle(HttpExchange httpExchange) throws IOException { // Set common response headers httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*"); Future<String> json = corenlpExecutor.submit( () -> { try { // Get the document Properties props = new Properties() { { setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse"); } }; Annotation doc = getDocument(props, httpExchange); if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) { StanfordCoreNLP pipeline = mkStanfordCoreNLP(props); pipeline.annotate(doc); } // Construct the matcher Map<String, String> params = getURLParams(httpExchange.getRequestURI()); // (get the pattern) if (!params.containsKey("pattern")) { respondError("Missing required parameter 'pattern'", httpExchange); return ""; } String pattern = params.get("pattern"); // (get whether to filter / find) String filterStr = params.getOrDefault("filter", "false"); final boolean filter = filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr.toLowerCase()); // (create the matcher) final SemgrexPattern regex = SemgrexPattern.compile(pattern); // Run TokensRegex return JSONOutputter.JSONWriter.objectToJSON( (docWriter) -> { if (filter) { // Case: just filter sentences docWriter.set( "sentences", doc.get(CoreAnnotations.SentencesAnnotation.class) .stream() .map( sentence -> regex .matcher( sentence.get( SemanticGraphCoreAnnotations .CollapsedCCProcessedDependenciesAnnotation .class)) .matches()) .collect(Collectors.toList())); } else { // Case: find matches docWriter.set( "sentences", doc.get(CoreAnnotations.SentencesAnnotation.class) .stream() .map( sentence -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> { SemgrexMatcher matcher = regex.matcher( sentence.get( SemanticGraphCoreAnnotations .CollapsedCCProcessedDependenciesAnnotation .class)); int i = 0; while (matcher.find()) { sentWriter.set( Integer.toString(i), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer 
matchWriter) -> { IndexedWord match = matcher.getMatch(); matchWriter.set("text", match.word()); matchWriter.set( "begin", match.index() - 1); matchWriter.set("end", match.index()); for (String capture : matcher.getNodeNames()) { matchWriter.set( "$" + capture, (Consumer<JSONOutputter.Writer>) groupWriter -> { IndexedWord node = matcher.getNode( capture); groupWriter.set( "text", node.word()); groupWriter.set( "begin", node.index() - 1); groupWriter.set( "end", node.index()); }); } }); i += 1; } sentWriter.set("length", i); })); } }); } catch (Exception e) { e.printStackTrace(); try { respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange); } catch (IOException ignored) { } } return ""; }); // Send response byte[] response = new byte[0]; try { response = json.get(5, TimeUnit.SECONDS).getBytes(); } catch (InterruptedException | ExecutionException | TimeoutException e) { respondError("Timeout when executing Semgrex query", httpExchange); } if (response.length > 0) { httpExchange.getResponseHeaders().add("Content-Type", "text/json"); httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length)); httpExchange.sendResponseHeaders(HTTP_OK, response.length); httpExchange.getResponseBody().write(response); httpExchange.close(); } }
@Override public void handle(HttpExchange httpExchange) throws IOException { // Set common response headers httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*"); // Get sentence. Properties props; Annotation ann; StanfordCoreNLP.OutputFormat of; log("[" + httpExchange.getRemoteAddress() + "] Received message"); try { props = getProperties(httpExchange); ann = getDocument(props, httpExchange); of = StanfordCoreNLP.OutputFormat.valueOf( props.getProperty("outputFormat", "json").toUpperCase()); // Handle direct browser connections (i.e., not a POST request). if (ann.get(CoreAnnotations.TextAnnotation.class).length() == 0) { log("[" + httpExchange.getRemoteAddress() + "] Interactive connection"); staticPageHandle.handle(httpExchange); return; } log("[" + httpExchange.getRemoteAddress() + "] API call"); } catch (Exception e) { // Return error message. e.printStackTrace(); String response = e.getMessage(); httpExchange.getResponseHeaders().add("Content-Type", "text/plain"); httpExchange.sendResponseHeaders(HTTP_BAD_INPUT, response.length()); httpExchange.getResponseBody().write(response.getBytes()); httpExchange.close(); return; } try { // Annotate StanfordCoreNLP pipeline = mkStanfordCoreNLP(props); Future<Annotation> completedAnnotationFuture = corenlpExecutor.submit( () -> { pipeline.annotate(ann); return ann; }); Annotation completedAnnotation = completedAnnotationFuture.get(5, TimeUnit.SECONDS); // Get output ByteArrayOutputStream os = new ByteArrayOutputStream(); StanfordCoreNLP.createOutputter(props, AnnotationOutputter.getOptions(pipeline)) .accept(completedAnnotation, os); os.close(); byte[] response = os.toByteArray(); httpExchange.getResponseHeaders().add("Content-Type", getContentType(props, of)); httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length)); httpExchange.sendResponseHeaders(HTTP_OK, response.length); httpExchange.getResponseBody().write(response); httpExchange.close(); } catch 
(TimeoutException e) { respondError("CoreNLP request timed out", httpExchange); } catch (Exception e) { // Return error message. respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange); } }