/**
 * Demo entry point: runs a CoreNLP pipeline over a document and pretty-prints the result.
 *
 * <p>Arguments (all optional):
 *
 * <ul>
 *   <li>{@code args[0]} - file whose contents are annotated; a built-in sample text is used when
 *       absent.
 *   <li>{@code args[1]} - file the pretty-printed output is written to; standard output is used
 *       when absent.
 *   <li>{@code args[2]} - a second output file. It is opened (and therefore created/truncated) but
 *       nothing in this method writes to it. NOTE(review): presumably intended for XML output -
 *       confirm.
 * </ul>
 *
 * @param args Command line arguments as described above.
 * @throws IOException Thrown if an output file cannot be opened.
 */
public static void main(String[] args) throws IOException {
  boolean outIsFile = args.length > 1;
  PrintWriter out = outIsFile ? new PrintWriter(args[1]) : new PrintWriter(System.out);
  PrintWriter xmlOut = null;
  try {
    if (args.length > 2) {
      xmlOut = new PrintWriter(args[2]);
    }

    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner,parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation annotation;
    if (args.length > 0) {
      annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[0]));
    } else {
      annotation =
          new Annotation(
              "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.");
    }

    pipeline.annotate(annotation);
    pipeline.prettyPrint(annotation, out);
  } finally {
    // PrintWriter buffers its output; without an explicit flush/close the text may never reach
    // its destination.
    if (xmlOut != null) {
      xmlOut.close();
    }
    if (outIsFile) {
      out.close();
    } else {
      // Do not close System.out; just push the buffered text through.
      out.flush();
    }
  }
}
/**
 * Assembles the properties for an annotator from individual settings.
 *
 * <p>Every key is prefixed with {@code name + '.'} when {@code name} is non-null and non-empty;
 * otherwise the keys are used without a prefix.
 *
 * @param name Annotator name used to build the key prefix; may be null or empty.
 * @param mapping Value stored under the {@code mapping} key.
 * @param ignoreCase Value stored (as a string) under the {@code ignorecase} key.
 * @param validPosRegex Value stored under the {@code validpospattern} key; the key is omitted when
 *     this is null.
 * @return A new {@link Properties} object holding the settings above.
 */
private static Properties getProperties(
    String name, String mapping, boolean ignoreCase, String validPosRegex) {
  final String keyPrefix;
  if (name == null || name.isEmpty()) {
    keyPrefix = "";
  } else {
    keyPrefix = name + '.';
  }

  Properties result = new Properties();
  result.setProperty(keyPrefix + "mapping", mapping);
  result.setProperty(keyPrefix + "ignorecase", Boolean.toString(ignoreCase));
  if (validPosRegex == null) {
    return result;
  }
  result.setProperty(keyPrefix + "validpospattern", validPosRegex);
  return result;
}
/**
 * Reads the POST contents of the request and parses it into an Annotation object, ready to be
 * annotated. This method can also read a serialized document, if the input format is set to be
 * serialized.
 *
 * <p>The input format is taken from the {@code inputFormat} property (default {@code text}):
 *
 * <ul>
 *   <li>{@code text} - the request body is decoded as UTF-8 text and wrapped in an Annotation.
 *   <li>{@code serialized} - the request body is read with the serializer class named by the
 *       {@code inputSerializer} property (default: {@link ProtobufAnnotationSerializer}).
 * </ul>
 *
 * @param props The properties we are annotating with. This is where the input format is retrieved
 *     from.
 * @param httpExchange The exchange we are reading POST data from.
 * @return An Annotation representing the read document.
 * @throws IOException Thrown if we cannot read the POST data, or if the input format is unknown.
 * @throws ClassNotFoundException Thrown if we cannot load the serializer.
 */
private Annotation getDocument(Properties props, HttpExchange httpExchange)
    throws IOException, ClassNotFoundException {
  String inputFormat = props.getProperty("inputFormat", "text");
  switch (inputFormat) {
    case "text":
      // Decode explicitly as UTF-8: an InputStreamReader without a charset falls back to the
      // platform default, which would make results depend on the machine running the server.
      return new Annotation(
          IOUtils.slurpReader(new InputStreamReader(httpExchange.getRequestBody(), "UTF-8")));
    case "serialized":
      String inputSerializerName =
          props.getProperty("inputSerializer", ProtobufAnnotationSerializer.class.getName());
      AnnotationSerializer serializer = MetaClass.create(inputSerializerName).createInstance();
      Pair<Annotation, InputStream> pair = serializer.read(httpExchange.getRequestBody());
      return pair.first;
    default:
      throw new IOException("Could not parse input format: " + inputFormat);
  }
}
/** * Parse the parameters of a connection into a CoreNLP properties file that can be passed into * {@link StanfordCoreNLP}, and used in the I/O stages. * * @param httpExchange The http exchange; effectively, the request information. * @return A {@link Properties} object corresponding to a combination of default and passed * properties. * @throws UnsupportedEncodingException Thrown if we could not decode the key/value pairs with * UTF-8. */ private Properties getProperties(HttpExchange httpExchange) throws UnsupportedEncodingException { // Load the default properties Properties props = new Properties(); defaultProps .entrySet() .stream() .forEach( entry -> props.setProperty(entry.getKey().toString(), entry.getValue().toString())); // Try to get more properties from query string. Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI()); if (urlParams.containsKey("properties")) { StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8")) .entrySet() .forEach(entry -> props.setProperty(entry.getKey(), entry.getValue())); } else if (urlParams.containsKey("props")) { StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8")) .entrySet() .forEach(entry -> props.setProperty(entry.getKey(), entry.getValue())); } // Make sure the properties compile props.setProperty( "annotators", StanfordCoreNLP.ensurePrerequisiteAnnotators( props.getProperty("annotators").split("[, \t]+"))); return props; }
/**
 * Configures the annotator from a properties object.
 *
 * <p>All keys are looked up under the prefix {@code name + '.'} (no prefix when {@code name} is
 * null or empty). Keys read here: {@code backgroundSymbol}, {@code mapping}, {@code
 * validpospattern}, {@code posmatchtype}, {@code noDefaultOverwriteLabels}, {@code ignorecase}
 * and {@code verbose}.
 *
 * @param name Annotator name, used as the property key prefix.
 * @param properties Properties holding the configuration.
 */
public TokensRegexNERAnnotator(String name, Properties properties) {
  final String prefix;
  if (name == null || name.isEmpty()) {
    prefix = "";
  } else {
    prefix = name + '.';
  }

  // Comma-separated background symbols; comma- or semicolon-separated mapping files.
  String[] backgroundSymbols =
      properties
          .getProperty(prefix + "backgroundSymbol", DEFAULT_BACKGROUND_SYMBOL)
          .split("\\s*,\\s*");
  String[] mappings =
      properties
          .getProperty(prefix + "mapping", DefaultPaths.DEFAULT_REGEXNER_RULES)
          .split("\\s*[,;]\\s*");
  String posRegex = properties.getProperty(prefix + "validpospattern");

  String posMatchTypeName =
      properties.getProperty(prefix + "posmatchtype", DEFAULT_POS_MATCH_TYPE.name());
  this.posMatchType = PosMatchType.valueOf(posMatchTypeName);

  String protectedLabels = properties.getProperty(prefix + "noDefaultOverwriteLabels");
  if (protectedLabels == null) {
    this.noDefaultOverwriteLabels = Collections.unmodifiableSet(new HashSet<>());
  } else {
    this.noDefaultOverwriteLabels =
        Collections.unmodifiableSet(CollectionUtils.asSet(protectedLabels.split("\\s*,\\s*")));
  }

  this.ignoreCase = PropertiesUtils.getBool(properties, prefix + "ignorecase", false);
  this.verbose = PropertiesUtils.getBool(properties, prefix + "verbose", false);

  // A missing or empty regex means no POS pattern is set.
  this.validPosPattern =
      (posRegex == null || posRegex.isEmpty()) ? null : Pattern.compile(posRegex);

  this.entries =
      Collections.unmodifiableList(
          readEntries(
              name, this.noDefaultOverwriteLabels, this.ignoreCase, this.verbose, mappings));

  // The matcher factory fills this map; it is exposed afterwards as a read-only view.
  IdentityHashMap<SequencePattern<CoreMap>, Entry> entryByPattern = new IdentityHashMap<>();
  this.multiPatternMatcher = createPatternMatcher(entryByPattern);
  this.patternToEntry = Collections.unmodifiableMap(entryByPattern);

  // Labels this annotator may overwrite: the background symbols, null, and every entry type.
  Set<String> overwritable = Generics.newHashSet();
  for (String symbol : backgroundSymbols) {
    overwritable.add(symbol);
  }
  overwritable.add(null);
  for (Entry entry : this.entries) {
    overwritable.add(entry.type);
  }
  this.myLabels = Collections.unmodifiableSet(overwritable);
}
public StanfordCoreNLPServer(int port) throws IOException { serverPort = port; defaultProps = new Properties(); defaultProps.setProperty( "annotators", "tokenize, ssplit, pos, lemma, ner, parse, depparse, natlog, openie, dcoref"); defaultProps.setProperty("inputFormat", "text"); defaultProps.setProperty("outputFormat", "json"); // Generate and write a shutdown key String tmpDir = System.getProperty("java.io.tmpdir"); File tmpFile = new File(tmpDir + File.separator + "corenlp.shutdown"); tmpFile.deleteOnExit(); if (tmpFile.exists()) { if (!tmpFile.delete()) { throw new IllegalStateException("Could not delete shutdown key file"); } } this.shutdownKey = new BigInteger(130, new Random()).toString(32); IOUtils.writeStringToFile(shutdownKey, tmpFile.getPath(), "utf-8"); // Set the static page handler this.staticPageHandle = new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.html"); }
/**
 * Maps a CoreNLP output format to the content type reported in the HTTP response.
 *
 * <p>For serialized output the {@code outputSerializer} property decides: the protobuf
 * serializer yields {@code application/x-protobuf}; anything else (including no serializer
 * being set) yields {@code application/octet-stream}, which is also the answer for any format
 * not listed explicitly.
 *
 * @param props The properties being used by CoreNLP.
 * @param of The output format being output by CoreNLP.
 * @return An identifier for the type of the HTTP response (e.g., 'text/json').
 */
public String getContentType(Properties props, StanfordCoreNLP.OutputFormat of) {
  final String fallbackType = "application/octet-stream";
  switch (of) {
    case XML:
      return "text/xml";
    case JSON:
      return "text/json";
    case CONLL:
    case TEXT:
      return "text/plain";
    case SERIALIZED:
      String protobufName = ProtobufAnnotationSerializer.class.getName();
      boolean isProtobuf = protobufName.equals(props.getProperty("outputSerializer"));
      return isProtobuf ? "application/x-protobuf" : fallbackType;
    default:
      return fallbackType;
  }
}
@Override public void handle(HttpExchange httpExchange) throws IOException { // Set common response headers httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*"); // Get sentence. Properties props; Annotation ann; StanfordCoreNLP.OutputFormat of; log("[" + httpExchange.getRemoteAddress() + "] Received message"); try { props = getProperties(httpExchange); ann = getDocument(props, httpExchange); of = StanfordCoreNLP.OutputFormat.valueOf( props.getProperty("outputFormat", "json").toUpperCase()); // Handle direct browser connections (i.e., not a POST request). if (ann.get(CoreAnnotations.TextAnnotation.class).length() == 0) { log("[" + httpExchange.getRemoteAddress() + "] Interactive connection"); staticPageHandle.handle(httpExchange); return; } log("[" + httpExchange.getRemoteAddress() + "] API call"); } catch (Exception e) { // Return error message. e.printStackTrace(); String response = e.getMessage(); httpExchange.getResponseHeaders().add("Content-Type", "text/plain"); httpExchange.sendResponseHeaders(HTTP_BAD_INPUT, response.length()); httpExchange.getResponseBody().write(response.getBytes()); httpExchange.close(); return; } try { // Annotate StanfordCoreNLP pipeline = mkStanfordCoreNLP(props); Future<Annotation> completedAnnotationFuture = corenlpExecutor.submit( () -> { pipeline.annotate(ann); return ann; }); Annotation completedAnnotation = completedAnnotationFuture.get(5, TimeUnit.SECONDS); // Get output ByteArrayOutputStream os = new ByteArrayOutputStream(); StanfordCoreNLP.createOutputter(props, AnnotationOutputter.getOptions(pipeline)) .accept(completedAnnotation, os); os.close(); byte[] response = os.toByteArray(); httpExchange.getResponseHeaders().add("Content-Type", getContentType(props, of)); httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length)); httpExchange.sendResponseHeaders(HTTP_OK, response.length); httpExchange.getResponseBody().write(response); httpExchange.close(); } catch 
(TimeoutException e) { respondError("CoreNLP request timed out", httpExchange); } catch (Exception e) { // Return error message. respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange); } }