public static void main(String[] args) throws IOException, ClassNotFoundException { Timing tim = new Timing(); AnnotationPipeline ap = new AnnotationPipeline(); boolean verbose = false; ap.addAnnotator(new TokenizerAnnotator(verbose, "en")); ap.addAnnotator(new WordsToSentencesAnnotator(verbose)); // ap.addAnnotator(new NERCombinerAnnotator(verbose)); // ap.addAnnotator(new OldNERAnnotator(verbose)); // ap.addAnnotator(new NERMergingAnnotator(verbose)); ap.addAnnotator(new ParserAnnotator(verbose, -1)); /** * ap.addAnnotator(new UpdateSentenceFromParseAnnotator(verbose)); ap.addAnnotator(new * NumberAnnotator(verbose)); ap.addAnnotator(new * QuantifiableEntityNormalizingAnnotator(verbose)); ap.addAnnotator(new * StemmerAnnotator(verbose)); ap.addAnnotator(new MorphaAnnotator(verbose)); */ // ap.addAnnotator(new SRLAnnotator()); String text = ("USAir said in the filings that Mr. Icahn first contacted Mr. Colodny last September to discuss the benefits of combining TWA and USAir -- either by TWA's acquisition of USAir, or USAir's acquisition of TWA."); Annotation a = new Annotation(text); ap.annotate(a); System.out.println(a.get(CoreAnnotations.TokensAnnotation.class)); for (CoreMap sentence : a.get(CoreAnnotations.SentencesAnnotation.class)) { System.out.println(sentence.get(TreeCoreAnnotations.TreeAnnotation.class)); } if (TIME) { System.out.println(ap.timingInformation()); System.err.println("Total time for AnnotationPipeline: " + tim.toSecondsString() + " sec."); } }
public static final String doCorefResolution(Annotation annotation) { Map<Integer, CorefChain> corefs = annotation.get(CorefChainAnnotation.class); List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class); List<String> resolved = new ArrayList<String>(); for (CoreMap sentence : sentences) { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); for (CoreLabel token : tokens) { Integer corefClustId = token.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class); CorefChain chain = corefs.get(corefClustId); if (chain == null) resolved.add(token.word()); else { int sentINdx = chain.getRepresentativeMention().sentNum - 1; CoreMap corefSentence = sentences.get(sentINdx); List<CoreLabel> corefSentenceTokens = corefSentence.get(TokensAnnotation.class); CorefMention reprMent = chain.getRepresentativeMention(); if (token.index() < reprMent.startIndex || token.index() > reprMent.endIndex) { for (int i = reprMent.startIndex; i < reprMent.endIndex; i++) { CoreLabel matchedLabel = corefSentenceTokens.get(i - 1); resolved.add(matchedLabel.word()); } } else resolved.add(token.word()); } } } String resolvedStr = ""; System.out.println(); for (String str : resolved) { resolvedStr += str + " "; } System.out.println(resolvedStr); return resolvedStr; }
@Override public void annotate(Annotation annotation) { if (verbose) { System.err.print("Adding TokensRegexNER annotations ... "); } List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class); if (sentences != null) { for (CoreMap sentence : sentences) { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); annotateMatched(tokens); } } else { List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); if (tokens != null) { annotateMatched(tokens); } else { throw new RuntimeException("Unable to find sentences or tokens in " + annotation); } } if (verbose) System.err.println("done."); }
@Override public void handle(HttpExchange httpExchange) throws IOException { // Set common response headers httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*"); Future<String> json = corenlpExecutor.submit( () -> { try { // Get the document Properties props = new Properties() { { setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse"); } }; Annotation doc = getDocument(props, httpExchange); if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) { StanfordCoreNLP pipeline = mkStanfordCoreNLP(props); pipeline.annotate(doc); } // Construct the matcher Map<String, String> params = getURLParams(httpExchange.getRequestURI()); // (get the pattern) if (!params.containsKey("pattern")) { respondError("Missing required parameter 'pattern'", httpExchange); return ""; } String pattern = params.get("pattern"); // (get whether to filter / find) String filterStr = params.getOrDefault("filter", "false"); final boolean filter = filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr.toLowerCase()); // (create the matcher) final SemgrexPattern regex = SemgrexPattern.compile(pattern); // Run TokensRegex return JSONOutputter.JSONWriter.objectToJSON( (docWriter) -> { if (filter) { // Case: just filter sentences docWriter.set( "sentences", doc.get(CoreAnnotations.SentencesAnnotation.class) .stream() .map( sentence -> regex .matcher( sentence.get( SemanticGraphCoreAnnotations .CollapsedCCProcessedDependenciesAnnotation .class)) .matches()) .collect(Collectors.toList())); } else { // Case: find matches docWriter.set( "sentences", doc.get(CoreAnnotations.SentencesAnnotation.class) .stream() .map( sentence -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> { SemgrexMatcher matcher = regex.matcher( sentence.get( SemanticGraphCoreAnnotations .CollapsedCCProcessedDependenciesAnnotation .class)); int i = 0; while (matcher.find()) { sentWriter.set( Integer.toString(i), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer matchWriter) -> { IndexedWord match = matcher.getMatch(); matchWriter.set("text", match.word()); matchWriter.set( "begin", match.index() - 1); matchWriter.set("end", match.index()); for (String capture : matcher.getNodeNames()) { matchWriter.set( "$" + capture, (Consumer<JSONOutputter.Writer>) groupWriter -> { IndexedWord node = matcher.getNode( capture); groupWriter.set( "text", node.word()); groupWriter.set( "begin", node.index() - 1); groupWriter.set( "end", node.index()); }); } }); i += 1; } sentWriter.set("length", i); })); } }); } catch (Exception e) { e.printStackTrace(); try { respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange); } catch (IOException ignored) { } } return ""; }); // Send response byte[] response = new byte[0]; try { response = json.get(5, TimeUnit.SECONDS).getBytes(); } catch (InterruptedException | ExecutionException | TimeoutException e) { respondError("Timeout when executing Semgrex query", httpExchange); } if (response.length > 0) { httpExchange.getResponseHeaders().add("Content-Type", "text/json"); httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length)); httpExchange.sendResponseHeaders(HTTP_OK, response.length); httpExchange.getResponseBody().write(response); httpExchange.close(); } }
@Override public void handle(HttpExchange httpExchange) throws IOException { // Set common response headers httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*"); // Get sentence. Properties props; Annotation ann; StanfordCoreNLP.OutputFormat of; log("[" + httpExchange.getRemoteAddress() + "] Received message"); try { props = getProperties(httpExchange); ann = getDocument(props, httpExchange); of = StanfordCoreNLP.OutputFormat.valueOf( props.getProperty("outputFormat", "json").toUpperCase()); // Handle direct browser connections (i.e., not a POST request). if (ann.get(CoreAnnotations.TextAnnotation.class).length() == 0) { log("[" + httpExchange.getRemoteAddress() + "] Interactive connection"); staticPageHandle.handle(httpExchange); return; } log("[" + httpExchange.getRemoteAddress() + "] API call"); } catch (Exception e) { // Return error message. e.printStackTrace(); String response = e.getMessage(); httpExchange.getResponseHeaders().add("Content-Type", "text/plain"); httpExchange.sendResponseHeaders(HTTP_BAD_INPUT, response.length()); httpExchange.getResponseBody().write(response.getBytes()); httpExchange.close(); return; } try { // Annotate StanfordCoreNLP pipeline = mkStanfordCoreNLP(props); Future<Annotation> completedAnnotationFuture = corenlpExecutor.submit( () -> { pipeline.annotate(ann); return ann; }); Annotation completedAnnotation = completedAnnotationFuture.get(5, TimeUnit.SECONDS); // Get output ByteArrayOutputStream os = new ByteArrayOutputStream(); StanfordCoreNLP.createOutputter(props, AnnotationOutputter.getOptions(pipeline)) .accept(completedAnnotation, os); os.close(); byte[] response = os.toByteArray(); httpExchange.getResponseHeaders().add("Content-Type", getContentType(props, of)); httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length)); httpExchange.sendResponseHeaders(HTTP_OK, response.length); httpExchange.getResponseBody().write(response); httpExchange.close(); } catch (TimeoutException e) { respondError("CoreNLP request timed out", httpExchange); } catch (Exception e) { // Return error message. respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange); } }