/**
 * Searches {@code tokens} for the first occurrence of this extractor's token pattern and
 * converts that match into a temporal value.
 *
 * @param tokens the token sequence to search
 * @return the result of {@code extract} for the first match, or {@code null} when the pattern
 *     does not occur anywhere in {@code tokens}
 */
public SUTime.Temporal apply(List<? extends CoreMap> tokens) {
  final TokenSequenceMatcher tokenMatcher = tokenPattern.getMatcher(tokens);
  // Guard clause: nothing to extract when the pattern is not found.
  if (!tokenMatcher.find()) {
    return null;
  }
  return extract(tokenMatcher);
}
/**
 * Replaces every event in the chunk with one new event per token-regex match found in the
 * text produced by the second attribute executor.
 *
 * <p>For each incoming event the text is run through {@code pipeline}; every sentence of the
 * resulting document is scanned with {@code regexPattern}. Each match yields a copy of the
 * incoming event populated with the whole match (index 0) followed by capture groups
 * {@code 1 .. attributeCount - 1}; the copy is inserted before the current position and the
 * incoming event itself is then removed. The chunk traversal runs while holding this
 * instance's monitor; the chunk is handed to {@code nextProcessor} after the monitor is
 * released.
 *
 * @param streamEventChunk the chunk of events to rewrite in place
 * @param nextProcessor the processor that receives the rewritten chunk
 * @param streamEventCloner used to copy the incoming event for each match
 * @param complexEventPopulater used to attach the match data to each copied event
 */
@Override
protected void process(
    ComplexEventChunk<StreamEvent> streamEventChunk,
    Processor nextProcessor,
    StreamEventCloner streamEventCloner,
    ComplexEventPopulater complexEventPopulater) {
  synchronized (this) {
    while (streamEventChunk.hasNext()) {
      final StreamEvent incoming = streamEventChunk.next();
      if (logger.isDebugEnabled()) {
        final String message =
            String.format(
                "Event received. Regex:%s Event:%s", regexPattern.pattern(), incoming);
        logger.debug(message);
      }

      // The text to analyse comes from the executor at index 1.
      final String text = attributeExpressionExecutors[1].execute(incoming).toString();
      final Annotation annotated = pipeline.process(text);

      for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
        final TokenSequenceMatcher sentenceMatcher =
            regexPattern.getMatcher(sentence.get(CoreAnnotations.TokensAnnotation.class));
        while (sentenceMatcher.find()) {
          // Slot 0 holds the whole match; the remaining slots hold the numbered groups.
          final Object[] matchData = new Object[attributeCount];
          matchData[0] = sentenceMatcher.group();
          int groupIndex = 1;
          while (groupIndex < attributeCount) {
            matchData[groupIndex] = sentenceMatcher.group(groupIndex);
            groupIndex++;
          }

          final StreamEvent matchEvent = streamEventCloner.copyStreamEvent(incoming);
          complexEventPopulater.populateComplexEvent(matchEvent, matchData);
          streamEventChunk.insertBeforeCurrent(matchEvent);
        }
      }

      // Drop the incoming event; only the per-match copies stay in the chunk.
      streamEventChunk.remove();
    }
  }
  nextProcessor.process(streamEventChunk);
}
@Override public void handle(HttpExchange httpExchange) throws IOException { // Set common response headers httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*"); Future<String> json = corenlpExecutor.submit( () -> { try { // Get the document Properties props = new Properties() { { setProperty("annotators", "tokenize,ssplit,pos,lemma,ner"); } }; Annotation doc = getDocument(props, httpExchange); if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) { StanfordCoreNLP pipeline = mkStanfordCoreNLP(props); pipeline.annotate(doc); } // Construct the matcher Map<String, String> params = getURLParams(httpExchange.getRequestURI()); // (get the pattern) if (!params.containsKey("pattern")) { respondError("Missing required parameter 'pattern'", httpExchange); return ""; } String pattern = params.get("pattern"); // (get whether to filter / find) String filterStr = params.getOrDefault("filter", "false"); final boolean filter = filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr.toLowerCase()); // (create the matcher) final TokenSequencePattern regex = TokenSequencePattern.compile(pattern); // Run TokensRegex return JSONOutputter.JSONWriter.objectToJSON( (docWriter) -> { if (filter) { // Case: just filter sentences docWriter.set( "sentences", doc.get(CoreAnnotations.SentencesAnnotation.class) .stream() .map( sentence -> regex .matcher( sentence.get( CoreAnnotations.TokensAnnotation.class)) .matches()) .collect(Collectors.toList())); } else { // Case: find matches docWriter.set( "sentences", doc.get(CoreAnnotations.SentencesAnnotation.class) .stream() .map( sentence -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> { List<CoreLabel> tokens = sentence.get( CoreAnnotations.TokensAnnotation.class); TokenSequenceMatcher matcher = regex.matcher(tokens); int i = 0; while (matcher.find()) { sentWriter.set( Integer.toString(i), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer matchWriter) -> { matchWriter.set( 
"text", matcher.group()); matchWriter.set( "begin", matcher.start()); matchWriter.set("end", matcher.end()); for (int groupI = 0; groupI < matcher.groupCount(); ++groupI) { SequenceMatchResult.MatchedGroupInfo< CoreMap> info = matcher.groupInfo(groupI + 1); matchWriter.set( info.varName == null ? Integer.toString(groupI + 1) : info.varName, (Consumer<JSONOutputter.Writer>) groupWriter -> { groupWriter.set( "text", info.text); if (info.nodes.size() > 0) { groupWriter.set( "begin", info.nodes .get(0) .get( CoreAnnotations .IndexAnnotation .class) - 1); groupWriter.set( "end", info.nodes .get( info.nodes .size() - 1) .get( CoreAnnotations .IndexAnnotation .class)); } }); } }); i += 1; } sentWriter.set("length", i); })); } }); } catch (Exception e) { e.printStackTrace(); try { respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange); } catch (IOException ignored) { } } return ""; }); // Send response byte[] response = new byte[0]; try { response = json.get(5, TimeUnit.SECONDS).getBytes(); } catch (InterruptedException | ExecutionException | TimeoutException e) { respondError("Timeout when executing TokensRegex query", httpExchange); } if (response.length > 0) { httpExchange.getResponseHeaders().add("Content-Type", "text/json"); httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length)); httpExchange.sendResponseHeaders(HTTP_OK, response.length); httpExchange.getResponseBody().write(response); httpExchange.close(); } }