public SUTime.Temporal apply(List<? extends CoreMap> tokens) {
   TokenSequenceMatcher matcher = tokenPattern.getMatcher(tokens);
   if (matcher.find()) {
     return extract(matcher);
   }
   return null;
 }
  @Override
  protected void process(
      ComplexEventChunk<StreamEvent> streamEventChunk,
      Processor nextProcessor,
      StreamEventCloner streamEventCloner,
      ComplexEventPopulater complexEventPopulater) {
    synchronized (this) {
      while (streamEventChunk.hasNext()) {
        StreamEvent streamEvent = streamEventChunk.next();
        if (logger.isDebugEnabled()) {
          logger.debug(
              String.format(
                  "Event received. Regex:%s Event:%s", regexPattern.pattern(), streamEvent));
        }

        Annotation document =
            pipeline.process(attributeExpressionExecutors[1].execute(streamEvent).toString());

        for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
          TokenSequenceMatcher matcher =
              regexPattern.getMatcher(sentence.get(CoreAnnotations.TokensAnnotation.class));
          while (matcher.find()) {
            Object[] data = new Object[attributeCount];
            data[0] = matcher.group();
            for (int i = 1; i < attributeCount; i++) {
              data[i] = matcher.group(i);
            }
            StreamEvent newStreamEvent = streamEventCloner.copyStreamEvent(streamEvent);
            complexEventPopulater.populateComplexEvent(newStreamEvent, data);
            streamEventChunk.insertBeforeCurrent(newStreamEvent);
          }
        }
        streamEventChunk.remove();
      }
    }
    nextProcessor.process(streamEventChunk);
  }
Example #3
0
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      Future<String> json =
          corenlpExecutor.submit(
              () -> {
                try {
                  // Get the document
                  Properties props =
                      new Properties() {
                        {
                          setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
                        }
                      };
                  Annotation doc = getDocument(props, httpExchange);
                  if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                    StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                    pipeline.annotate(doc);
                  }

                  // Construct the matcher
                  Map<String, String> params = getURLParams(httpExchange.getRequestURI());
                  // (get the pattern)
                  if (!params.containsKey("pattern")) {
                    respondError("Missing required parameter 'pattern'", httpExchange);
                    return "";
                  }
                  String pattern = params.get("pattern");
                  // (get whether to filter / find)
                  String filterStr = params.getOrDefault("filter", "false");
                  final boolean filter =
                      filterStr.trim().isEmpty()
                          || "true".equalsIgnoreCase(filterStr.toLowerCase());
                  // (create the matcher)
                  final TokenSequencePattern regex = TokenSequencePattern.compile(pattern);

                  // Run TokensRegex
                  return JSONOutputter.JSONWriter.objectToJSON(
                      (docWriter) -> {
                        if (filter) {
                          // Case: just filter sentences
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          regex
                                              .matcher(
                                                  sentence.get(
                                                      CoreAnnotations.TokensAnnotation.class))
                                              .matches())
                                  .collect(Collectors.toList()));
                        } else {
                          // Case: find matches
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          (Consumer<JSONOutputter.Writer>)
                                              (JSONOutputter.Writer sentWriter) -> {
                                                List<CoreLabel> tokens =
                                                    sentence.get(
                                                        CoreAnnotations.TokensAnnotation.class);
                                                TokenSequenceMatcher matcher =
                                                    regex.matcher(tokens);
                                                int i = 0;
                                                while (matcher.find()) {
                                                  sentWriter.set(
                                                      Integer.toString(i),
                                                      (Consumer<JSONOutputter.Writer>)
                                                          (JSONOutputter.Writer matchWriter) -> {
                                                            matchWriter.set(
                                                                "text", matcher.group());
                                                            matchWriter.set(
                                                                "begin", matcher.start());
                                                            matchWriter.set("end", matcher.end());
                                                            for (int groupI = 0;
                                                                groupI < matcher.groupCount();
                                                                ++groupI) {
                                                              SequenceMatchResult.MatchedGroupInfo<
                                                                      CoreMap>
                                                                  info =
                                                                      matcher.groupInfo(groupI + 1);
                                                              matchWriter.set(
                                                                  info.varName == null
                                                                      ? Integer.toString(groupI + 1)
                                                                      : info.varName,
                                                                  (Consumer<JSONOutputter.Writer>)
                                                                      groupWriter -> {
                                                                        groupWriter.set(
                                                                            "text", info.text);
                                                                        if (info.nodes.size() > 0) {
                                                                          groupWriter.set(
                                                                              "begin",
                                                                              info.nodes
                                                                                      .get(0)
                                                                                      .get(
                                                                                          CoreAnnotations
                                                                                              .IndexAnnotation
                                                                                              .class)
                                                                                  - 1);
                                                                          groupWriter.set(
                                                                              "end",
                                                                              info.nodes
                                                                                  .get(
                                                                                      info.nodes
                                                                                              .size()
                                                                                          - 1)
                                                                                  .get(
                                                                                      CoreAnnotations
                                                                                          .IndexAnnotation
                                                                                          .class));
                                                                        }
                                                                      });
                                                            }
                                                          });
                                                  i += 1;
                                                }
                                                sentWriter.set("length", i);
                                              }));
                        }
                      });
                } catch (Exception e) {
                  e.printStackTrace();
                  try {
                    respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
                  } catch (IOException ignored) {
                  }
                }
                return "";
              });

      // Send response
      byte[] response = new byte[0];
      try {
        response = json.get(5, TimeUnit.SECONDS).getBytes();
      } catch (InterruptedException | ExecutionException | TimeoutException e) {
        respondError("Timeout when executing TokensRegex query", httpExchange);
      }
      if (response.length > 0) {
        httpExchange.getResponseHeaders().add("Content-Type", "text/json");
        httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
        httpExchange.sendResponseHeaders(HTTP_OK, response.length);
        httpExchange.getResponseBody().write(response);
        httpExchange.close();
      }
    }