Esempio n. 1
0
  public static void main(String[] args) throws IOException, ClassNotFoundException {
    Timing tim = new Timing();
    AnnotationPipeline ap = new AnnotationPipeline();
    boolean verbose = false;
    ap.addAnnotator(new TokenizerAnnotator(verbose, "en"));
    ap.addAnnotator(new WordsToSentencesAnnotator(verbose));
    // ap.addAnnotator(new NERCombinerAnnotator(verbose));
    // ap.addAnnotator(new OldNERAnnotator(verbose));
    // ap.addAnnotator(new NERMergingAnnotator(verbose));
    ap.addAnnotator(new ParserAnnotator(verbose, -1));
    /**
     * ap.addAnnotator(new UpdateSentenceFromParseAnnotator(verbose)); ap.addAnnotator(new
     * NumberAnnotator(verbose)); ap.addAnnotator(new
     * QuantifiableEntityNormalizingAnnotator(verbose)); ap.addAnnotator(new
     * StemmerAnnotator(verbose)); ap.addAnnotator(new MorphaAnnotator(verbose));
     */
    //    ap.addAnnotator(new SRLAnnotator());

    String text =
        ("USAir said in the filings that Mr. Icahn first contacted Mr. Colodny last September to discuss the benefits of combining TWA and USAir -- either by TWA's acquisition of USAir, or USAir's acquisition of TWA.");
    Annotation a = new Annotation(text);
    ap.annotate(a);
    System.out.println(a.get(CoreAnnotations.TokensAnnotation.class));
    for (CoreMap sentence : a.get(CoreAnnotations.SentencesAnnotation.class)) {
      System.out.println(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
    }

    if (TIME) {
      System.out.println(ap.timingInformation());
      System.err.println("Total time for AnnotationPipeline: " + tim.toSecondsString() + " sec.");
    }
  }
Esempio n. 2
0
  public static final String doCorefResolution(Annotation annotation) {

    Map<Integer, CorefChain> corefs = annotation.get(CorefChainAnnotation.class);
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    List<String> resolved = new ArrayList<String>();
    for (CoreMap sentence : sentences) {
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      for (CoreLabel token : tokens) {
        Integer corefClustId = token.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
        CorefChain chain = corefs.get(corefClustId);
        if (chain == null) resolved.add(token.word());
        else {
          int sentINdx = chain.getRepresentativeMention().sentNum - 1;
          CoreMap corefSentence = sentences.get(sentINdx);
          List<CoreLabel> corefSentenceTokens = corefSentence.get(TokensAnnotation.class);
          CorefMention reprMent = chain.getRepresentativeMention();
          if (token.index() < reprMent.startIndex || token.index() > reprMent.endIndex) {
            for (int i = reprMent.startIndex; i < reprMent.endIndex; i++) {
              CoreLabel matchedLabel = corefSentenceTokens.get(i - 1);
              resolved.add(matchedLabel.word());
            }
          } else resolved.add(token.word());
        }
      }
    }
    String resolvedStr = "";
    System.out.println();
    for (String str : resolved) {
      resolvedStr += str + " ";
    }
    System.out.println(resolvedStr);

    return resolvedStr;
  }
  @Override
  public void annotate(Annotation annotation) {
    if (verbose) {
      System.err.print("Adding TokensRegexNER annotations ... ");
    }

    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences != null) {
      for (CoreMap sentence : sentences) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        annotateMatched(tokens);
      }
    } else {
      List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
      if (tokens != null) {
        annotateMatched(tokens);
      } else {
        throw new RuntimeException("Unable to find sentences or tokens in " + annotation);
      }
    }

    if (verbose) System.err.println("done.");
  }
Esempio n. 4
0
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      Future<String> json =
          corenlpExecutor.submit(
              () -> {
                try {
                  // Get the document
                  Properties props =
                      new Properties() {
                        {
                          setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
                        }
                      };
                  Annotation doc = getDocument(props, httpExchange);
                  if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                    StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                    pipeline.annotate(doc);
                  }

                  // Construct the matcher
                  Map<String, String> params = getURLParams(httpExchange.getRequestURI());
                  // (get the pattern)
                  if (!params.containsKey("pattern")) {
                    respondError("Missing required parameter 'pattern'", httpExchange);
                    return "";
                  }
                  String pattern = params.get("pattern");
                  // (get whether to filter / find)
                  String filterStr = params.getOrDefault("filter", "false");
                  final boolean filter =
                      filterStr.trim().isEmpty()
                          || "true".equalsIgnoreCase(filterStr.toLowerCase());
                  // (create the matcher)
                  final SemgrexPattern regex = SemgrexPattern.compile(pattern);

                  // Run TokensRegex
                  return JSONOutputter.JSONWriter.objectToJSON(
                      (docWriter) -> {
                        if (filter) {
                          // Case: just filter sentences
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          regex
                                              .matcher(
                                                  sentence.get(
                                                      SemanticGraphCoreAnnotations
                                                          .CollapsedCCProcessedDependenciesAnnotation
                                                          .class))
                                              .matches())
                                  .collect(Collectors.toList()));
                        } else {
                          // Case: find matches
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          (Consumer<JSONOutputter.Writer>)
                                              (JSONOutputter.Writer sentWriter) -> {
                                                SemgrexMatcher matcher =
                                                    regex.matcher(
                                                        sentence.get(
                                                            SemanticGraphCoreAnnotations
                                                                .CollapsedCCProcessedDependenciesAnnotation
                                                                .class));
                                                int i = 0;
                                                while (matcher.find()) {
                                                  sentWriter.set(
                                                      Integer.toString(i),
                                                      (Consumer<JSONOutputter.Writer>)
                                                          (JSONOutputter.Writer matchWriter) -> {
                                                            IndexedWord match = matcher.getMatch();
                                                            matchWriter.set("text", match.word());
                                                            matchWriter.set(
                                                                "begin", match.index() - 1);
                                                            matchWriter.set("end", match.index());
                                                            for (String capture :
                                                                matcher.getNodeNames()) {
                                                              matchWriter.set(
                                                                  "$" + capture,
                                                                  (Consumer<JSONOutputter.Writer>)
                                                                      groupWriter -> {
                                                                        IndexedWord node =
                                                                            matcher.getNode(
                                                                                capture);
                                                                        groupWriter.set(
                                                                            "text", node.word());
                                                                        groupWriter.set(
                                                                            "begin",
                                                                            node.index() - 1);
                                                                        groupWriter.set(
                                                                            "end", node.index());
                                                                      });
                                                            }
                                                          });
                                                  i += 1;
                                                }
                                                sentWriter.set("length", i);
                                              }));
                        }
                      });
                } catch (Exception e) {
                  e.printStackTrace();
                  try {
                    respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
                  } catch (IOException ignored) {
                  }
                }
                return "";
              });

      // Send response
      byte[] response = new byte[0];
      try {
        response = json.get(5, TimeUnit.SECONDS).getBytes();
      } catch (InterruptedException | ExecutionException | TimeoutException e) {
        respondError("Timeout when executing Semgrex query", httpExchange);
      }
      if (response.length > 0) {
        httpExchange.getResponseHeaders().add("Content-Type", "text/json");
        httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
        httpExchange.sendResponseHeaders(HTTP_OK, response.length);
        httpExchange.getResponseBody().write(response);
        httpExchange.close();
      }
    }
Esempio n. 5
0
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      // Get sentence.
      Properties props;
      Annotation ann;
      StanfordCoreNLP.OutputFormat of;
      log("[" + httpExchange.getRemoteAddress() + "] Received message");
      try {
        props = getProperties(httpExchange);
        ann = getDocument(props, httpExchange);
        of =
            StanfordCoreNLP.OutputFormat.valueOf(
                props.getProperty("outputFormat", "json").toUpperCase());
        // Handle direct browser connections (i.e., not a POST request).
        if (ann.get(CoreAnnotations.TextAnnotation.class).length() == 0) {
          log("[" + httpExchange.getRemoteAddress() + "] Interactive connection");
          staticPageHandle.handle(httpExchange);
          return;
        }
        log("[" + httpExchange.getRemoteAddress() + "] API call");
      } catch (Exception e) {
        // Return error message.
        e.printStackTrace();
        String response = e.getMessage();
        httpExchange.getResponseHeaders().add("Content-Type", "text/plain");
        httpExchange.sendResponseHeaders(HTTP_BAD_INPUT, response.length());
        httpExchange.getResponseBody().write(response.getBytes());
        httpExchange.close();
        return;
      }

      try {
        // Annotate
        StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
        Future<Annotation> completedAnnotationFuture =
            corenlpExecutor.submit(
                () -> {
                  pipeline.annotate(ann);
                  return ann;
                });
        Annotation completedAnnotation = completedAnnotationFuture.get(5, TimeUnit.SECONDS);

        // Get output
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        StanfordCoreNLP.createOutputter(props, AnnotationOutputter.getOptions(pipeline))
            .accept(completedAnnotation, os);
        os.close();
        byte[] response = os.toByteArray();

        httpExchange.getResponseHeaders().add("Content-Type", getContentType(props, of));
        httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
        httpExchange.sendResponseHeaders(HTTP_OK, response.length);
        httpExchange.getResponseBody().write(response);
        httpExchange.close();
      } catch (TimeoutException e) {
        respondError("CoreNLP request timed out", httpExchange);
      } catch (Exception e) {
        // Return error message.
        respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
      }
    }