Ejemplo n.º 1
0
  public static void main(String[] args) throws IOException {
    PrintWriter out;
    if (args.length > 1) {
      out = new PrintWriter(args[1]);
    } else {
      out = new PrintWriter(System.out);
    }
    PrintWriter xmlOut = null;
    if (args.length > 2) {
      xmlOut = new PrintWriter(args[2]);
    }
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner,parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation annotation;
    if (args.length > 0) {
      annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[0]));
    } else {
      annotation =
          new Annotation(
              "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.");
    }

    pipeline.annotate(annotation);
    pipeline.prettyPrint(annotation, out);
  }
Ejemplo n.º 2
0
 private static Properties getProperties(
     String name, String mapping, boolean ignoreCase, String validPosRegex) {
   String prefix = (name != null && !name.isEmpty()) ? name + '.' : "";
   Properties props = new Properties();
   props.setProperty(prefix + "mapping", mapping);
   props.setProperty(prefix + "ignorecase", String.valueOf(ignoreCase));
   if (validPosRegex != null) {
     props.setProperty(prefix + "validpospattern", validPosRegex);
   }
   return props;
 }
Ejemplo n.º 3
0
 /**
  * Reads the POST contents of the request and parses it into an Annotation object, ready to be
  * annotated. This method can also read a serialized document, if the input format is set to be
  * serialized.
  *
  * @param props The properties we are annotating with. This is where the input format is retrieved
  *     from.
  * @param httpExchange The exchange we are reading POST data from.
  * @return An Annotation representing the read document.
  * @throws IOException Thrown if we cannot read the POST data.
  * @throws ClassNotFoundException Thrown if we cannot load the serializer.
  */
 private Annotation getDocument(Properties props, HttpExchange httpExchange)
     throws IOException, ClassNotFoundException {
   String inputFormat = props.getProperty("inputFormat", "text");
   switch (inputFormat) {
     case "text":
       return new Annotation(
           IOUtils.slurpReader(new InputStreamReader(httpExchange.getRequestBody())));
     case "serialized":
       String inputSerializerName =
           props.getProperty("inputSerializer", ProtobufAnnotationSerializer.class.getName());
       AnnotationSerializer serializer = MetaClass.create(inputSerializerName).createInstance();
       Pair<Annotation, InputStream> pair = serializer.read(httpExchange.getRequestBody());
       return pair.first;
     default:
       throw new IOException("Could not parse input format: " + inputFormat);
   }
 }
Ejemplo n.º 4
0
    /**
     * Parse the parameters of a connection into a CoreNLP properties file that can be passed into
     * {@link StanfordCoreNLP}, and used in the I/O stages.
     *
     * @param httpExchange The http exchange; effectively, the request information.
     * @return A {@link Properties} object corresponding to a combination of default and passed
     *     properties.
     * @throws UnsupportedEncodingException Thrown if we could not decode the key/value pairs with
     *     UTF-8.
     */
    private Properties getProperties(HttpExchange httpExchange)
        throws UnsupportedEncodingException {
      // Load the default properties
      Properties props = new Properties();
      defaultProps
          .entrySet()
          .stream()
          .forEach(
              entry -> props.setProperty(entry.getKey().toString(), entry.getValue().toString()));

      // Try to get more properties from query string.
      Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI());
      if (urlParams.containsKey("properties")) {
        StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8"))
            .entrySet()
            .forEach(entry -> props.setProperty(entry.getKey(), entry.getValue()));
      } else if (urlParams.containsKey("props")) {
        StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8"))
            .entrySet()
            .forEach(entry -> props.setProperty(entry.getKey(), entry.getValue()));
      }

      // Make sure the properties compile
      props.setProperty(
          "annotators",
          StanfordCoreNLP.ensurePrerequisiteAnnotators(
              props.getProperty("annotators").split("[, \t]+")));

      return props;
    }
Ejemplo n.º 5
0
  public TokensRegexNERAnnotator(String name, Properties properties) {
    String prefix = (name != null && !name.isEmpty()) ? name + '.' : "";
    String backgroundSymbol =
        properties.getProperty(prefix + "backgroundSymbol", DEFAULT_BACKGROUND_SYMBOL);
    String[] backgroundSymbols = backgroundSymbol.split("\\s*,\\s*");
    String mappingFiles =
        properties.getProperty(prefix + "mapping", DefaultPaths.DEFAULT_REGEXNER_RULES);
    String[] mappings = mappingFiles.split("\\s*[,;]\\s*");
    String validPosRegex = properties.getProperty(prefix + "validpospattern");
    this.posMatchType =
        PosMatchType.valueOf(
            properties.getProperty(prefix + "posmatchtype", DEFAULT_POS_MATCH_TYPE.name()));

    String noDefaultOverwriteLabelsProp =
        properties.getProperty(prefix + "noDefaultOverwriteLabels");
    this.noDefaultOverwriteLabels =
        (noDefaultOverwriteLabelsProp != null)
            ? Collections.unmodifiableSet(
                CollectionUtils.asSet(noDefaultOverwriteLabelsProp.split("\\s*,\\s*")))
            : Collections.unmodifiableSet(new HashSet<>());
    this.ignoreCase = PropertiesUtils.getBool(properties, prefix + "ignorecase", false);
    this.verbose = PropertiesUtils.getBool(properties, prefix + "verbose", false);

    if (validPosRegex != null && !validPosRegex.isEmpty()) {
      validPosPattern = Pattern.compile(validPosRegex);
    } else {
      validPosPattern = null;
    }
    entries =
        Collections.unmodifiableList(
            readEntries(name, noDefaultOverwriteLabels, ignoreCase, verbose, mappings));
    IdentityHashMap<SequencePattern<CoreMap>, Entry> patternToEntry = new IdentityHashMap<>();
    multiPatternMatcher = createPatternMatcher(patternToEntry);
    this.patternToEntry = Collections.unmodifiableMap(patternToEntry);
    Set<String> myLabels = Generics.newHashSet();
    // Can always override background or none.
    Collections.addAll(myLabels, backgroundSymbols);
    myLabels.add(null);
    // Always overwrite labels
    for (Entry entry : entries) myLabels.add(entry.type);
    this.myLabels = Collections.unmodifiableSet(myLabels);
  }
Ejemplo n.º 6
0
  public StanfordCoreNLPServer(int port) throws IOException {
    serverPort = port;

    defaultProps = new Properties();
    defaultProps.setProperty(
        "annotators", "tokenize, ssplit, pos, lemma, ner, parse, depparse, natlog, openie, dcoref");
    defaultProps.setProperty("inputFormat", "text");
    defaultProps.setProperty("outputFormat", "json");

    // Generate and write a shutdown key
    String tmpDir = System.getProperty("java.io.tmpdir");
    File tmpFile = new File(tmpDir + File.separator + "corenlp.shutdown");
    tmpFile.deleteOnExit();
    if (tmpFile.exists()) {
      if (!tmpFile.delete()) {
        throw new IllegalStateException("Could not delete shutdown key file");
      }
    }
    this.shutdownKey = new BigInteger(130, new Random()).toString(32);
    IOUtils.writeStringToFile(shutdownKey, tmpFile.getPath(), "utf-8");

    // Set the static page handler
    this.staticPageHandle = new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.html");
  }
Ejemplo n.º 7
0
 /**
  * Get the response data type to send to the client, based off of the output format requested
  * from CoreNLP.
  *
  * @param props The properties being used by CoreNLP.
  * @param of The output format being output by CoreNLP.
  * @return An identifier for the type of the HTTP response (e.g., 'text/json').
  */
 public String getContentType(Properties props, StanfordCoreNLP.OutputFormat of) {
   switch (of) {
     case JSON:
       return "text/json";
     case TEXT:
     case CONLL:
       return "text/plain";
     case XML:
       return "text/xml";
     case SERIALIZED:
       String outputSerializerName = props.getProperty("outputSerializer");
       if (outputSerializerName != null
           && outputSerializerName.equals(ProtobufAnnotationSerializer.class.getName())) {
         return "application/x-protobuf";
       }
     default:
       return "application/octet-stream";
   }
 }
Ejemplo n.º 8
0
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      // Get sentence.
      Properties props;
      Annotation ann;
      StanfordCoreNLP.OutputFormat of;
      log("[" + httpExchange.getRemoteAddress() + "] Received message");
      try {
        props = getProperties(httpExchange);
        ann = getDocument(props, httpExchange);
        of =
            StanfordCoreNLP.OutputFormat.valueOf(
                props.getProperty("outputFormat", "json").toUpperCase());
        // Handle direct browser connections (i.e., not a POST request).
        if (ann.get(CoreAnnotations.TextAnnotation.class).length() == 0) {
          log("[" + httpExchange.getRemoteAddress() + "] Interactive connection");
          staticPageHandle.handle(httpExchange);
          return;
        }
        log("[" + httpExchange.getRemoteAddress() + "] API call");
      } catch (Exception e) {
        // Return error message.
        e.printStackTrace();
        String response = e.getMessage();
        httpExchange.getResponseHeaders().add("Content-Type", "text/plain");
        httpExchange.sendResponseHeaders(HTTP_BAD_INPUT, response.length());
        httpExchange.getResponseBody().write(response.getBytes());
        httpExchange.close();
        return;
      }

      try {
        // Annotate
        StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
        Future<Annotation> completedAnnotationFuture =
            corenlpExecutor.submit(
                () -> {
                  pipeline.annotate(ann);
                  return ann;
                });
        Annotation completedAnnotation = completedAnnotationFuture.get(5, TimeUnit.SECONDS);

        // Get output
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        StanfordCoreNLP.createOutputter(props, AnnotationOutputter.getOptions(pipeline))
            .accept(completedAnnotation, os);
        os.close();
        byte[] response = os.toByteArray();

        httpExchange.getResponseHeaders().add("Content-Type", getContentType(props, of));
        httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
        httpExchange.sendResponseHeaders(HTTP_OK, response.length);
        httpExchange.getResponseBody().write(response);
        httpExchange.close();
      } catch (TimeoutException e) {
        respondError("CoreNLP request timed out", httpExchange);
      } catch (Exception e) {
        // Return error message.
        respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
      }
    }