Пример #1
0
    /**
     * Parse the parameters of a connection into a CoreNLP properties file that can be passed into
     * {@link StanfordCoreNLP}, and used in the I/O stages.
     *
     * @param httpExchange The http exchange; effectively, the request information.
     * @return A {@link Properties} object corresponding to a combination of default and passed
     *     properties.
     * @throws UnsupportedEncodingException Thrown if we could not decode the key/value pairs with
     *     UTF-8.
     */
    private Properties getProperties(HttpExchange httpExchange)
        throws UnsupportedEncodingException {
      // Load the default properties
      Properties props = new Properties();
      defaultProps
          .entrySet()
          .stream()
          .forEach(
              entry -> props.setProperty(entry.getKey().toString(), entry.getValue().toString()));

      // Try to get more properties from query string.
      Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI());
      if (urlParams.containsKey("properties")) {
        StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8"))
            .entrySet()
            .forEach(entry -> props.setProperty(entry.getKey(), entry.getValue()));
      } else if (urlParams.containsKey("props")) {
        StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8"))
            .entrySet()
            .forEach(entry -> props.setProperty(entry.getKey(), entry.getValue()));
      }

      // Make sure the properties compile
      props.setProperty(
          "annotators",
          StanfordCoreNLP.ensurePrerequisiteAnnotators(
              props.getProperty("annotators").split("[, \t]+")));

      return props;
    }
Пример #2
0
 /**
  * Reads the POST contents of the request and parses it into an Annotation object, ready to be
  * annotated. This method can also read a serialized document, if the input format is set to be
  * serialized.
  *
  * @param props The properties we are annotating with. This is where the input format is retrieved
  *     from.
  * @param httpExchange The exchange we are reading POST data from.
  * @return An Annotation representing the read document.
  * @throws IOException Thrown if we cannot read the POST data.
  * @throws ClassNotFoundException Thrown if we cannot load the serializer.
  */
 private Annotation getDocument(Properties props, HttpExchange httpExchange)
     throws IOException, ClassNotFoundException {
   String inputFormat = props.getProperty("inputFormat", "text");
   switch (inputFormat) {
     case "text":
       return new Annotation(
           IOUtils.slurpReader(new InputStreamReader(httpExchange.getRequestBody())));
     case "serialized":
       String inputSerializerName =
           props.getProperty("inputSerializer", ProtobufAnnotationSerializer.class.getName());
       AnnotationSerializer serializer = MetaClass.create(inputSerializerName).createInstance();
       Pair<Annotation, InputStream> pair = serializer.read(httpExchange.getRequestBody());
       return pair.first;
     default:
       throw new IOException("Could not parse input format: " + inputFormat);
   }
 }
  public TokensRegexNERAnnotator(String name, Properties properties) {
    String prefix = (name != null && !name.isEmpty()) ? name + '.' : "";
    String backgroundSymbol =
        properties.getProperty(prefix + "backgroundSymbol", DEFAULT_BACKGROUND_SYMBOL);
    String[] backgroundSymbols = backgroundSymbol.split("\\s*,\\s*");
    String mappingFiles =
        properties.getProperty(prefix + "mapping", DefaultPaths.DEFAULT_REGEXNER_RULES);
    String[] mappings = mappingFiles.split("\\s*[,;]\\s*");
    String validPosRegex = properties.getProperty(prefix + "validpospattern");
    this.posMatchType =
        PosMatchType.valueOf(
            properties.getProperty(prefix + "posmatchtype", DEFAULT_POS_MATCH_TYPE.name()));

    String noDefaultOverwriteLabelsProp =
        properties.getProperty(prefix + "noDefaultOverwriteLabels");
    this.noDefaultOverwriteLabels =
        (noDefaultOverwriteLabelsProp != null)
            ? Collections.unmodifiableSet(
                CollectionUtils.asSet(noDefaultOverwriteLabelsProp.split("\\s*,\\s*")))
            : Collections.unmodifiableSet(new HashSet<>());
    this.ignoreCase = PropertiesUtils.getBool(properties, prefix + "ignorecase", false);
    this.verbose = PropertiesUtils.getBool(properties, prefix + "verbose", false);

    if (validPosRegex != null && !validPosRegex.isEmpty()) {
      validPosPattern = Pattern.compile(validPosRegex);
    } else {
      validPosPattern = null;
    }
    entries =
        Collections.unmodifiableList(
            readEntries(name, noDefaultOverwriteLabels, ignoreCase, verbose, mappings));
    IdentityHashMap<SequencePattern<CoreMap>, Entry> patternToEntry = new IdentityHashMap<>();
    multiPatternMatcher = createPatternMatcher(patternToEntry);
    this.patternToEntry = Collections.unmodifiableMap(patternToEntry);
    Set<String> myLabels = Generics.newHashSet();
    // Can always override background or none.
    Collections.addAll(myLabels, backgroundSymbols);
    myLabels.add(null);
    // Always overwrite labels
    for (Entry entry : entries) myLabels.add(entry.type);
    this.myLabels = Collections.unmodifiableSet(myLabels);
  }
Пример #4
0
 /**
  * Get the response data type to send to the client, based off of the output format requested
  * from CoreNLP.
  *
  * @param props The properties being used by CoreNLP.
  * @param of The output format being output by CoreNLP.
  * @return An identifier for the type of the HTTP response (e.g., 'text/json').
  */
 public String getContentType(Properties props, StanfordCoreNLP.OutputFormat of) {
   switch (of) {
     case JSON:
       return "text/json";
     case TEXT:
     case CONLL:
       return "text/plain";
     case XML:
       return "text/xml";
     case SERIALIZED:
       String outputSerializerName = props.getProperty("outputSerializer");
       if (outputSerializerName != null
           && outputSerializerName.equals(ProtobufAnnotationSerializer.class.getName())) {
         return "application/x-protobuf";
       }
     default:
       return "application/octet-stream";
   }
 }
Пример #5
0
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      // Get sentence.
      Properties props;
      Annotation ann;
      StanfordCoreNLP.OutputFormat of;
      log("[" + httpExchange.getRemoteAddress() + "] Received message");
      try {
        props = getProperties(httpExchange);
        ann = getDocument(props, httpExchange);
        of =
            StanfordCoreNLP.OutputFormat.valueOf(
                props.getProperty("outputFormat", "json").toUpperCase());
        // Handle direct browser connections (i.e., not a POST request).
        if (ann.get(CoreAnnotations.TextAnnotation.class).length() == 0) {
          log("[" + httpExchange.getRemoteAddress() + "] Interactive connection");
          staticPageHandle.handle(httpExchange);
          return;
        }
        log("[" + httpExchange.getRemoteAddress() + "] API call");
      } catch (Exception e) {
        // Return error message.
        e.printStackTrace();
        String response = e.getMessage();
        httpExchange.getResponseHeaders().add("Content-Type", "text/plain");
        httpExchange.sendResponseHeaders(HTTP_BAD_INPUT, response.length());
        httpExchange.getResponseBody().write(response.getBytes());
        httpExchange.close();
        return;
      }

      try {
        // Annotate
        StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
        Future<Annotation> completedAnnotationFuture =
            corenlpExecutor.submit(
                () -> {
                  pipeline.annotate(ann);
                  return ann;
                });
        Annotation completedAnnotation = completedAnnotationFuture.get(5, TimeUnit.SECONDS);

        // Get output
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        StanfordCoreNLP.createOutputter(props, AnnotationOutputter.getOptions(pipeline))
            .accept(completedAnnotation, os);
        os.close();
        byte[] response = os.toByteArray();

        httpExchange.getResponseHeaders().add("Content-Type", getContentType(props, of));
        httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
        httpExchange.sendResponseHeaders(HTTP_OK, response.length);
        httpExchange.getResponseBody().write(response);
        httpExchange.close();
      } catch (TimeoutException e) {
        respondError("CoreNLP request timed out", httpExchange);
      } catch (Exception e) {
        // Return error message.
        respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
      }
    }