예제 #1
0
    /**
     * Parse the parameters of a connection into a CoreNLP properties file that can be passed into
     * {@link StanfordCoreNLP}, and used in the I/O stages.
     *
     * @param httpExchange The http exchange; effectively, the request information.
     * @return A {@link Properties} object corresponding to a combination of default and passed
     *     properties.
     * @throws UnsupportedEncodingException Thrown if we could not decode the key/value pairs with
     *     UTF-8.
     */
    private Properties getProperties(HttpExchange httpExchange)
        throws UnsupportedEncodingException {
      // Load the default properties
      Properties props = new Properties();
      defaultProps
          .entrySet()
          .stream()
          .forEach(
              entry -> props.setProperty(entry.getKey().toString(), entry.getValue().toString()));

      // Try to get more properties from query string.
      Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI());
      if (urlParams.containsKey("properties")) {
        StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8"))
            .entrySet()
            .forEach(entry -> props.setProperty(entry.getKey(), entry.getValue()));
      } else if (urlParams.containsKey("props")) {
        StringUtils.decodeMap(URLDecoder.decode(urlParams.get("properties"), "UTF-8"))
            .entrySet()
            .forEach(entry -> props.setProperty(entry.getKey(), entry.getValue()));
      }

      // Make sure the properties compile
      props.setProperty(
          "annotators",
          StanfordCoreNLP.ensurePrerequisiteAnnotators(
              props.getProperty("annotators").split("[, \t]+")));

      return props;
    }
예제 #2
0
 public XBarGrammarProjection(BinaryGrammar bg, UnaryGrammar ug) {
   Map<BinaryRule, BinaryRule> binaryRules = new HashMap<BinaryRule, BinaryRule>();
   Map<UnaryRule, UnaryRule> unaryRules = new HashMap<UnaryRule, UnaryRule>();
   sourceUG = ug;
   sourceBG = bg;
   sourceNumberer = Numberer.getGlobalNumberer(bg.stateSpace());
   targetNumberer = Numberer.getGlobalNumberer(bg.stateSpace() + "-xbar");
   projection = new int[sourceNumberer.total()];
   scanStates(sourceNumberer, targetNumberer);
   targetBG = new BinaryGrammar(targetNumberer.total(), bg.stateSpace() + "-xbar");
   targetUG = new UnaryGrammar(targetNumberer.total());
   for (Iterator<BinaryRule> brI = bg.iterator(); brI.hasNext(); ) {
     BinaryRule rule = projectBinaryRule(brI.next());
     Rule old = binaryRules.get(rule);
     if (old == null || rule.score > old.score) {
       binaryRules.put(rule, rule);
     }
   }
   for (BinaryRule br : binaryRules.keySet()) {
     targetBG.addRule(br);
     // System.out.println("BR: "+targetNumberer.object(br.parent)+" ->
     // "+targetNumberer.object(br.leftChild)+" "+targetNumberer.object(br.rightChild)+" %%
     // "+br.score);
   }
   targetBG.splitRules();
   for (int parent = 0; parent < sourceNumberer.total(); parent++) {
     for (Iterator<UnaryRule> urI = ug.ruleIteratorByParent(parent); urI.hasNext(); ) {
       UnaryRule sourceRule = urI.next();
       UnaryRule rule = projectUnaryRule(sourceRule);
       Rule old = unaryRules.get(rule);
       if (old == null || rule.score > old.score) {
         unaryRules.put(rule, rule);
       }
       /*
         if (((UnaryRule)rule).child == targetNumberer.number("PRP") &&
           ((String)sourceNumberer.object(rule.parent)).charAt(0) == 'N') {
         System.out.println("Source UR: "+sourceRule+" %% "+sourceRule.score);
         System.out.println("Score of "+rule+"is now: "+((UnaryRule)unaryRules.get(rule)).score);
       }
       */
     }
   }
   for (UnaryRule ur : unaryRules.keySet()) {
     targetUG.addRule(ur);
     // System.out.println("UR: "+targetNumberer.object(ur.parent)+" ->
     // "+targetNumberer.object(ur.child)+" %% "+ur.score);
   }
   targetUG.purgeRules();
   System.out.println(
       "Projected "
           + sourceNumberer.total()
           + " states to "
           + targetNumberer.total()
           + " states.");
 }
예제 #3
0
  public static final String doCorefResolution(Annotation annotation) {

    Map<Integer, CorefChain> corefs = annotation.get(CorefChainAnnotation.class);
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    List<String> resolved = new ArrayList<String>();
    for (CoreMap sentence : sentences) {
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      for (CoreLabel token : tokens) {
        Integer corefClustId = token.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
        CorefChain chain = corefs.get(corefClustId);
        if (chain == null) resolved.add(token.word());
        else {
          int sentINdx = chain.getRepresentativeMention().sentNum - 1;
          CoreMap corefSentence = sentences.get(sentINdx);
          List<CoreLabel> corefSentenceTokens = corefSentence.get(TokensAnnotation.class);
          CorefMention reprMent = chain.getRepresentativeMention();
          if (token.index() < reprMent.startIndex || token.index() > reprMent.endIndex) {
            for (int i = reprMent.startIndex; i < reprMent.endIndex; i++) {
              CoreLabel matchedLabel = corefSentenceTokens.get(i - 1);
              resolved.add(matchedLabel.word());
            }
          } else resolved.add(token.word());
        }
      }
    }
    String resolvedStr = "";
    System.out.println();
    for (String str : resolved) {
      resolvedStr += str + " ";
    }
    System.out.println(resolvedStr);

    return resolvedStr;
  }
예제 #4
0
 public Object intern(Object o) {
   Object i = oToO.get(o);
   if (i == null) {
     i = o;
     oToO.put(o, o);
   }
   return i;
 }
 public void addInPlace(SentenceKey key, SentenceStatistics sentenceStatistics) {
   for (Map<SentenceKey, EnsembleStatistics> impl : this.impl) {
     EnsembleStatistics stats = impl.get(key);
     if (stats == null) {
       stats = new EnsembleStatistics(new LinkedList<SentenceStatistics>());
       impl.put(key, stats);
     }
     stats.addInPlace(sentenceStatistics);
   }
 }
예제 #6
0
 protected String historyToString(List history) {
   String str = (String) historyToString.get(history);
   if (str == null) {
     StringBuilder sb = new StringBuilder();
     for (int i = 0; i < history.size(); i++) {
       sb.append('^');
       sb.append(history.get(i));
     }
     str = sb.toString();
     historyToString.put(history, str);
   }
   return str;
 }
 public TrainingStatistics merge(TrainingStatistics other) {
   Map<SentenceKey, EnsembleStatistics> newStats = new HashMap<>();
   // Add elements from this statistics
   for (Map<SentenceKey, EnsembleStatistics> map : this.impl) {
     for (SentenceKey key : map.keySet()) {
       newStats.put(key, new EnsembleStatistics(map.get(key)));
     }
   }
   // Add elements from other statistics
   for (Map<SentenceKey, EnsembleStatistics> map : other.impl) {
     for (SentenceKey key : map.keySet()) {
       EnsembleStatistics existing = newStats.get(key);
       if (existing == null) {
         existing = new EnsembleStatistics(new LinkedList<SentenceStatistics>());
         newStats.put(key, existing);
       }
       existing.addInPlace(map.get(key));
     }
   }
   // Return
   return new TrainingStatistics(Maybe.Just(newStats));
 }
예제 #8
0
  public Object formResult() {
    Set brs = new HashSet();
    Set urs = new HashSet();
    // scan each rule / history pair
    int ruleCount = 0;
    for (Iterator pairI = rulePairs.keySet().iterator(); pairI.hasNext(); ) {
      if (ruleCount % 100 == 0) {
        System.err.println("Rules multiplied: " + ruleCount);
      }
      ruleCount++;
      Pair rulePair = (Pair) pairI.next();
      Rule baseRule = (Rule) rulePair.first;
      String baseLabel = (String) ruleToLabel.get(baseRule);
      List history = (List) rulePair.second;
      double totalProb = 0;
      for (int depth = 1; depth <= HISTORY_DEPTH() && depth <= history.size(); depth++) {
        List subHistory = history.subList(0, depth);
        double c_label = labelPairs.getCount(new Pair(baseLabel, subHistory));
        double c_rule = rulePairs.getCount(new Pair(baseRule, subHistory));
        // System.out.println("Multiplying out "+baseRule+" with history "+subHistory);
        // System.out.println("Count of "+baseLabel+" with "+subHistory+" is "+c_label);
        // System.out.println("Count of "+baseRule+" with "+subHistory+" is "+c_rule );

        double prob = (1.0 / HISTORY_DEPTH()) * (c_rule) / (c_label);
        totalProb += prob;
        for (int childDepth = 0; childDepth <= Math.min(HISTORY_DEPTH() - 1, depth); childDepth++) {
          Rule rule = specifyRule(baseRule, subHistory, childDepth);
          rule.score = (float) Math.log(totalProb);
          // System.out.println("Created  "+rule+" with score "+rule.score);
          if (rule instanceof UnaryRule) {
            urs.add(rule);
          } else {
            brs.add(rule);
          }
        }
      }
    }
    System.out.println("Total states: " + stateNumberer.total());
    BinaryGrammar bg = new BinaryGrammar(stateNumberer.total());
    UnaryGrammar ug = new UnaryGrammar(stateNumberer.total());
    for (Iterator brI = brs.iterator(); brI.hasNext(); ) {
      BinaryRule br = (BinaryRule) brI.next();
      bg.addRule(br);
    }
    for (Iterator urI = urs.iterator(); urI.hasNext(); ) {
      UnaryRule ur = (UnaryRule) urI.next();
      ug.addRule(ur);
    }
    return new Pair(ug, bg);
  }
예제 #9
0
 @Override
 public void handle(HttpExchange httpExchange) throws IOException {
   Map<String, String> urlParams = getURLParams(httpExchange.getRequestURI());
   httpExchange.getResponseHeaders().set("Content-Type", "text/plain");
   boolean doExit = false;
   String response = "Invalid shutdown key\n";
   if (urlParams.containsKey("key") && urlParams.get("key").equals(shutdownKey)) {
     response = "Shutdown successful!\n";
     doExit = true;
   }
   httpExchange.sendResponseHeaders(HTTP_OK, response.getBytes().length);
   httpExchange.getResponseBody().write(response.getBytes());
   httpExchange.close();
   if (doExit) {
     System.exit(0);
   }
 }
예제 #10
0
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      Future<String> json =
          corenlpExecutor.submit(
              () -> {
                try {
                  // Get the document
                  Properties props =
                      new Properties() {
                        {
                          setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
                        }
                      };
                  Annotation doc = getDocument(props, httpExchange);
                  if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                    StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                    pipeline.annotate(doc);
                  }

                  // Construct the matcher
                  Map<String, String> params = getURLParams(httpExchange.getRequestURI());
                  // (get the pattern)
                  if (!params.containsKey("pattern")) {
                    respondError("Missing required parameter 'pattern'", httpExchange);
                    return "";
                  }
                  String pattern = params.get("pattern");
                  // (get whether to filter / find)
                  String filterStr = params.getOrDefault("filter", "false");
                  final boolean filter =
                      filterStr.trim().isEmpty()
                          || "true".equalsIgnoreCase(filterStr.toLowerCase());
                  // (create the matcher)
                  final SemgrexPattern regex = SemgrexPattern.compile(pattern);

                  // Run TokensRegex
                  return JSONOutputter.JSONWriter.objectToJSON(
                      (docWriter) -> {
                        if (filter) {
                          // Case: just filter sentences
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          regex
                                              .matcher(
                                                  sentence.get(
                                                      SemanticGraphCoreAnnotations
                                                          .CollapsedCCProcessedDependenciesAnnotation
                                                          .class))
                                              .matches())
                                  .collect(Collectors.toList()));
                        } else {
                          // Case: find matches
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          (Consumer<JSONOutputter.Writer>)
                                              (JSONOutputter.Writer sentWriter) -> {
                                                SemgrexMatcher matcher =
                                                    regex.matcher(
                                                        sentence.get(
                                                            SemanticGraphCoreAnnotations
                                                                .CollapsedCCProcessedDependenciesAnnotation
                                                                .class));
                                                int i = 0;
                                                while (matcher.find()) {
                                                  sentWriter.set(
                                                      Integer.toString(i),
                                                      (Consumer<JSONOutputter.Writer>)
                                                          (JSONOutputter.Writer matchWriter) -> {
                                                            IndexedWord match = matcher.getMatch();
                                                            matchWriter.set("text", match.word());
                                                            matchWriter.set(
                                                                "begin", match.index() - 1);
                                                            matchWriter.set("end", match.index());
                                                            for (String capture :
                                                                matcher.getNodeNames()) {
                                                              matchWriter.set(
                                                                  "$" + capture,
                                                                  (Consumer<JSONOutputter.Writer>)
                                                                      groupWriter -> {
                                                                        IndexedWord node =
                                                                            matcher.getNode(
                                                                                capture);
                                                                        groupWriter.set(
                                                                            "text", node.word());
                                                                        groupWriter.set(
                                                                            "begin",
                                                                            node.index() - 1);
                                                                        groupWriter.set(
                                                                            "end", node.index());
                                                                      });
                                                            }
                                                          });
                                                  i += 1;
                                                }
                                                sentWriter.set("length", i);
                                              }));
                        }
                      });
                } catch (Exception e) {
                  e.printStackTrace();
                  try {
                    respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
                  } catch (IOException ignored) {
                  }
                }
                return "";
              });

      // Send response
      byte[] response = new byte[0];
      try {
        response = json.get(5, TimeUnit.SECONDS).getBytes();
      } catch (InterruptedException | ExecutionException | TimeoutException e) {
        respondError("Timeout when executing Semgrex query", httpExchange);
      }
      if (response.length > 0) {
        httpExchange.getResponseHeaders().add("Content-Type", "text/json");
        httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
        httpExchange.sendResponseHeaders(HTTP_OK, response.length);
        httpExchange.getResponseBody().write(response);
        httpExchange.close();
      }
    }