Example #1: unit tests for SemanticGraph.getCommonAncestor
  public void testGetCommonAncestor() {
    IndexedWord common =
        graph.getCommonAncestor(graph.getNodeByIndex(43), graph.getNodeByIndex(44));
    assertEquals(45, common.index());

    common = graph.getCommonAncestor(graph.getNodeByIndex(41), graph.getNodeByIndex(39));
    assertEquals(41, common.index());

    common = graph.getCommonAncestor(graph.getNodeByIndex(39), graph.getNodeByIndex(41));
    assertEquals(41, common.index());

    common = graph.getCommonAncestor(graph.getNodeByIndex(40), graph.getNodeByIndex(42));
    assertEquals(41, common.index());

    // nodes 10 and 42 are too far apart for this method, so no common ancestor is found
    common = graph.getCommonAncestor(graph.getNodeByIndex(10), graph.getNodeByIndex(42));
    assertNull(common);

    common = graph.getCommonAncestor(graph.getNodeByIndex(10), graph.getNodeByIndex(10));
    assertEquals(10, common.index());

    common = graph.getCommonAncestor(graph.getNodeByIndex(40), graph.getNodeByIndex(40));
    assertEquals(40, common.index());

    // a couple tests at the top of the graph
    common = graph.getCommonAncestor(graph.getNodeByIndex(10), graph.getNodeByIndex(1));
    assertEquals(10, common.index());

    common = graph.getCommonAncestor(graph.getNodeByIndex(1), graph.getNodeByIndex(10));
    assertEquals(10, common.index());
  }
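The tests above assume a graph fixture built elsewhere (in a setUp() method, typically). A minimal sketch of such a fixture, assuming SemanticGraph.valueOf, which parses the bracketed dependency notation and, as far as I recall, numbers tokens in reading order starting at 1 (worth verifying against your CoreNLP version); the sentence and indices here are illustrative, not the 39-45 range used above:

  import edu.stanford.nlp.ling.IndexedWord;
  import edu.stanford.nlp.semgraph.SemanticGraph;

  public class CommonAncestorSketch {
    public static void main(String[] args) {
      // "ate" heads both "Bill" and "muffins"; tokens are indexed 1..4 in reading order.
      SemanticGraph graph =
          SemanticGraph.valueOf("[ate subj>Bill obj>[muffins compound>blueberry]]");
      IndexedWord common =
          graph.getCommonAncestor(graph.getNodeByIndex(2), graph.getNodeByIndex(3));
      System.out.println(common); // expected: ate-1, the shared head
    }
  }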
Example #2: a test helper that compares a collection of nodes against expected indices
 private static void verifySet(Collection<IndexedWord> nodes, int... expected) {
   Set<Integer> results = Generics.newTreeSet();
   for (IndexedWord node : nodes) {
     results.add(node.index());
   }
   Set<Integer> expectedIndices = Generics.newTreeSet();
   for (Integer index : expected) {
     expectedIndices.add(index);
   }
   assertEquals(expectedIndices, results);
 }
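A hypothetical use of this helper, assuming the same graph fixture as the other tests; the indices are illustrative. SemanticGraph.descendants(v) returns v together with everything reachable below it, which is exactly the kind of collection verifySet is written for:

  public void testDescendants() {
    // Hypothetical: node 41 should dominate nodes 39, 40, and 42,
    // and descendants() includes the start node itself.
    verifySet(graph.descendants(graph.getNodeByIndex(41)), 39, 40, 41, 42);
  }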
Example #3: getCommonAncestor with debug output
 public void testCommonAncestor() {
   IndexedWord word1 = graph.getNodeByIndex(43);
   IndexedWord word2 = graph.getNodeByIndex(44);
   IndexedWord common = graph.getCommonAncestor(word1, word2);
   System.out.println("word1: " + word1);
   System.out.println("word2: " + word2);
   System.out.println("common: " + common);
   System.out.println(
       "common ancestor between  "
           + word1.value()
           + "-"
           + word1.index()
           + " and "
           + word2.value()
           + "-"
           + word2.index()
           + " is "
           + common.value()
           + "-"
           + common.index());
   assertEquals(45, common.index());
 }
Example #4: converting a CoreNLP dependency graph into a DependencyParse structure
  public static DependencyParse parse(String text) {
    if (pipeline == null) {
      loadModels();
    }

    DependencyParse parse = new DependencyParse();

    // Annotate the raw text and pull out its sentences.
    Annotation document = new Annotation(text);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    for (CoreMap sentence : sentences) {
      SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);

      // Note: with multi-sentence input, each iteration overwrites the head node,
      // so the last sentence's root wins.
      IndexedWord root = dependencies.getFirstRoot();
      parse.setHeadNode(root.index());

      // Copy every edge, and both of its endpoints, into the DependencyParse.
      List<SemanticGraphEdge> edges = dependencies.edgeListSorted();
      for (SemanticGraphEdge t : edges) {
        String dep = t.getDependent().originalText();
        int depIndex = t.getDependent().index();
        String depPOS = t.getDependent().tag();
        int depStart = t.getDependent().beginPosition();
        int depEnd = t.getDependent().endPosition();

        String gov = t.getGovernor().originalText();
        int govIndex = t.getGovernor().index();
        String govPOS = t.getGovernor().tag();
        int govStart = t.getGovernor().beginPosition();
        int govEnd = t.getGovernor().endPosition();

        parse.addNode(govIndex, gov, govPOS, govStart, govEnd);
        parse.addNode(depIndex, dep, depPOS, depStart, depEnd);
        parse.addEdge(depIndex, govIndex, t.getRelation().getShortName());
      }
    }

    return parse;
  }
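parse() relies on a static pipeline field and a loadModels() method that are not shown. A minimal sketch of what they might look like, assuming the standard StanfordCoreNLP constructor (imports: java.util.Properties, edu.stanford.nlp.pipeline.StanfordCoreNLP); the annotator list is an assumption, chosen so that CollapsedCCProcessedDependenciesAnnotation is populated:

  private static StanfordCoreNLP pipeline;

  // Hypothetical loadModels(): build one shared pipeline up front, since
  // model loading is expensive and parse() reuses the pipeline across calls.
  private static void loadModels() {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse");
    pipeline = new StanfordCoreNLP(props);
  }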
Example #5: IndexedWord.compareTo, ordering words by passage position
  /**
   * NOTE: This compareTo is based on and made to be compatible with the one from
   * IndexedFeatureLabel. You <em>must</em> have a DocIDAnnotation, SentenceIndexAnnotation, and
   * IndexAnnotation for this to make sense and be guaranteed to work properly. Currently, it won't
   * error out and will try to return something sensible if these are not defined, but that really
   * isn't proper usage!
   *
   * <p>This compareTo method is based not on value elements like word(), but on passage
   * position. It puts NO_WORD elements first, then orders by document, sentence, and word
   * index; if all of those are equal, it falls back to the copy count.
   *
   * @param w The IndexedWord to compare with
   * @return A negative integer, zero, or a positive integer as this word sorts before, at the
   *     same position as, or after w
   */
  @Override
  public int compareTo(IndexedWord w) {
    if (this.equals(IndexedWord.NO_WORD)) {
      if (w.equals(IndexedWord.NO_WORD)) {
        return 0;
      } else {
        return -1;
      }
    }
    if (w.equals(IndexedWord.NO_WORD)) {
      return 1;
    }

    // Override the default comparator if pseudo-positions are set.
    // This is needed for splicing trees together awkwardly in OpenIE.
    if (!Double.isNaN(w.pseudoPosition) || !Double.isNaN(this.pseudoPosition)) {
      double val = this.pseudoPosition() - w.pseudoPosition();
      if (val < 0) {
        return -1;
      } else if (val > 0) {
        return 1;
      } else {
        return 0;
      }
    }

    // Otherwise, compare using the normal doc/sentence/token index hierarchy
    String docID = this.getString(CoreAnnotations.DocIDAnnotation.class);
    int docComp = docID.compareTo(w.getString(CoreAnnotations.DocIDAnnotation.class));
    if (docComp != 0) return docComp;

    int sentComp = sentIndex() - w.sentIndex();
    if (sentComp != 0) return sentComp;

    int indexComp = index() - w.index();
    if (indexComp != 0) return indexComp;

    return copyCount() - w.copyCount();
  }
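Because IndexedWord implements Comparable through this method, a graph's vertex set can be restored to passage order with a plain sort. A small usage sketch, assuming a SemanticGraph named graph as in the earlier examples (imports: java.util.ArrayList, java.util.Collections, java.util.List):

  // vertexSet() has no defined order; sorting applies the compareTo above,
  // i.e. document, then sentence, then token index, then copy count.
  List<IndexedWord> ordered = new ArrayList<>(graph.vertexSet());
  Collections.sort(ordered);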
Example #6: an HTTP handler that runs Semgrex queries against annotated text
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      Future<String> json =
          corenlpExecutor.submit(
              () -> {
                try {
                  // Get the document
                  Properties props = new Properties();
                  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
                  Annotation doc = getDocument(props, httpExchange);
                  if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                    StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                    pipeline.annotate(doc);
                  }

                  // Construct the matcher
                  Map<String, String> params = getURLParams(httpExchange.getRequestURI());
                  // (get the pattern)
                  if (!params.containsKey("pattern")) {
                    respondError("Missing required parameter 'pattern'", httpExchange);
                    return "";
                  }
                  String pattern = params.get("pattern");
                  // (get whether to filter / find)
                  String filterStr = params.getOrDefault("filter", "false");
                  final boolean filter =
                      filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr);
                  // (create the matcher)
                  final SemgrexPattern regex = SemgrexPattern.compile(pattern);

                  // Run Semgrex over each sentence's dependency graph
                  return JSONOutputter.JSONWriter.objectToJSON(
                      (docWriter) -> {
                        if (filter) {
                          // Case: just filter sentences
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          regex
                                              .matcher(
                                                  sentence.get(
                                                      SemanticGraphCoreAnnotations
                                                          .CollapsedCCProcessedDependenciesAnnotation
                                                          .class))
                                              .matches())
                                  .collect(Collectors.toList()));
                        } else {
                          // Case: find matches
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          (Consumer<JSONOutputter.Writer>)
                                              (JSONOutputter.Writer sentWriter) -> {
                                                SemgrexMatcher matcher =
                                                    regex.matcher(
                                                        sentence.get(
                                                            SemanticGraphCoreAnnotations
                                                                .CollapsedCCProcessedDependenciesAnnotation
                                                                .class));
                                                int i = 0;
                                                while (matcher.find()) {
                                                  sentWriter.set(
                                                      Integer.toString(i),
                                                      (Consumer<JSONOutputter.Writer>)
                                                          (JSONOutputter.Writer matchWriter) -> {
                                                            IndexedWord match = matcher.getMatch();
                                                            matchWriter.set("text", match.word());
                                                            matchWriter.set(
                                                                "begin", match.index() - 1);
                                                            matchWriter.set("end", match.index());
                                                            for (String capture :
                                                                matcher.getNodeNames()) {
                                                              matchWriter.set(
                                                                  "$" + capture,
                                                                  (Consumer<JSONOutputter.Writer>)
                                                                      groupWriter -> {
                                                                        IndexedWord node =
                                                                            matcher.getNode(
                                                                                capture);
                                                                        groupWriter.set(
                                                                            "text", node.word());
                                                                        groupWriter.set(
                                                                            "begin",
                                                                            node.index() - 1);
                                                                        groupWriter.set(
                                                                            "end", node.index());
                                                                      });
                                                            }
                                                          });
                                                  i += 1;
                                                }
                                                sentWriter.set("length", i);
                                              }));
                        }
                      });
                } catch (Exception e) {
                  e.printStackTrace();
                  try {
                    respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
                  } catch (IOException ignored) {
                  }
                }
                return "";
              });

      // Send response
      byte[] response = new byte[0];
      try {
        response = json.get(5, TimeUnit.SECONDS).getBytes();
      } catch (InterruptedException | ExecutionException | TimeoutException e) {
        respondError("Timeout when executing Semgrex query", httpExchange);
      }
      if (response.length > 0) {
        httpExchange.getResponseHeaders().add("Content-Type", "text/json");
        httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
        httpExchange.sendResponseHeaders(HTTP_OK, response.length);
        httpExchange.getResponseBody().write(response);
        httpExchange.close();
      }
    }
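For context, a minimal client-side sketch of exercising this handler, assuming a StanfordCoreNLPServer running locally on its default port 9000 with the handler mounted at /semgrex (the text goes in the POST body, the pattern in the query string); the pattern itself is just an illustration:

  import java.net.URI;
  import java.net.URLEncoder;
  import java.net.http.HttpClient;
  import java.net.http.HttpRequest;
  import java.net.http.HttpResponse;
  import java.nio.charset.StandardCharsets;

  public class SemgrexClientSketch {
    public static void main(String[] args) throws Exception {
      // Find verbs that have a nominal subject.
      String pattern = URLEncoder.encode("{pos:/VB.*/} >nsubj {}", StandardCharsets.UTF_8);
      HttpRequest request =
          HttpRequest.newBuilder()
              .uri(URI.create("http://localhost:9000/semgrex?pattern=" + pattern))
              .POST(HttpRequest.BodyPublishers.ofString(
                  "The quick brown fox jumped over the lazy dog."))
              .build();
      HttpResponse<String> response =
          HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
      // The body is the JSON produced by the handler above: one entry per
      // sentence, each listing its matches and named capture groups.
      System.out.println(response.body());
    }
  }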