Пример #1
0
  /**
   * Builds the tuple for the dependency edge {@code gov -> dep} and attaches it to {@code t}.
   *
   * <p>The dependent side is a recursively parsed tuple when {@code dep} has children in {@code
   * depGraph}, otherwise a single {@code Notion} entity made from the dependent's word. The
   * relation is named after the specific part of the edge's grammatical relation. When {@code t}
   * is {@code null}, the governor's word is wrapped in an {@code Object} entity and used as the
   * other side of the relation.
   *
   * @param gov governor of the edge
   * @param dep dependent of the edge
   * @param depGraph graph that contains the edge between {@code gov} and {@code dep}
   * @param t tuple built so far, or {@code null} if there is none yet
   * @param visited handed through unchanged to the recursive parse of {@code dep}
   * @return the combined tuple, which is also logged at info level
   */
  @Override
  public Tuple parse(
      IndexedWord gov, IndexedWord dep, SemanticGraph depGraph, Tuple t, Set<IndexedWord> visited) {
    // Return value is not used here; the call is kept for whatever it does internally.
    getPOSString(gov, dep);

    // A dependent without children is a leaf and is represented by a plain entity.
    final boolean dependentIsLeaf = depGraph.getChildren(dep).size() <= 0;
    final Tuple dependentSide =
        dependentIsLeaf
            ? new Tuple(new Entity(dep.word(), EntityType.Notion))
            : parse(dep, depGraph, visited);

    final Relation link =
        new Relation(depGraph.getEdge(gov, dep).getRelation().getSpecific(), RelationType.One2One);

    final Tuple governorSide =
        (t != null) ? t : new Tuple(new Entity(gov.word(), EntityType.Object));

    final Tuple combined = new Tuple(dependentSide, link, governorSide);
    logger.info(combined.toString());
    return combined;
  }
Пример #2
0
  /**
   * Tells whether this word is a copy of {@code otherWord}.
   *
   * <p>Both words must agree on index, sentence index and document id (a missing value only
   * agrees with another missing value). In addition this word must have a non-zero copy count
   * while {@code otherWord} must have a copy count of zero.
   *
   * @param otherWord the candidate original
   * @return {@code true} if this word is a copy of {@code otherWord}
   */
  public boolean isCopy(IndexedWord otherWord) {
    final Integer thisIndex = get(CoreAnnotations.IndexAnnotation.class);
    final Integer thatIndex = otherWord.get(CoreAnnotations.IndexAnnotation.class);
    final boolean indexDiffers =
        (thisIndex == null) ? thatIndex != null : !thisIndex.equals(thatIndex);
    if (indexDiffers) {
      return false;
    }

    final Integer thisSentence = get(CoreAnnotations.SentenceIndexAnnotation.class);
    final Integer thatSentence = otherWord.get(CoreAnnotations.SentenceIndexAnnotation.class);
    final boolean sentenceDiffers =
        (thisSentence == null) ? thatSentence != null : !thisSentence.equals(thatSentence);
    if (sentenceDiffers) {
      return false;
    }

    final String thisDoc = getString(CoreAnnotations.DocIDAnnotation.class);
    final String thatDoc = otherWord.getString(CoreAnnotations.DocIDAnnotation.class);
    final boolean docDiffers = (thisDoc == null) ? thatDoc != null : !thisDoc.equals(thatDoc);
    if (docDiffers) {
      return false;
    }

    // Same position: this one has to be the copy and the other one the uncopied word.
    return copyCount() != 0 && otherWord.copyCount() == 0;
  }
Пример #3
0
  public Entity(IndexedWord... wrd) {
    this.name = "";

    for (IndexedWord w : wrd) {
      this.name = this.name + " " + w.word();
    }

    this.name = this.name.trim();
    this.type = EntityType.Unknown;
  }
Пример #4
0
  /**
   * Checks {@code getCommonAncestor} for nearby nodes, for a pair that is too far apart (expects
   * {@code null}), for a node paired with itself, and for nodes at the top of the graph.
   */
  public void testGetCommonAncestor() {
    // Each row is {first node index, second node index, expected ancestor index}.
    final int[][] nearbyPairs = {
      {43, 44, 45},
      {41, 39, 41},
      {39, 41, 41},
      {40, 42, 41},
    };
    for (int[] row : nearbyPairs) {
      IndexedWord ancestor =
          graph.getCommonAncestor(graph.getNodeByIndex(row[0]), graph.getNodeByIndex(row[1]));
      assertEquals(row[2], ancestor.index());
    }

    // too far for this method
    IndexedWord tooFar =
        graph.getCommonAncestor(graph.getNodeByIndex(10), graph.getNodeByIndex(42));
    assertEquals(null, tooFar);

    // Identical nodes first, then a couple of tests at the top of the graph.
    final int[][] selfAndTopPairs = {
      {10, 10, 10},
      {40, 40, 40},
      {10, 1, 10},
      {1, 10, 10},
    };
    for (int[] row : selfAndTopPairs) {
      IndexedWord ancestor =
          graph.getCommonAncestor(graph.getNodeByIndex(row[0]), graph.getNodeByIndex(row[1]));
      assertEquals(row[2], ancestor.index());
    }
  }
Пример #5
0
  /**
   * Checks the shortest undirected path (as edges and as nodes) between nodes 10 and 14, and the
   * degenerate path from node 10 to itself: no edges, exactly one node. Diagnostic output is
   * printed along the way.
   */
  public void testShortestPath() {
    graph.prettyPrint();

    final IndexedWord source = graph.getNodeByIndex(10);
    final IndexedWord target = graph.getNodeByIndex(14);
    System.out.println("word1: " + source);
    System.out.println("word1: " + source.hashCode());
    System.out.println("word2: " + target);
    System.out.println("word2: " + target.hashCode());
    System.out.println("word eq: " + source.equals(target));
    System.out.println("word eq: " + (source.hashCode() == target.hashCode()));
    System.out.println("word eq: " + (source.toString().equals(target.toString())));

    // Path between two different nodes.
    final List<SemanticGraphEdge> pathEdges =
        graph.getShortestUndirectedPathEdges(source, target);
    System.out.println("path: " + pathEdges);
    assertNotNull(pathEdges);

    final List<IndexedWord> pathNodes = graph.getShortestUndirectedPathNodes(source, target);
    System.out.println("path: " + pathNodes);
    assertNotNull(pathNodes);
    assertEquals(source, pathNodes.get(0));
    assertEquals(target, pathNodes.get(pathNodes.size() - 1));

    // Path from a node to itself.
    final List<SemanticGraphEdge> selfEdges =
        graph.getShortestUndirectedPathEdges(source, source);
    System.out.println("path: " + selfEdges);
    assertNotNull(selfEdges);
    assertEquals(0, selfEdges.size());

    final List<IndexedWord> selfNodes = graph.getShortestUndirectedPathNodes(source, source);
    System.out.println("path: " + selfNodes);
    assertNotNull(selfNodes);
    assertEquals(1, selfNodes.size());
    assertEquals(source, selfNodes.get(0));
  }
Пример #6
0
 /**
  * Asserts that the indices of {@code nodes} are exactly the {@code expected} values. Both sides
  * are collected into sets, so order and duplicates are ignored.
  *
  * @param nodes the nodes whose indices are checked
  * @param expected the indices that must be present
  */
 private static void verifySet(Collection<IndexedWord> nodes, int... expected) {
   Set<Integer> actualIndices = Generics.newTreeSet();
   for (IndexedWord current : nodes) {
     actualIndices.add(current.index());
   }

   Set<Integer> wantedIndices = Generics.newTreeSet();
   for (int i = 0; i < expected.length; i++) {
     wantedIndices.add(expected[i]);
   }

   assertEquals(wantedIndices, actualIndices);
 }
Пример #7
0
  /**
   * Checks {@code hasChildren} on a small graph: only "ate" and "muffins" govern other words, all
   * remaining vertices must be reported as childless.
   */
  public void testHasChildren() {
    SemanticGraph gr = SemanticGraph.valueOf("[ate subj>Bill dobj>[muffins compound>blueberry]]");

    for (IndexedWord vertex : gr.vertexListSorted()) {
      final boolean isGovernor = vertex.word().equals("ate") || vertex.word().equals("muffins");
      if (!isGovernor) {
        assertFalse(gr.hasChildren(vertex));
        continue;
      }
      assertTrue(gr.hasChildren(vertex));
    }
  }
Пример #8
0
  /**
   * Runs the pipeline over {@code text} and converts the collapsed, CC-processed dependencies of
   * every sentence into a single {@link DependencyParse}.
   *
   * <p>The models are loaded on first use. For each sentence the head node is set to the index of
   * the graph's first root (so the last sentence wins), and for each edge both endpoints are
   * added as nodes followed by an edge from the dependent index to the governor index, labelled
   * with the relation's short name.
   *
   * @param text the raw text to parse
   * @return the dependency parse assembled from all sentences
   */
  public static DependencyParse parse(String text) {
    if (pipeline == null) {
      loadModels();
    }

    final DependencyParse result = new DependencyParse();
    final Annotation annotated = new Annotation(text);
    pipeline.annotate(annotated);

    for (CoreMap sentence : annotated.get(SentencesAnnotation.class)) {
      final SemanticGraph graph = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
      result.setHeadNode(graph.getFirstRoot().index());

      for (SemanticGraphEdge edge : graph.edgeListSorted()) {
        // Read everything off the dependent first, then off the governor.
        final IndexedWord dependent = edge.getDependent();
        final String dependentText = dependent.originalText();
        final int dependentIndex = dependent.index();
        final String dependentTag = dependent.tag();
        final int dependentBegin = dependent.beginPosition();
        final int dependentEnd = dependent.endPosition();

        final IndexedWord governor = edge.getGovernor();
        final String governorText = governor.originalText();
        final int governorIndex = governor.index();
        final String governorTag = governor.tag();
        final int governorBegin = governor.beginPosition();
        final int governorEnd = governor.endPosition();

        result.addNode(governorIndex, governorText, governorTag, governorBegin, governorEnd);
        result.addNode(dependentIndex, dependentText, dependentTag, dependentBegin, dependentEnd);
        result.addEdge(dependentIndex, governorIndex, edge.getRelation().getShortName());
      }
    }

    return result;
  }
Пример #9
0
 /**
  * Creates a soft copy of this word. When this word has an {@code original}, the request is
  * delegated to it; otherwise the copy counter is incremented and used as the new copy's number.
  *
  * @return the newly created soft copy
  */
 public IndexedWord makeSoftCopy() {
   if (original == null) {
     return makeSoftCopy(++numCopies);
   }
   return original.makeSoftCopy();
 }
Пример #10
0
  /**
   * Tests whether {@code node} satisfies this node description.
   *
   * <p>A root description matches nodes contained in the graph's roots; an empty description
   * matches {@code IndexedWord.NO_WORD}. Otherwise every attribute must be present on the node
   * and its string form must fully match the attribute's regular expression. In all cases the
   * outcome is inverted when the description is negated ({@code negDesc}).
   *
   * @param node the node to test
   * @param sg the graph the node belongs to (consulted for the root check)
   * @param ignoreCase whether attribute patterns are matched case-insensitively
   * @return {@code true} if the node is accepted by this description
   */
  @SuppressWarnings("unchecked")
  public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean ignoreCase) {
    // "negDesc != x" yields x for a plain description and !x for a negated one.
    if (isRoot) {
      return negDesc != sg.getRoots().contains(node);
    }
    if (isEmpty) {
      return negDesc != node.equals(IndexedWord.NO_WORD);
    }

    for (Map.Entry<String, Pattern> attr : attributes.entrySet()) {
      // Resolve the attribute name to its annotation key class and read the value off the node.
      Class c = Env.lookupAnnotationKey(env, attr.getKey());
      Object value = node.get(c);
      String nodeValue = (value == null) ? null : value.toString();
      if (nodeValue == null) {
        return negDesc;
      }

      // The pattern is re-applied from its source text in both modes.
      String regex = attr.getValue().pattern();
      boolean matched =
          ignoreCase
              ? Pattern.compile(regex, Pattern.CASE_INSENSITIVE).matcher(nodeValue).matches()
              : nodeValue.matches(regex);
      if (!matched) {
        return negDesc;
      }
    }

    // Every attribute matched.
    return !negDesc;
  }
Пример #11
0
  /**
   * Manual smoke check: annotates a fixed sentence with a full pipeline and prints every vertex
   * of each sentence's collapsed, CC-processed dependency graph to standard output.
   *
   * <p>Failures are reported on standard error instead of being silently discarded.
   */
  private void testParseTree() {
    try {
      Properties props = new Properties();
      props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
      StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

      // create an empty Annotation just with the given text
      String text = "Give me a list of all bandleaders that play trumpet.";
      Annotation document = new Annotation(text);

      // run all Annotators on this text
      pipeline.annotate(document);

      // these are all the sentences in this document
      // a CoreMap is essentially a Map that uses class objects as keys and has values with custom
      // types
      List<CoreMap> sentences = document.get(SentencesAnnotation.class);

      for (CoreMap sentence : sentences) {
        // this is the Stanford dependency graph of the current sentence
        SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);

        for (IndexedWord vertex : dependencies.vertexSet()) {
          System.out.println(vertex.toString());
        }
      }
    } catch (Exception e) {
      // Best effort: do not propagate, but never hide the failure either.
      System.err.println("testParseTree failed:");
      e.printStackTrace();
    }
  }
Пример #12
0
  /**
   * Resolves a multi-word noun phrase. The children of {@code head} attached through a
   * noun-compound-modifier relation are concatenated, each followed by a single space, and the
   * head word is appended last.
   *
   * <p>Every modifier and finally the head itself are appended to {@code words}.
   *
   * @param head The head of the noun phrase
   * @param graph The sentence to look in.
   * @param words Receives the words which make up the noun phrase
   * @return The assembled phrase, or {@code null} if the graph returns {@code null} for the
   *     modifier lookup
   */
  public static String resolveNN(
      IndexedWord head, SemanticGraph graph, ArrayList<IndexedWord> words) {
    final List<IndexedWord> modifiers =
        graph.getChildrenWithReln(head, EnglishGrammaticalRelations.NOUN_COMPOUND_MODIFIER);
    // Nothing to work with: report that to the caller and leave words untouched.
    if (modifiers == null) {
      return null;
    }

    final StringBuilder phrase = new StringBuilder();
    for (IndexedWord modifier : modifiers) {
      phrase.append(modifier.word()).append(" ");
      words.add(modifier);
    }

    // The head ("starting" word) always closes the phrase.
    phrase.append(head.word());
    words.add(head);
    return phrase.toString();
  }
Пример #13
0
 /**
  * Checks that the common ancestor of nodes 43 and 44 is node 45, printing the three words
  * involved beforehand.
  */
 public void testCommonAncestor() {
   final IndexedWord first = graph.getNodeByIndex(43);
   final IndexedWord second = graph.getNodeByIndex(44);
   final IndexedWord ancestor = graph.getCommonAncestor(first, second);
   System.out.println("word1: " + first);
   System.out.println("word2: " + second);
   System.out.println("common: " + ancestor);

   // Assemble the summary line piece by piece: "<value>-<index>" for each word.
   final StringBuilder summary = new StringBuilder("common ancestor between  ");
   summary.append(first.value()).append("-").append(first.index());
   summary.append(" and ");
   summary.append(second.value()).append("-").append(second.index());
   summary.append(" is ");
   summary.append(ancestor.value()).append("-").append(ancestor.index());
   System.out.println(summary.toString());

   assertEquals(45, ancestor.index());
 }
Пример #14
0
  /**
   * Compares two words by position rather than by word value: index, sentence index and document
   * id must agree (a missing value only agrees with another missing value), the copy counts must
   * be the same, and the pseudo-positions must either both be unset (NaN) or be equal.
   *
   * @param o the object to compare with
   * @return {@code true} if {@code o} is an {@code IndexedWord} at the same position
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (!(o instanceof IndexedWord)) return false;

    final IndexedWord that = (IndexedWord) o;

    final Integer thisIndex = get(CoreAnnotations.IndexAnnotation.class);
    final Integer thatIndex = that.get(CoreAnnotations.IndexAnnotation.class);
    final boolean indexDiffers =
        (thisIndex == null) ? thatIndex != null : !thisIndex.equals(thatIndex);
    if (indexDiffers) {
      return false;
    }

    final Integer thisSentence = get(CoreAnnotations.SentenceIndexAnnotation.class);
    final Integer thatSentence = that.get(CoreAnnotations.SentenceIndexAnnotation.class);
    final boolean sentenceDiffers =
        (thisSentence == null) ? thatSentence != null : !thisSentence.equals(thatSentence);
    if (sentenceDiffers) {
      return false;
    }

    final String thisDoc = getString(CoreAnnotations.DocIDAnnotation.class);
    final String thatDoc = that.getString(CoreAnnotations.DocIDAnnotation.class);
    final boolean docDiffers = (thisDoc == null) ? thatDoc != null : !thisDoc.equals(thatDoc);
    if (docDiffers) {
      return false;
    }

    if (copyCount() != that.copyCount()) {
      return false;
    }

    // Pseudo-positions agree when neither is set, or when both hold the same value.
    final boolean neitherPositioned =
        Double.isNaN(this.pseudoPosition) && Double.isNaN(that.pseudoPosition);
    return neitherPositioned || this.pseudoPosition == that.pseudoPosition;
  }
Пример #15
0
  /**
   * NOTE: This compareTo is based on and made to be compatible with the one from
   * IndexedFeatureLabel. You <em>must</em> have a DocIDAnnotation, SentenceIndexAnnotation, and
   * IndexAnnotation for this to make sense and be guaranteed to work properly.
   *
   * <p>This compareTo method is based not on value elements like the word(), but on passage
   * position. It puts NO_WORD elements first. If either word has a pseudo-position set, only the
   * pseudo-positions are compared. Otherwise words are ordered by document id, sentence index,
   * word index and finally copy count; if none of these differ, the words compare as equal.
   *
   * <p>Integer components are compared with {@link Integer#compare(int, int)} rather than by
   * subtraction, so extreme index values cannot overflow and flip the sign of the result.
   *
   * @param w The IndexedWord to compare with
   * @return a negative value, zero, or a positive value as this word sorts before, equal to, or
   *     after {@code w}
   */
  @Override
  public int compareTo(IndexedWord w) {
    if (this.equals(IndexedWord.NO_WORD)) {
      return w.equals(IndexedWord.NO_WORD) ? 0 : -1;
    }
    if (w.equals(IndexedWord.NO_WORD)) {
      return 1;
    }

    // Override the default comparator if pseudo-positions are set.
    // This is needed for splicing trees together awkwardly in OpenIE.
    if (!Double.isNaN(w.pseudoPosition) || !Double.isNaN(this.pseudoPosition)) {
      double val = this.pseudoPosition() - w.pseudoPosition();
      if (val < 0) {
        return -1;
      }
      if (val > 0) {
        return 1;
      }
      return 0;
    }

    // Otherwise, compare using the normal doc/sentence/token index hierarchy
    String docID = this.getString(CoreAnnotations.DocIDAnnotation.class);
    int docComp = docID.compareTo(w.getString(CoreAnnotations.DocIDAnnotation.class));
    if (docComp != 0) return docComp;

    int sentComp = Integer.compare(sentIndex(), w.sentIndex());
    if (sentComp != 0) return sentComp;

    int indexComp = Integer.compare(index(), w.index());
    if (indexComp != 0) return indexComp;

    // Copies of the same token are ordered by their copy number.
    return Integer.compare(copyCount(), w.copyCount());
  }
Пример #16
0
    // Advances nodeMatchCandidateIterator to the next candidate that myNode accepts and leaves
    // it in nextMatch. On success finished is false, the child is reset, and the named node,
    // named relation and variable groups are committed; if the iterator runs out, finished
    // stays true.
    // when finished = false; break; is called, it means I successfully matched.
    @SuppressWarnings("null")
    private void goToNextNodeMatch() {
      decommitVariableGroups(); // make sure variable groups are free.
      decommitNamedNodes();
      decommitNamedRelations();
      finished = true;
      // NOTE(review): m is never assigned in this method, so the m.group(...) calls below would
      // throw a NullPointerException if they are reached with an already-bound variable, and
      // commitVariableGroups(m) is always handed null - confirm this is intended.
      Matcher m = null;
      while (nodeMatchCandidateIterator.hasNext()) {
        if (myNode.reln.getName() != null) {
          // The relation is named: if that name is already bound to a different relation,
          // consume this candidate and move on to the next one.
          String foundReln = namesToRelations.get(myNode.reln.getName());
          nextMatchReln = ((GraphRelation.SearchNodeIterator) nodeMatchCandidateIterator).getReln();
          if ((foundReln != null) && (!nextMatchReln.equals(foundReln))) {
            nextMatch = nodeMatchCandidateIterator.next();
            continue;
          }
        }

        nextMatch = nodeMatchCandidateIterator.next();
        // System.err.println("going to next match: " + nextMatch.word() + " " +
        // myNode.descString + " " + myNode.isLink);
        if (myNode.descString.equals("{}") && myNode.isLink) {
          // Empty description on a link node: compare against the node already bound to this
          // name, if there is one.
          IndexedWord otherNode = namesToNodes.get(myNode.name);
          if (otherNode != null) {
            if (otherNode.equals(nextMatch)) {
              if (!myNode.negDesc) {
                finished = false;
                break;
              }
            } else {
              if (myNode.negDesc) {
                finished = false;
                break;
              }
            }
          } else {
            boolean found = myNode.nodeAttrMatch(nextMatch, hyp ? sg : sg_aligned, ignoreCase);
            if (found) {
              for (Pair<Integer, String> varGroup : myNode.variableGroups) {
                // if variables have been captured from a regex, they
                // must match any previous matchings
                String thisVariable = varGroup.second();
                String thisVarString = variableStrings.getString(thisVariable);
                if (thisVarString != null && !thisVarString.equals(m.group(varGroup.first()))) {
                  // failed to match a variable
                  // NOTE(review): found is not read after this loop; the candidate is still
                  // accepted just below - confirm whether it should be skipped instead.
                  found = false;
                  break;
                }
              }

              // nodeAttrMatch already checks negDesc, so no need to
              // check for that here
              finished = false;
              break;
            }
          }
        } else { // try to match the description pattern.
          boolean found = myNode.nodeAttrMatch(nextMatch, hyp ? sg : sg_aligned, ignoreCase);
          if (found) {
            for (Pair<Integer, String> varGroup : myNode.variableGroups) {
              // if variables have been captured from a regex, they
              // must match any previous matchings
              String thisVariable = varGroup.second();
              String thisVarString = variableStrings.getString(thisVariable);
              if (thisVarString != null && !thisVarString.equals(m.group(varGroup.first()))) {
                // failed to match a variable
                // NOTE(review): same as above - found is reset but never consulted afterwards.
                found = false;
                break;
              }
            }

            // nodeAttrMatch already checks negDesc, so no need to
            // check for that here
            finished = false;
            break;
          }
        }
      } // end while

      if (!finished) { // I successfully matched.
        resetChild();
        if (myNode.name != null) {
          // note: have to fill in the map as we go for backreferencing
          if (!namesToNodes.containsKey(myNode.name)) {
            // System.err.println("making namedFirst");
            namedFirst = true;
          }
          // System.err.println("adding named node: " + myNode.name + "=" +
          // nextMatch.word());
          namesToNodes.put(myNode.name, nextMatch);
        }
        if (myNode.reln.getName() != null) {
          // Remember whether this matcher is the first one to bind the relation name.
          if (!namesToRelations.containsKey(myNode.reln.getName())) relnNamedFirst = true;
          namesToRelations.put(myNode.reln.getName(), nextMatchReln);
        }
        commitVariableGroups(m); // commit my variable groups.
      }
      // finished is false exiting this if and only if nextChild exists
      // and has a label or backreference that matches
      // (also it will just have been reset)
    }
Пример #17
0
  /**
   * This method searches for an indexed word in a sentence tree.
   *
   * <p>The first subtree whose label carries the same begin and end index as {@code wordToFind}
   * and whose first leaf has the same value decides the result: from there the method walks up
   * to the first node with the expected POS (or, without one, to the first phrasal node) and
   * optionally continues upwards while the parent carries the same label.
   *
   * @param wordToFind The word to look for; it must carry begin and end index annotations
   * @param treeToSearch The tree to search in; also used as the root when looking up parents
   * @param expectedPOS The expected POS tag for the result. If this is NULL, the method tries to
   *     find a phrase.
   * @param canGoUp If TRUE the method will walk up the tree to find a phrase.
   * @param skip Set to "1" if you want to find the phrase for "in front of". Set to "0" otherwise.
   * @return The largest matching tree, or {@code null} if the word is not found or no suitable
   *     ancestor exists.
   */
  public static Tree match(
      IndexedWord wordToFind, Tree treeToSearch, String expectedPOS, boolean canGoUp, int skip) {
    int end = wordToFind.get(EndIndexAnnotation.class);
    int begin = wordToFind.get(BeginIndexAnnotation.class);

    // first, find whatever is at the word's index
    for (Tree tree : treeToSearch) {
      CoreLabel lbl = ((CoreLabel) tree.label());
      if (lbl == null) {
        continue;
      }

      // Labels lacking either index cannot be the word's position; skip them instead of
      // failing while unboxing a missing value.
      Integer lblEnd = lbl.get(EndIndexAnnotation.class);
      if (lblEnd == null || lblEnd.intValue() != end) {
        continue;
      }
      Integer lblBegin = lbl.get(BeginIndexAnnotation.class);
      if (lblBegin == null || lblBegin.intValue() != begin) {
        continue;
      }

      // we found the first subtree at the word's index
      // now, check if the word here is our searchword
      if (!tree.getLeaves().get(0).label().value().equals(wordToFind.value())) {
        continue;
      }

      // we have found the label.
      Tree candidate = tree;

      if (expectedPOS != null) {
        // if we know our desired POS, just keep walking up the tree to find the first
        // instance of the expected pos
        while (!expectedPOS.equals(candidate.value())) {
          // if we don't have the right POS, just try our parent
          candidate = candidate.parent(treeToSearch);

          if (candidate == null) {
            return null;
          }
        }
        candidate = skip(candidate, treeToSearch, expectedPOS, skip);
      } else {
        // else walk up the tree again to find the corresponding phrase
        while (!candidate.isPhrasal()) {
          candidate = candidate.parent(treeToSearch);

          if (candidate == null) {
            return null;
          }
        }
      }

      if (canGoUp && candidate != null) {
        // now keep walking as long as the phrase does not change. this should yield the
        // largest representative phrase for this word. The parent is checked for null before
        // it is dereferenced, so reaching the root ends the walk and returns the root.
        String phrase = candidate.value();
        Tree parent = candidate.parent(treeToSearch);
        while (parent != null && phrase.equals(parent.value())) {
          candidate = parent;
          parent = candidate.parent(treeToSearch);
        }
      }
      return candidate;
    }
    return null;
  }
 /**
  * Indicator feature: yields 1.0 when the NER label of {@code label} equals {@code tag}
  * (null-safe comparison), and 0.0 otherwise.
  *
  * @param label the word whose NER label is inspected
  * @return 1.0 on a match, 0.0 otherwise
  */
 @Override
 public double computeValue(IndexedWord label) {
   if (Objects.equals(label.ner(), tag)) {
     return 1.0;
   }
   return 0.0;
 }
Пример #19
0
    /**
     * Serves a Semgrex query over HTTP.
     *
     * <p>The request document is annotated unless it already carries sentences, the required
     * {@code pattern} URL parameter is compiled as a Semgrex pattern, and the result is written
     * as JSON under the key {@code "sentences"}. With filtering enabled (the {@code filter}
     * parameter is blank or equals "true" ignoring case) each sentence yields a boolean telling
     * whether the pattern matches. Otherwise each sentence yields its matches keyed by running
     * number, each with {@code text}, {@code begin}, {@code end} and one {@code $name} entry per
     * named node, plus a {@code length} count.
     *
     * <p>The query runs on {@code corenlpExecutor} and is given five seconds; after that an error
     * response is sent instead. The JSON body is always encoded as UTF-8.
     *
     * @param httpExchange the exchange to read the request from and write the response to
     * @throws IOException if writing the response fails
     */
    @Override
    public void handle(HttpExchange httpExchange) throws IOException {
      // Set common response headers
      httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*");

      Future<String> json =
          corenlpExecutor.submit(
              () -> {
                try {
                  // Get the document
                  Properties props = new Properties();
                  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse");
                  Annotation doc = getDocument(props, httpExchange);
                  if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
                    StanfordCoreNLP pipeline = mkStanfordCoreNLP(props);
                    pipeline.annotate(doc);
                  }

                  // Construct the matcher
                  Map<String, String> params = getURLParams(httpExchange.getRequestURI());
                  // (get the pattern)
                  if (!params.containsKey("pattern")) {
                    respondError("Missing required parameter 'pattern'", httpExchange);
                    return "";
                  }
                  String pattern = params.get("pattern");
                  // (get whether to filter / find)
                  String filterStr = params.getOrDefault("filter", "false");
                  final boolean filter =
                      filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr);
                  // (create the matcher)
                  final SemgrexPattern regex = SemgrexPattern.compile(pattern);

                  // Run Semgrex
                  return JSONOutputter.JSONWriter.objectToJSON(
                      (docWriter) -> {
                        if (filter) {
                          // Case: just filter sentences
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          regex
                                              .matcher(
                                                  sentence.get(
                                                      SemanticGraphCoreAnnotations
                                                          .CollapsedCCProcessedDependenciesAnnotation
                                                          .class))
                                              .matches())
                                  .collect(Collectors.toList()));
                        } else {
                          // Case: find matches
                          docWriter.set(
                              "sentences",
                              doc.get(CoreAnnotations.SentencesAnnotation.class)
                                  .stream()
                                  .map(
                                      sentence ->
                                          (Consumer<JSONOutputter.Writer>)
                                              (JSONOutputter.Writer sentWriter) -> {
                                                SemgrexMatcher matcher =
                                                    regex.matcher(
                                                        sentence.get(
                                                            SemanticGraphCoreAnnotations
                                                                .CollapsedCCProcessedDependenciesAnnotation
                                                                .class));
                                                int i = 0;
                                                while (matcher.find()) {
                                                  sentWriter.set(
                                                      Integer.toString(i),
                                                      (Consumer<JSONOutputter.Writer>)
                                                          (JSONOutputter.Writer matchWriter) -> {
                                                            // Token offsets: index() is 1-based,
                                                            // so [index - 1, index) is the span.
                                                            IndexedWord match = matcher.getMatch();
                                                            matchWriter.set("text", match.word());
                                                            matchWriter.set(
                                                                "begin", match.index() - 1);
                                                            matchWriter.set("end", match.index());
                                                            for (String capture :
                                                                matcher.getNodeNames()) {
                                                              matchWriter.set(
                                                                  "$" + capture,
                                                                  (Consumer<JSONOutputter.Writer>)
                                                                      groupWriter -> {
                                                                        IndexedWord node =
                                                                            matcher.getNode(
                                                                                capture);
                                                                        groupWriter.set(
                                                                            "text", node.word());
                                                                        groupWriter.set(
                                                                            "begin",
                                                                            node.index() - 1);
                                                                        groupWriter.set(
                                                                            "end", node.index());
                                                                      });
                                                            }
                                                          });
                                                  i += 1;
                                                }
                                                sentWriter.set("length", i);
                                              }));
                        }
                      });
                } catch (Exception e) {
                  e.printStackTrace();
                  try {
                    respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
                  } catch (IOException ignored) {
                    // The client is unreachable; there is nobody left to report the error to.
                  }
                }
                // An empty result tells the caller that a response has already been sent.
                return "";
              });

      // Send response
      byte[] response = new byte[0];
      try {
        // Encode explicitly as UTF-8 so the body does not depend on the platform charset.
        response = json.get(5, TimeUnit.SECONDS).getBytes(java.nio.charset.StandardCharsets.UTF_8);
      } catch (InterruptedException e) {
        try {
          respondError("Timeout when executing Semgrex query", httpExchange);
        } finally {
          // Restore the interrupt flag only after responding, so the write is not aborted.
          Thread.currentThread().interrupt();
        }
      } catch (ExecutionException | TimeoutException e) {
        respondError("Timeout when executing Semgrex query", httpExchange);
      }
      if (response.length > 0) {
        httpExchange.getResponseHeaders().add("Content-Type", "text/json");
        httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length));
        httpExchange.sendResponseHeaders(HTTP_OK, response.length);
        httpExchange.getResponseBody().write(response);
        httpExchange.close();
      }
    }
Пример #20
0
  /**
   * Fix some bizarre peculiarities with certain trees, mutating the given graph in place. The
   * clean-up runs in the following order:
   *
   * <ul>
   *   <li>Vertices whose tag starts with one of {@code . , ( ) :} and that have no outgoing edges
   *       are removed. Vertices with a {@code null} tag are left alone.
   *   <li>Sometimes there's a node from a word to itself. This seems wrong, so edges whose
   *       governor and dependent share the same index are removed.
   *   <li>Edges whose relation prints as {@code "punct"} are removed when their dependent has no
   *       outgoing edges.
   *   <li>Edges flagged as extra are removed (and collected for the return value) when their
   *       dependent has more than one incoming edge.
   *   <li>For every removed extra edge, each {@code "appos"} edge touching its dependent yields an
   *       additional edge from the extra edge's governor to the other end of the apposition. These
   *       are only appended to the returned list; they are not inserted into the graph.
   *   <li>All incoming edges of the graph's roots are removed, and then vertices without an
   *       incoming edge that are not roots, as well as all but the first incoming edge of any
   *       vertex, are removed repeatedly until nothing changes.
   * </ul>
   *
   * @param tree The tree to clean (in place!).
   * @return A list of extra edges, which are valid but were removed, followed by the edges
   *     derived from appositions.
   */
  public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
    //    assert !isCyclic(tree);

    // Clean nodes
    // Candidates are collected first and removed after the loop, so the vertex set is not
    // modified while it is being iterated.
    List<IndexedWord> toDelete = new ArrayList<>();
    for (IndexedWord vertex : tree.vertexSet()) {
      // Clean punctuation
      if (vertex.tag() == null) {
        continue;
      }
      // Only the first character of the tag decides whether this is treated as punctuation.
      char tag = vertex.backingLabel().tag().charAt(0);
      if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':') {
        if (!tree.outgoingEdgeIterator(vertex)
            .hasNext()) { // This should really never happen, but it does.
          toDelete.add(vertex);
        }
      }
    }
    toDelete.forEach(tree::removeVertex);

    // Clean edges
    // Removal goes through the iterator itself, so edges are dropped during the traversal.
    Iterator<SemanticGraphEdge> iter = tree.edgeIterable().iterator();
    while (iter.hasNext()) {
      SemanticGraphEdge edge = iter.next();
      if (edge.getDependent().index() == edge.getGovernor().index()) {
        // Clean self-edges
        iter.remove();
      } else if (edge.getRelation().toString().equals("punct")) {
        // Clean punctuation (again)
        if (!tree.outgoingEdgeIterator(edge.getDependent())
            .hasNext()) { // This should really never happen, but it does.
          iter.remove();
        }
      }
    }

    // Remove extra edges
    // An extra edge is only dropped when its dependent has another incoming edge as well.
    List<SemanticGraphEdge> extraEdges = new ArrayList<>();
    for (SemanticGraphEdge edge : tree.edgeIterable()) {
      if (edge.isExtra()) {
        if (tree.incomingEdgeList(edge.getDependent()).size() > 1) {
          extraEdges.add(edge);
        }
      }
    }
    extraEdges.forEach(tree::removeEdge);

    // Add apposition edges (simple coref)
    // The new edges go into the returned list only; the graph itself is not touched here.
    for (SemanticGraphEdge extraEdge :
        new ArrayList<>(extraEdges)) { // note[gabor] prevent concurrent modification exception
      // The dependent is the target of an apposition: link the governor of the extra edge to the
      // governor of that apposition.
      for (SemanticGraphEdge candidateAppos : tree.incomingEdgeIterable(extraEdge.getDependent())) {
        if (candidateAppos.getRelation().toString().equals("appos")) {
          extraEdges.add(
              new SemanticGraphEdge(
                  extraEdge.getGovernor(),
                  candidateAppos.getGovernor(),
                  extraEdge.getRelation(),
                  extraEdge.getWeight(),
                  extraEdge.isExtra()));
        }
      }
      // The dependent is the source of an apposition: link the governor of the extra edge to the
      // dependent of that apposition.
      for (SemanticGraphEdge candidateAppos : tree.outgoingEdgeIterable(extraEdge.getDependent())) {
        if (candidateAppos.getRelation().toString().equals("appos")) {
          extraEdges.add(
              new SemanticGraphEdge(
                  extraEdge.getGovernor(),
                  candidateAppos.getDependent(),
                  extraEdge.getRelation(),
                  extraEdge.getWeight(),
                  extraEdge.isExtra()));
        }
      }
    }

    // Brute force ensure tree
    // Remove incoming edges from roots
    List<SemanticGraphEdge> rootIncomingEdges = new ArrayList<>();
    for (IndexedWord root : tree.getRoots()) {
      for (SemanticGraphEdge incomingEdge : tree.incomingEdgeIterable(root)) {
        rootIncomingEdges.add(incomingEdge);
      }
    }
    rootIncomingEdges.forEach(tree::removeEdge);
    // Loop until it becomes a tree.
    // Each pass first records what to remove and only then mutates the graph; another pass is
    // scheduled whenever something was removed.
    boolean changed = true;
    while (changed) { // I just want trees to be trees; is that so much to ask!?
      changed = false;
      List<IndexedWord> danglingNodes = new ArrayList<>();
      List<SemanticGraphEdge> invalidEdges = new ArrayList<>();

      for (IndexedWord vertex : tree.vertexSet()) {
        // Collect statistics
        Iterator<SemanticGraphEdge> incomingIter = tree.incomingEdgeIterator(vertex);
        boolean hasIncoming = incomingIter.hasNext();
        boolean hasMultipleIncoming = false;
        if (hasIncoming) {
          // Skip the first incoming edge; whatever the iterator still holds afterwards is surplus.
          incomingIter.next();
          hasMultipleIncoming = incomingIter.hasNext();
        }

        // Register actions
        if (!hasIncoming && !tree.getRoots().contains(vertex)) {
          // No parent and not a root: the vertex is disconnected from above.
          danglingNodes.add(vertex);
        } else {
          if (hasMultipleIncoming) {
            // The iterator has already advanced past the first edge, so only the second and later
            // incoming edges are marked for removal.
            for (SemanticGraphEdge edge : new IterableIterator<>(incomingIter)) {
              invalidEdges.add(edge);
            }
          }
        }
      }

      // Perform actions
      for (IndexedWord vertex : danglingNodes) {
        tree.removeVertex(vertex);
        changed = true;
      }
      for (SemanticGraphEdge edge : invalidEdges) {
        tree.removeEdge(edge);
        changed = true;
      }
    }

    // Return
    // The tree check only runs when assertions are enabled.
    assert isTree(tree);
    return extraEdges;
  }
Пример #21
0
 /**
  * Creates a new {@code IndexedWord} backed by a fresh {@code CoreLabel} that is constructed from
  * this word's label, and stamps the given copy count on it.
  *
  * @param count the copy count to set on the new word
  * @return the newly created word
  */
 public IndexedWord makeCopy(int count) {
   final IndexedWord duplicate = new IndexedWord(new CoreLabel(label));
   duplicate.setCopyCount(count);
   return duplicate;
 }
Пример #22
0
 /**
  * Creates a new {@code IndexedWord} that wraps this word's own label object (no new label is
  * constructed), stamps the given copy count on it, and records this word as its original.
  *
  * @param count the copy count to set on the new word
  * @return the newly created word, whose {@code original} field points back to this word
  */
 public IndexedWord makeSoftCopy(int count) {
   final IndexedWord softCopy = new IndexedWord(label);
   softCopy.setCopyCount(count);
   softCopy.original = this;
   return softCopy;
 }
Пример #23
0
  /**
   * Runs the wrapped processor over the document text of the given CAS and converts its output
   * into annotations on that CAS.
   *
   * <p>The conversion happens in this order:
   *
   * <ol>
   *   <li>One {@code Token} per token annotation, carrying part of speech and lemma; consecutive
   *       tokens with the same non-{@code "O"} named entity tag are merged into one {@code
   *       NamedEntityMention}.
   *   <li>One {@code Sentence} per sentence annotation, together with its treebank nodes and its
   *       dependency nodes and relations.
   *   <li>One {@code NamedEntity} per coreference chain, reusing an existing mention when one has
   *       exactly the same character span and creating a new mention otherwise.
   *   <li>One singleton {@code NamedEntity} for every mention that is still without an entity.
   * </ol>
   *
   * Entities are added to the indexes ordered by the smallest begin offset of their mentions.
   *
   * @param jCas the CAS whose document text is processed and which receives the annotations
   * @throws AnalysisEngineProcessException declared by the overridden method; nothing in this body
   *     throws it directly
   * @throws RuntimeException if a sentence tree does not have exactly one child under its root, or
   *     if a dependency edge refers to a node that has no converted counterpart
   */
  @Override
  public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    // state of the named entity span that is currently open ("O" means none is open)
    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

      // create the token annotation
      int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
      int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
      String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
      String lemma = tokenAnn.get(LemmaAnnotation.class);
      Token token = new Token(jCas, begin, end);
      token.setPos(pos);
      token.setLemma(lemma);
      token.addToIndexes();

      // hackery to convert token-level named entity tag into phrase-level tag
      String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
      if (neTag.equals("O") && !lastNETag.equals("O")) {
        // an open span just ended: emit it
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
      } else {
        if (lastNETag.equals("O")) {
          lastNEBegin = begin;
        } else if (lastNETag.equals(neTag)) {
          // do nothing - begin was already set
        } else {
          // the tag changed without an "O" in between: close the old span and open a new one
          NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
          ne.setMentionType(lastNETag);
          ne.addToIndexes();
          lastNEBegin = begin;
        }
        lastNEEnd = end;
      }
      lastNETag = neTag;
    }
    // emit the span that is still open after the last token, if any
    if (!lastNETag.equals("O")) {
      NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
      ne.setMentionType(lastNETag);
      ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

      // add the sentence annotation
      int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
      int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
      Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
      sentence.addToIndexes();

      // add the syntactic tree annotation
      List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
      Tree tree = sentenceAnn.get(TreeAnnotation.class);
      if (tree.children().length != 1) {
        throw new RuntimeException("Expected single root node, found " + tree);
      }
      // drop the wrapper node and continue with its only child
      tree = tree.firstChild();
      tree.indexSpans(0);
      TopTreebankNode root = new TopTreebankNode(jCas);
      root.setTreebankParse(tree.toString());
      // TODO: root.setTerminals(v)
      this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

      // get the dependencies
      SemanticGraph dependencies =
          sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

      // convert Stanford nodes to UIMA annotations
      List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
      Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
      for (IndexedWord stanfordNode : dependencies.vertexSet()) {
        // the begin/end indexes are used as positions in this sentence's token list, with the end
        // index treated as exclusive
        int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
        int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
        int tokenBegin = tokens.get(indexBegin).getBegin();
        int tokenEnd = tokens.get(indexEnd - 1).getEnd();
        DependencyNode node;
        if (dependencies.getRoots().contains(stanfordNode)) {
          node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
        } else {
          node = new DependencyNode(jCas, tokenBegin, tokenEnd);
        }
        stanfordToUima.put(stanfordNode, node);
      }

      // create relation annotations for each Stanford dependency
      ArrayListMultimap<DependencyNode, DependencyRelation> headRelations =
          ArrayListMultimap.create();
      ArrayListMultimap<DependencyNode, DependencyRelation> childRelations =
          ArrayListMultimap.create();
      for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
        DependencyRelation relation = new DependencyRelation(jCas);
        DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
        DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
        String relationType = stanfordEdge.getRelation().toString();
        if (head == null || child == null || relationType == null) {
          throw new RuntimeException(
              String.format(
                  "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n",
                  relation, child, head));
        }
        relation.setHead(head);
        relation.setChild(child);
        relation.setRelation(relationType);
        relation.addToIndexes();
        // a relation is a "head relation" of its child and a "child relation" of its head
        headRelations.put(child, relation);
        childRelations.put(head, relation);
      }

      // set the relations for each node annotation
      for (DependencyNode node : stanfordToUima.values()) {
        // a multimap lookup never yields null; a node without relations gets an empty list
        List<DependencyRelation> heads = headRelations.get(node);
        node.setHeadRelations(new FSArray(jCas, heads.size()));
        FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
        List<DependencyRelation> children = childRelations.get(node);
        node.setChildRelations(new FSArray(jCas, children.size()));
        FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
        node.addToIndexes();
      }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
      spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
      sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    // NOTE(review): a null map here (no coreference output) would fail on values() - confirm the
    // processor always produces this annotation
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
      List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
      for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

        // figure out the character span of the token
        // sentNum and startIndex are shifted by one and endIndex by two to obtain list positions
        List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
        int begin = tokens.get(corefMention.startIndex - 1).getBegin();
        int end = tokens.get(corefMention.endIndex - 2).getEnd();

        // use an existing named entity mention when possible; otherwise create a new one
        NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
        if (mention == null) {
          mention = new NamedEntityMention(jCas, begin, end);
          mention.addToIndexes();
        }
        mentions.add(mention);
      }

      // order the mentions of this chain by their begin offset
      Collections.sort(
          mentions,
          new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
              // Integer.compare instead of subtraction: cannot overflow
              return Integer.compare(m1.getBegin(), m2.getBegin());
            }
          });

      // create an entity and link it with its mentions in both directions
      NamedEntity entity = new NamedEntity(jCas);
      entity.setMentions(new FSArray(jCas, mentions.size()));
      int index = 0;
      for (NamedEntityMention mention : mentions) {
        mention.setMentionedEntity(entity);
        entity.setMentions(index, mention);
        index += 1;
      }
      entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
      if (mention.getMentionedEntity() == null) {
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, 1));
        entity.setMentions(0, mention);
        mention.setMentionedEntity(entity);
        entities.add(entity);
      }
    }

    // sort entities by document order
    Collections.sort(
        entities,
        new Comparator<NamedEntity>() {
          @Override
          public int compare(NamedEntity o1, NamedEntity o2) {
            // Integer.compare instead of subtraction: cannot overflow
            return Integer.compare(getFirstBegin(o1), getFirstBegin(o2));
          }

          // smallest begin offset among the entity's mentions; Integer.MAX_VALUE if it has none
          private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention :
                JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
              if (mention.getBegin() < min) {
                min = mention.getBegin();
              }
            }
            return min;
          }
        });

    // add entities to document
    for (NamedEntity entity : entities) {
      entity.addToIndexes();
    }
  }
Пример #24
0
 /**
  * {@inheritDoc}
  *
  * <p>This implementation hands back whatever {@code IndexedWord.factory()} returns.
  */
 @Override
 public LabelFactory labelFactory() {
   final LabelFactory indexedWordFactory = IndexedWord.factory();
   return indexedWordFactory;
 }