/**
 * Cuts a clause out of a dependency tree, mutating the tree that is passed in.
 *
 * <p>Starting from every current root, the graph is walked breadth-first along outgoing edges
 * without ever crossing {@code toKeep}. Every vertex reached by that walk is removed, and the
 * dependent of {@code toKeep} is then installed as the new root.
 *
 * @param tree The tree to split a clause from.
 * @param toKeep The edge whose dependent heads the clause to keep.
 */
 static void splitToChildOfEdge(SemanticGraph tree, SemanticGraphEdge toKeep) {
   // Seed the worklist with the roots; they are the first vertices to be discarded
   Queue<IndexedWord> worklist = new LinkedList<>();
   for (IndexedWord root : tree.getRoots()) {
     worklist.add(root);
   }

   // Collect everything reachable without passing through the kept edge
   List<IndexedWord> doomed = new ArrayList<>();
   while (!worklist.isEmpty()) {
     IndexedWord current = worklist.poll();
     doomed.add(current);
     for (SemanticGraphEdge outgoing : tree.outgoingEdgeIterable(current)) {
       if (outgoing.equals(toKeep)) {
         continue;
       }
       worklist.add(outgoing.getDependent());
     }
   }

   // Drop the collected vertices, then promote the kept clause's head to root
   for (IndexedWord vertex : doomed) {
     tree.removeVertex(vertex);
   }
   tree.setRoot(toKeep.getDependent());
 }
Пример #2
0
  /**
   * Removes {@code case} dependents that sit under a prepositional target, modifying the tree in
   * place. This replicates the behavior of the old Stanford dependencies on universal
   * dependencies.
   *
   * <p>A {@code case} edge is removed (together with its dependent vertex) when its governor has
   * an incoming edge whose short relation name is {@code nmod} and its dependent has no outgoing
   * edges of its own.
   *
   * @param tree The tree to modify in place.
   */
  public static void stripPrepCases(SemanticGraph tree) {
    // Pass 1: gather the edges first so the graph is not mutated while it is being iterated
    List<SemanticGraphEdge> caseEdges = new ArrayList<>();
    for (SemanticGraphEdge candidate : tree.edgeIterable()) {
      if (!"case".equals(candidate.getRelation().toString())) {
        continue;
      }
      boolean governedByNmod = false;
      for (SemanticGraphEdge parent : tree.incomingEdgeIterable(candidate.getGovernor())) {
        if ("nmod".equals(parent.getRelation().getShortName())) {
          governedByNmod = true;
          break;
        }
      }
      if (!governedByNmod) {
        continue;
      }
      // Only leaf dependents are stripped
      if (!tree.outgoingEdgeIterator(candidate.getDependent()).hasNext()) {
        caseEdges.add(candidate);
      }
    }

    // Pass 2: delete each gathered edge and the vertex hanging off it
    for (SemanticGraphEdge caseEdge : caseEdges) {
      tree.removeEdge(caseEdge);
      tree.removeVertex(caseEdge.getDependent());
      assert isTree(tree);
    }
  }
  /**
   * Splits off a clause of a tree, modifying the tree in place, and additionally follows
   * {@code ref} edges.
   *
   * <p>After the split, every remaining vertex that is the dependent of a registered extra
   * {@code ref} edge whose governor is no longer in the tree is replaced by a subtree for that
   * governor, attached with the relation of the vertex's former incoming edge.
   *
   * @param tree The tree to split a clause from.
   * @param toKeep The edge representing the clause to keep.
   */
  @SuppressWarnings("unchecked")
  private void simpleClause(SemanticGraph tree, SemanticGraphEdge toKeep) {
    splitToChildOfEdge(tree, toKeep);

    // Step 1: work out which vertices should be swapped for their 'ref' governor
    Map<IndexedWord, IndexedWord> replacements = new HashMap<>();
    for (IndexedWord vertex : tree.vertexSet()) {
      for (SemanticGraphEdge extra : extraEdgesByDependent.get(vertex)) {
        if (!"ref".equals(extra.getRelation().toString())) {
          continue; // not a ref edge
        }
        if (tree.containsVertex(extra.getGovernor())) {
          continue; // the referent already exists in the tree
        }
        replacements.put(vertex, extra.getGovernor());
      }
    }

    // Step 2: perform the swaps
    for (Map.Entry<IndexedWord, IndexedWord> replacement : replacements.entrySet()) {
      IndexedWord placeholder = replacement.getKey();
      Iterator<SemanticGraphEdge> incoming = tree.incomingEdgeIterator(placeholder);
      if (incoming.hasNext()) {
        SemanticGraphEdge parentEdge = incoming.next();
        IndexedWord parent = parentEdge.getGovernor();
        tree.removeVertex(placeholder);
        addSubtree(
            tree,
            parent,
            parentEdge.getRelation().toString(),
            this.tree,
            replacement.getValue(),
            this.tree.incomingEdgeList(tree.getFirstRoot()));
      }
    }
  }
Пример #4
0
  /**
   * Looks for a speaker in the last sentence of a paragraph.
   *
   * <p>For every token of the last sentence whose lemma is {@code report} or {@code say}, the
   * {@code nsubj} children of the matching dependency node are inspected. If a mention is
   * registered at the subject's head position and its NER string starts with {@code PER}, that
   * mention's ID becomes the speaker. When several candidates qualify, the last one found wins.
   *
   * @param paragraph The sentences of the paragraph; the last one is examined.
   * @param paragraphOffset Offset added to the sentence position when building the head position.
   * @param dict Not used by this method.
   * @return The mention ID of the speaker as a string, or the empty string if none was found.
   */
  private String findNextParagraphSpeaker(
      List<CoreMap> paragraph, int paragraphOffset, Dictionaries dict) {
    CoreMap lastSent = paragraph.get(paragraph.size() - 1);
    String speaker = "";
    for (CoreLabel w : lastSent.get(CoreAnnotations.TokensAnnotation.class)) {
      // Constant-first comparison: a token without a lemma is skipped instead of throwing
      String lemma = w.get(CoreAnnotations.LemmaAnnotation.class);
      if (!"report".equals(lemma) && !"say".equals(lemma)) {
        continue;
      }
      String word = w.get(CoreAnnotations.TextAnnotation.class);
      SemanticGraph dependency =
          lastSent.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
      IndexedWord t = dependency.getNodeByWordPattern(word);
      if (t == null) {
        // No dependency node for this word; findSpeaker guards the same lookup the same way
        continue;
      }

      for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(t)) {
        if (!"nsubj".equals(child.first().getShortName())) {
          continue;
        }
        int subjectIndex = child.second().index(); // start from 1
        IntTuple headPosition = new IntTuple(2);
        headPosition.set(0, paragraph.size() - 1 + paragraphOffset);
        headPosition.set(1, subjectIndex - 1);
        if (mentionheadPositions.containsKey(headPosition)
            && mentionheadPositions.get(headPosition).nerString.startsWith("PER")) {
          speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID);
        }
      }
    }
    return speaker;
  }
Пример #5
0
  /**
   * Fills in the predicate of every stored sentence that does not have one yet.
   *
   * <p>Each such sentence is run through a CoreNLP pipeline; the lemma of the first root of the
   * dependency graph is stored as the predicate (for multi-sentence content the last annotated
   * sentence wins) and the sentence is saved. A running counter is printed per processed
   * sentence, and a failure on one sentence is printed without stopping the loop.
   *
   * @param args Ignored.
   */
  public static void main(String[] args) {
    SentenceDAO sentenceDAO = new SentenceDAOImpl();
    List<Sentence> sentences = sentenceDAO.findAll();

    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    int processed = 0;
    for (Sentence sentence : sentences) {
      if (sentence.getPredicate() != null) {
        continue; // already has a predicate
      }
      try {
        System.out.println(processed++);
        Annotation annotation = new Annotation(sentence.getContent());
        pipeline.annotate(annotation);
        for (CoreMap core : annotation.get(SentencesAnnotation.class)) {
          SemanticGraph graph = core.get(CollapsedCCProcessedDependenciesAnnotation.class);
          sentence.setPredicate(graph.getFirstRoot().lemma());
        }
        sentenceDAO.save(sentence);
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
  }
 /**
  * Asserts that two graphs match: both must be null, or they must have equal vertex sets and
  * equal string renderings.
  *
  * @param expected The expected graph, possibly null.
  * @param result The graph under test.
  */
 private void verifyGraph(SemanticGraph expected, SemanticGraph result) {
   if (expected != null) {
     assertEquals(expected.vertexSet(), result.vertexSet());
     // TODO: Fix the equals for the DirectedMultiGraph so we can compare the two graphs directly
     assertEquals(expected.toString(), result.toString());
   } else {
     assertEquals(expected, result);
   }
 }
  /** Parses a fixed sample sentence and pretty-prints every dependency graph the parser returns. */
  private static void testDependencyTree() {
    CoreNlpParser parser = new CoreNlpParser();
    List<SemanticGraph> graphs =
        parser.getTextDependencyTree(
            "Now is the time for all good men to come to the aid of their country.");
    for (SemanticGraph dependencyGraph : graphs) {
      dependencyGraph.prettyPrint();
    }
  }
Пример #8
0
 /**
  * Decides whether this word has a direct object tagged as a noun.
  *
  * <p>Only a direct-object child whose part-of-speech tag is exactly {@code NN} (compared
  * case-insensitively) counts. A child without a part-of-speech annotation yields {@code false}
  * rather than a {@link NullPointerException}.
  *
  * @param word the word to analyse
  * @param graph the sentence to which this word belongs
  * @return TRUE, if a direct object tagged {@code NN} is present for this word
  */
 static boolean hasDirectObjectNP(IndexedWord word, SemanticGraph graph) {
   GrammaticalRelation reln =
       edu.stanford.nlp.trees.GrammaticalRelation.getRelation(
           edu.stanford.nlp.trees.EnglishGrammaticalRelations.DirectObjectGRAnnotation.class);
   if (!graph.hasChildWithReln(word, reln)) {
     return false;
   }
   String pos = graph.getChildWithReln(word, reln).get(PartOfSpeechAnnotation.class);
   // Constant-first comparison so a missing tag is treated as "not a noun"
   return "NN".equalsIgnoreCase(pos);
 }
 /**
  * Attaches a single word to a dependency tree: the word is wrapped in a new vertex, the vertex
  * is added to the graph, and an edge from {@code root} to it is created with the named English
  * relation, a weight of {@link Double#NEGATIVE_INFINITY}, and {@code false} as the final flag.
  *
  * @param toModify The tree to add the word to.
  * @param root The vertex the new word is attached under.
  * @param rel The name of the relation to add the word with.
  * @param coreLabel The word to add.
  */
 @SuppressWarnings("UnusedDeclaration")
 private static void addWord(
     SemanticGraph toModify, IndexedWord root, String rel, CoreLabel coreLabel) {
   IndexedWord newVertex = new IndexedWord(coreLabel);
   toModify.addVertex(newVertex);
   GrammaticalRelation relation = GrammaticalRelation.valueOf(Language.English, rel);
   toModify.addEdge(root, newVertex, relation, Double.NEGATIVE_INFINITY, false);
 }
  /**
   * Stores a parse tree, and optionally the dependency graphs derived from it, on a sentence.
   *
   * <p>The tree's nodes are converted to CoreLabels and span-indexed before the tree is attached
   * to the sentence. When {@code buildGraphs} is set, collapsed, uncollapsed (stored as the
   * basic dependencies) and CC-processed graphs are generated and stored as well. Missing tags
   * are filled in at the end in either case.
   *
   * <p>Thread safety note: nothing special is done to ensure the thread safety of the
   * GrammaticalStructureFactory. However, both the EnglishGrammaticalStructureFactory and the
   * ChineseGrammaticalStructureFactory are thread safe.
   *
   * @param verbose Whether to print the tree and the collapsed dependencies to standard error.
   * @param buildGraphs Whether to generate and store the dependency graphs.
   * @param gsf Factory used to build a grammatical structure from the tree.
   * @param sentence The sentence that receives the annotations.
   * @param tree The parse tree of the sentence.
   */
  public static void fillInParseAnnotations(
      boolean verbose,
      boolean buildGraphs,
      GrammaticalStructureFactory gsf,
      CoreMap sentence,
      Tree tree) {
    // make sure all tree nodes are CoreLabels
    // TODO: why isn't this always true? something fishy is going on
    ParserAnnotatorUtils.convertToCoreLabels(tree);

    // add start and end token positions to all nodes; annotators further down stream
    // (e.g., the NFLAnnotator) need them
    tree.indexSpans(0);

    sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
    if (verbose) {
      System.err.println("Tree is:");
      tree.pennPrint(System.err);
    }

    if (buildGraphs) {
      // Fall back to an empty document ID and sentence index 0 when the sentence carries neither
      String storedDocId = sentence.get(CoreAnnotations.DocIDAnnotation.class);
      String docID = (storedDocId == null) ? "" : storedDocId;
      Integer storedIndex = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
      int index = (storedIndex != null) ? storedIndex : 0;

      // each graph is generated from its own freshly built grammatical structure
      SemanticGraph collapsed =
          SemanticGraphFactory.generateCollapsedDependencies(
              gsf.newGrammaticalStructure(tree), docID, index);
      SemanticGraph uncollapsed =
          SemanticGraphFactory.generateUncollapsedDependencies(
              gsf.newGrammaticalStructure(tree), docID, index);
      SemanticGraph ccProcessed =
          SemanticGraphFactory.generateCCProcessedDependencies(
              gsf.newGrammaticalStructure(tree), docID, index);
      if (verbose) {
        System.err.println("SDs:");
        System.err.println(collapsed.toString("plain"));
      }
      sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, collapsed);
      sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsed);
      sentence.set(
          SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class,
          ccProcessed);
    }

    setMissingTags(sentence, tree);
  }
 /**
  * Strips aux and mark edges when we are splitting into a clause.
  *
  * <p>Only edges leaving the first root are considered, and only when the edge's dependent has
  * no outgoing edges of its own. Both the edge and its dependent vertex are removed.
  *
  * @param toModify The tree we are stripping the edges from.
  */
 private void stripAuxMark(SemanticGraph toModify) {
   // Gather first, delete afterwards, so the graph is not changed while being iterated
   List<SemanticGraphEdge> doomed = new ArrayList<>();
   for (SemanticGraphEdge edge : toModify.outgoingEdgeIterable(toModify.getFirstRoot())) {
     String relation = edge.getRelation().toString();
     boolean auxOrMark = "aux".equals(relation) || "mark".equals(relation);
     if (!auxOrMark) {
       continue;
     }
     if (toModify.outgoingEdgeIterator(edge.getDependent()).hasNext()) {
       continue; // dependent has children of its own; leave it alone
     }
     doomed.add(edge);
   }
   for (SemanticGraphEdge edge : doomed) {
     toModify.removeEdge(edge);
     toModify.removeVertex(edge.getDependent());
   }
 }
  /**
   * Parse a CoNLL formatted tree into a SemanticGraph object (along with a list of tokens).
   *
   * <p>Blank lines are ignored. Each remaining line is split on whitespace and read as: token
   * index, word, parent index ({@code 0} marks a root), relation name, then optionally tag, NER
   * label and lemma. Parent indices are resolved by position in the token list, so row
   * {@code k} (1-based) is expected to describe token {@code k}.
   *
   * @param conll The CoNLL formatted tree.
   * @return A pair of a SemanticGraph and a token list, corresponding to the parse of the sentence
   *     and to tokens in the sentence.
   */
  protected Pair<SemanticGraph, List<CoreLabel>> mkTree(String conll) {
    // Tokenize every non-blank row once; both passes below read from this list
    List<String[]> rows = new ArrayList<>();
    for (String rawLine : conll.split("\n")) {
      String trimmed = rawLine.trim();
      if (!trimmed.equals("")) {
        rows.add(trimmed.split("\\s+"));
      }
    }

    // Pass 1: create the tokens and their vertices
    List<CoreLabel> sentence = new ArrayList<>();
    SemanticGraph tree = new SemanticGraph();
    for (String[] fields : rows) {
      int index = Integer.parseInt(fields[0]);
      CoreLabel label = IETestUtils.mkWord(fields[1], index);
      sentence.add(label);
      if (fields[2].equals("0")) {
        tree.addRoot(new IndexedWord(label));
      } else {
        tree.addVertex(new IndexedWord(label));
      }
      if (fields.length > 4) {
        label.setTag(fields[4]);
      }
      if (fields.length > 5) {
        label.setNER(fields[5]);
      }
      if (fields.length > 6) {
        label.setLemma(fields[6]);
      }
    }

    // Pass 2: connect every non-root token to its parent
    for (int row = 0; row < rows.size(); row++) {
      String[] fields = rows.get(row);
      int parent = Integer.parseInt(fields[2]);
      String reln = fields[3];
      if (parent > 0) {
        tree.addEdge(
            new IndexedWord(sentence.get(parent - 1)),
            new IndexedWord(sentence.get(row)),
            new GrammaticalRelation(Language.UniversalEnglish, reln, null, null),
            1.0,
            false);
      }
    }

    return Pair.makePair(tree, sentence);
  }
Пример #13
0
  /**
   * Generate the training features from the CoNLL input file.
   *
   * <p>Every document supplied by the mention extractor contributes two kinds of datums: gold
   * coreferent mentions are labelled {@code "1"}, and predicted mentions whose head word is not a
   * gold head are labelled {@code "0"}. Mentions whose head tag starts with {@code V}, or whose
   * head cannot be found in the mention's dependency graph, are skipped.
   *
   * @param props Properties used to build the dictionaries and the mention extractor.
   * @return Dataset of feature vectors
   * @throws Exception
   */
  public GeneralDataset<String, String> generateFeatureVectors(Properties props) throws Exception {

    GeneralDataset<String, String> dataset = new Dataset<>();

    Dictionaries dict = new Dictionaries(props);
    MentionExtractor mentionExtractor = new CoNLLMentionExtractor(dict, props, new Semantics(dict));

    for (Document document = mentionExtractor.nextDoc();
        document != null;
        document = mentionExtractor.nextDoc()) {
      setTokenIndices(document);
      document.extractGoldCorefClusters();
      Map<Integer, CorefCluster> goldClusters = document.goldCorefClusters;

      // Positive examples (label 1): mentions inside gold coreference clusters
      for (CorefCluster cluster : goldClusters.values()) {
        for (Mention mention : cluster.getCorefMentions()) {
          // verbal mentions and mentions without a resolvable head are ignored
          if (mention.headWord.tag().startsWith("V")
              || mention.dependency.getNodeByIndexSafe(mention.headWord.index()) == null) {
            continue;
          }
          dataset.add(new BasicDatum<>(mention.getSingletonFeatures(dict), "1"));
        }
      }

      // Negative examples (label 0): predicted mentions that are not gold mentions
      ArrayList<CoreLabel> goldHeads = new ArrayList<>();
      for (Mention goldMention : document.allGoldMentions.values()) {
        goldHeads.add(goldMention.headWord);
      }
      for (Mention predicted : document.allPredictedMentions.values()) {
        IndexedWord head = predicted.dependency.getNodeByIndexSafe(predicted.headWord.index());
        // skip: unresolvable head, verbal mention, or head that belongs to a gold mention
        if (head == null
            || predicted.headWord.tag().startsWith("V")
            || goldHeads.contains(predicted.headWord)) {
          continue;
        }
        dataset.add(new BasicDatum<>(predicted.getSingletonFeatures(dict), "0"));
      }
    }

    dataset.summaryStatistics();
    return dataset;
  }
Пример #14
0
  /**
   * Runs the pipeline over {@code text} and collects the dependency edges of every sentence into
   * one {@code DependencyParse}.
   *
   * <p>The models are loaded on first use. For each sentence the head node is set to the index of
   * the first root (so the last sentence determines the final head), and for each edge both
   * endpoints are added as nodes before the edge itself is added.
   *
   * @param text The text to parse.
   * @return The collected nodes and edges.
   */
  public static DependencyParse parse(String text) {
    if (pipeline == null) {
      loadModels();
    }

    DependencyParse parse = new DependencyParse();
    Annotation document = new Annotation(text);
    pipeline.annotate(document);

    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
      SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
      parse.setHeadNode(dependencies.getFirstRoot().index());

      for (SemanticGraphEdge edge : dependencies.edgeListSorted()) {
        IndexedWord dependent = edge.getDependent();
        IndexedWord governor = edge.getGovernor();

        parse.addNode(
            governor.index(),
            governor.originalText(),
            governor.tag(),
            governor.beginPosition(),
            governor.endPosition());
        parse.addNode(
            dependent.index(),
            dependent.originalText(),
            dependent.tag(),
            dependent.beginPosition(),
            dependent.endPosition());

        parse.addEdge(dependent.index(), governor.index(), edge.getRelation().getShortName());
      }
    }

    return parse;
  }
Пример #15
0
 /**
  * Builds a {@link NodeMatcher} for this pattern.
  *
  * <p>When this pattern's relation equals {@code GraphRelation.ALIGNED_ROOT}, the matcher is
  * created with {@code hyp} forced to {@code false} and starts at the first root of
  * {@code sg_align}; otherwise {@code hyp} and {@code node} are passed through unchanged.
  */
 @Override
 public SemgrexMatcher matcher(
     SemanticGraph sg,
     Alignment alignment,
     SemanticGraph sg_align,
     boolean hyp,
     IndexedWord node,
     Map<String, IndexedWord> namesToNodes,
     Map<String, String> namesToRelations,
     VariableStrings variableStrings,
     boolean ignoreCase) {
   boolean alignedRoot = reln.equals(GraphRelation.ALIGNED_ROOT);
   boolean effectiveHyp = !alignedRoot && hyp;
   IndexedWord startNode = alignedRoot ? sg_align.getFirstRoot() : node;
   return new NodeMatcher(
       this,
       sg,
       alignment,
       sg_align,
       effectiveHyp,
       startNode,
       namesToNodes,
       namesToRelations,
       variableStrings,
       ignoreCase);
 }
Пример #16
0
 /**
  * Looks up the child of <code>verb</code> that is attached with the phrasal-verb-particle
  * relation.
  *
  * @param verb the verb whose particle is wanted
  * @param graph the graph containing the verb
  * @return whatever {@code graph.getChildWithReln} yields for that relation
  */
 static IndexedWord getParticle(final IndexedWord verb, final SemanticGraph graph) {
   GrammaticalRelation particleRelation =
       edu.stanford.nlp.trees.GrammaticalRelation.getRelation(
           edu.stanford.nlp.trees.EnglishGrammaticalRelations.PhrasalVerbParticleGRAnnotation
               .class);
   IndexedWord particle = graph.getChildWithReln(verb, particleRelation);
   return particle;
 }
Пример #17
0
 /**
  * Tells whether {@code word} has a child attached with the prepositional-modifier relation.
  *
  * @param word the word to inspect
  * @param graph the graph containing the word
  * @return the result of {@code graph.hasChildWithReln} for that relation
  */
 static boolean hasPrepMod(IndexedWord word, SemanticGraph graph) {
   return graph.hasChildWithReln(
       word,
       edu.stanford.nlp.trees.GrammaticalRelation.getRelation(
           edu.stanford.nlp.trees.EnglishGrammaticalRelations.PrepositionalModifierGRAnnotation
               .class));
 }
Пример #18
0
 /**
  * Tells whether {@code word} has a child attached with the phrasal-verb-particle relation.
  *
  * @param word the word to inspect
  * @param graph the graph containing the word
  * @return the (boxed) result of {@code graph.hasChildWithReln} for that relation
  */
 static Boolean hasParticle(IndexedWord word, SemanticGraph graph) {
   GrammaticalRelation particleRelation =
       edu.stanford.nlp.trees.GrammaticalRelation.getRelation(
           edu.stanford.nlp.trees.EnglishGrammaticalRelations.PhrasalVerbParticleGRAnnotation
               .class);
   return graph.hasChildWithReln(word, particleRelation);
 }
 /**
  * Create a searcher manually, supplying a dependency tree, an optional classifier for when to
  * split clauses, and a featurizer for that classifier. You almost certainly want to use {@link
  * ClauseSplitter#load(String)} instead of this constructor.
  *
  * <p>The constructor works on its own copy of {@code tree}: the copy's edges are indexed, the
  * sentence length is taken from the index of the last vertex in sorted order, the copy is
  * passed through {@code Util.cleanTree}, and the edges that call returns are registered per
  * governor and per dependent.
  *
  * @param tree The dependency tree to search over.
  * @param assumedTruth The assumed truth of the tree (relevant for natural logic inference). If in
  *     doubt, pass in true.
  * @param isClauseClassifier The classifier for whether a given dependency arc should be a new
  *     clause. If this is not given, all arcs are treated as clause separators.
  * @param featurizer The featurizer for the classifier. If no featurizer is given, one should be
  *     given in {@link ClauseSplitterSearchProblem#search(java.util.function.Predicate,
  *     Classifier, Map, java.util.function.Function, int)}, or else the classifier will be
  *     useless.
  * @see ClauseSplitter#load(String)
  */
 protected ClauseSplitterSearchProblem(
     SemanticGraph tree,
     boolean assumedTruth,
     Optional<Classifier<ClauseSplitter.ClauseClassifierLabel, String>> isClauseClassifier,
     Optional<
             Function<
                 Triple<
                     ClauseSplitterSearchProblem.State,
                     ClauseSplitterSearchProblem.Action,
                     ClauseSplitterSearchProblem.State>,
                 Counter<String>>>
         featurizer) {
   this.tree = new SemanticGraph(tree);
   this.assumedTruth = assumedTruth;
   this.isClauseClassifier = isClauseClassifier;
   this.featurizer = featurizer;

   // Index the edges of the private copy (before the copy is cleaned below)
   for (SemanticGraphEdge edge : this.tree.edgeIterable()) {
     edgeToIndex.addToIndex(edge);
   }

   // The sentence length is the index of the last vertex in sorted order
   List<IndexedWord> verticesInOrder = tree.vertexListSorted();
   IndexedWord lastVertex = verticesInOrder.get(verticesInOrder.size() - 1);
   sentenceLength = lastVertex.index();

   // Every vertex starts out with empty extra-edge lists
   for (IndexedWord vertex : verticesInOrder) {
     extraEdgesByGovernor.put(vertex, new ArrayList<>());
     extraEdgesByDependent.put(vertex, new ArrayList<>());
   }

   // Clean the copy and file the edges the cleaning step hands back
   List<SemanticGraphEdge> extras = Util.cleanTree(this.tree);
   assert Util.isTree(this.tree);
   for (SemanticGraphEdge extra : extras) {
     extraEdgesByGovernor.get(extra.getGovernor()).add(extra);
     extraEdgesByDependent.get(extra.getDependent()).add(extra);
   }
 }
Пример #20
0
  /**
   * Tests a node against this pattern's description.
   *
   * <p>A root pattern checks membership in the graph's roots; an empty pattern checks equality
   * with {@code IndexedWord.NO_WORD}. Otherwise every attribute pattern must fully match the
   * string form of the node's value for that attribute; a missing value counts as a mismatch.
   * The outcome is inverted when the description is negated.
   *
   * @param node The node to test.
   * @param sg The graph the node belongs to; consulted only for the root test.
   * @param ignoreCase Whether attribute patterns are matched case-insensitively.
   * @return Whether the node satisfies the (possibly negated) description.
   */
  @SuppressWarnings("unchecked")
  public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean ignoreCase) {
    if (isRoot) {
      return negDesc != sg.getRoots().contains(node);
    }
    if (isEmpty) {
      return negDesc != node.equals(IndexedWord.NO_WORD);
    }

    for (Map.Entry<String, Pattern> attr : attributes.entrySet()) {
      // resolve the annotation class registered for this attribute name
      Class c = Env.lookupAnnotationKey(env, attr.getKey());

      Object value = node.get(c);
      String nodeValue = (value == null) ? null : value.toString();
      if (nodeValue == null) {
        return negDesc;
      }

      // only the pattern's source text is reused; it is recompiled on each test
      String regex = attr.getValue().pattern();
      boolean matches =
          ignoreCase
              ? Pattern.compile(regex, Pattern.CASE_INSENSITIVE).matcher(nodeValue).matches()
              : nodeValue.matches(regex);
      if (!matches) {
        return negDesc;
      }
    }
    // every attribute matched
    return !negDesc;
  }
Пример #21
0
  /**
   * A little utility function to make sure a SemanticGraph is a tree.
   *
   * <p>Three things are verified: roots have no incoming edge and every other vertex has exactly
   * one; every outgoing edge is also listed as an incoming edge of its dependent, and vice
   * versa; and the graph has no cycle according to {@code isCyclic}.
   *
   * @param tree The tree to check.
   * @return True if this {@link edu.stanford.nlp.semgraph.SemanticGraph} is a tree (versus a DAG,
   *     or Graph).
   */
  public static boolean isTree(SemanticGraph tree) {
    for (IndexedWord vertex : tree.vertexSet()) {
      // Incoming-edge count: zero for a root, exactly one for everything else
      boolean vertexIsRoot = tree.getRoots().contains(vertex);
      Iterator<SemanticGraphEdge> incoming = tree.incomingEdgeIterator(vertex);
      if (vertexIsRoot) {
        if (incoming.hasNext()) {
          return false;
        }
      } else {
        if (!incoming.hasNext()) {
          return false;
        }
        incoming.next();
        if (incoming.hasNext()) {
          return false;
        }
      }

      // Each outgoing edge must be the very same object in its dependent's incoming edges
      for (SemanticGraphEdge outEdge : tree.outgoingEdgeIterable(vertex)) {
        boolean mirrored = false;
        for (SemanticGraphEdge candidate : tree.incomingEdgeIterable(outEdge.getDependent())) {
          mirrored = mirrored || candidate == outEdge;
        }
        if (!mirrored) {
          return false;
        }
      }

      // ... and each incoming edge the very same object in its governor's outgoing edges
      for (SemanticGraphEdge inEdge : tree.incomingEdgeIterable(vertex)) {
        boolean mirrored = false;
        for (SemanticGraphEdge candidate : tree.outgoingEdgeIterable(inEdge.getGovernor())) {
          mirrored = mirrored || candidate == inEdge;
        }
        if (!mirrored) {
          return false;
        }
      }
    }

    // Finally, a tree has no cycles
    return !isCyclic(tree);
  }
Пример #22
0
 /**
  * Decides whether a given <code>verb</code> is passive, i.e. whether it has a nominal passive
  * subject, a clausal passive subject, or a passive auxiliary among its children.
  *
  * @param verb the verb to inspect
  * @param graph the graph containing the verb
  * @return true if any of the three relations is present on the verb
  */
 static boolean isPassive(IndexedWord verb, SemanticGraph graph) {
   // "Dole was defeated by Clinton" nsubjpass(defeated, Dole)
   GrammaticalRelation nominalPassiveSubject =
       GrammaticalRelation.getRelation(NominalPassiveSubjectGRAnnotation.class);
   // "That she lied was suspected by everyone" csubjpass(suspected, lied)
   GrammaticalRelation clausalPassiveSubject =
       GrammaticalRelation.getRelation(ClausalPassiveSubjectGRAnnotation.class);
   // "Kennedy was killed" auxpass(killed, was)
   GrammaticalRelation passiveAuxiliary =
       GrammaticalRelation.getRelation(EnglishGrammaticalRelations.AuxPassiveGRAnnotation.class);

   return graph.hasChildWithReln(verb, nominalPassiveSubject)
       || graph.hasChildWithReln(verb, clausalPassiveSubject)
       || graph.hasChildWithReln(verb, passiveAuxiliary);
 }
Пример #23
0
 /**
  * Gets the children of a word that are attached with the relation for a given preposition.
  *
  * @param startingWord the word whose children are searched
  * @param graph Sentence
  * @param preposition A seeding expression. Ex: "in" or "in_front_of"
  * @return the children {@code graph.getChildrenWithReln} reports for that relation
  */
 public static List<IndexedWord> getPrepRelations(
     IndexedWord startingWord, SemanticGraph graph, String preposition) {
   // checking rule: root->prep_in->det
   return graph.getChildrenWithReln(
       startingWord, EnglishGrammaticalRelations.getPrep(preposition));
 }
Пример #24
0
  /**
   * Searches a token range of one sentence for a reporting verb and records the verb's subject
   * as the speaker of an utterance.
   *
   * <p>Only tokens whose utterance annotation is 0 are considered. For the first reporting verb
   * that has an {@code nsubj} child, the speaker is the ID of the mention registered at the
   * subject's head position, or the subject's word if no mention is registered there.
   *
   * @param utterNum The utterance the speaker is recorded for.
   * @param sentNum Index of the sentence to search.
   * @param sentences All sentences.
   * @param startIndex First token index to examine (inclusive).
   * @param endIndex Token index to stop at (exclusive).
   * @param dict Supplies the set of reporting-verb lemmas.
   * @return True if a speaker was recorded.
   */
  private boolean findSpeaker(
      int utterNum,
      int sentNum,
      List<CoreMap> sentences,
      int startIndex,
      int endIndex,
      Dictionaries dict) {
    List<CoreLabel> sent = sentences.get(sentNum).get(CoreAnnotations.TokensAnnotation.class);
    for (int i = startIndex; i < endIndex; i++) {
      CoreLabel token = sent.get(i);
      if (token.get(CoreAnnotations.UtteranceAnnotation.class) != 0) {
        continue;
      }
      String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
      String word = token.get(CoreAnnotations.TextAnnotation.class);
      if (!dict.reportVerb.contains(lemma)) {
        continue;
      }

      // find subject
      SemanticGraph dependency =
          sentences
              .get(sentNum)
              .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
      IndexedWord verbNode = dependency.getNodeByWordPattern(word);
      if (verbNode == null) {
        SieveCoreferenceSystem.logger.warning("Cannot find node in dependency for word " + word);
        continue;
      }

      for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(verbNode)) {
        if (!child.first().getShortName().equals("nsubj")) {
          continue;
        }
        String subjectString = child.second().word();
        int subjectIndex = child.second().index(); // start from 1
        IntTuple headPosition = new IntTuple(2);
        headPosition.set(0, sentNum);
        headPosition.set(1, subjectIndex - 1);
        String speaker =
            mentionheadPositions.containsKey(headPosition)
                ? Integer.toString(mentionheadPositions.get(headPosition).mentionID)
                : subjectString;
        speakers.put(utterNum, speaker);
        return true;
      }
    }
    return false;
  }
Пример #25
0
 /**
  * Get a {@link SemgrexMatcher} for this pattern in this graph, with some initial conditions on
  * the variable assignments. Matching starts at the graph's first root, with no named relations,
  * fresh variable strings, and case-sensitive matching.
  *
  * @param sg the SemanticGraph to match on
  * @param variables the initial variable assignments
  * @return a SemgrexMatcher
  */
 public SemgrexMatcher matcher(SemanticGraph sg, Map<String, IndexedWord> variables) {
   IndexedWord startNode = sg.getFirstRoot();
   Map<String, String> noRelations = Generics.<String, String>newHashMap();
   return matcher(sg, startNode, variables, noRelations, new VariableStrings(), false);
 }
Пример #26
0
 /**
  * Get a {@link SemgrexMatcher} for this pattern in this graph. Matching starts at the graph's
  * first root, with no pre-assigned nodes or relations and fresh variable strings.
  *
  * @param sg the SemanticGraph to match on
  * @param ignoreCase will ignore case for matching a pattern with a node; not implemented by
  *     Coordination Pattern
  * @return a SemgrexMatcher
  */
 public SemgrexMatcher matcher(SemanticGraph sg, boolean ignoreCase) {
   IndexedWord startNode = sg.getFirstRoot();
   return matcher(
       sg,
       startNode,
       Generics.<String, IndexedWord>newHashMap(),
       Generics.<String, String>newHashMap(),
       new VariableStrings(),
       ignoreCase);
 }
Пример #27
0
  /**
   * Annotates a fixed sample sentence with a full CoreNLP pipeline and prints every vertex of
   * each sentence's collapsed, CC-processed dependency graph.
   *
   * <p>Any failure is reported on standard error instead of being silently discarded; it is
   * still not propagated to the caller.
   */
  private void testParseTree() {
    try {
      Properties props = new Properties();
      props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
      StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

      // read some text in the text variable
      String text = "Give me a list of all bandleaders that play trumpet.";

      // create an empty Annotation just with the given text
      Annotation document = new Annotation(text);

      // run all Annotators on this text
      pipeline.annotate(document);

      // these are all the sentences in this document
      // a CoreMap is essentially a Map that uses class objects as keys and has values with custom
      // types
      List<CoreMap> sentences = document.get(SentencesAnnotation.class);

      for (CoreMap sentence : sentences) {
        // this is the Stanford dependency graph of the current sentence
        SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);

        for (IndexedWord vertex : dependencies.vertexSet()) {
          System.out.println(vertex.toString());
        }
      }
    } catch (Exception e) {
      // Best-effort demo: keep going, but make the failure visible
      System.err.println("testParseTree failed: " + e);
    }
  }
Пример #28
0
  /**
   * Returns the primary thing this sentence is talking about. More precisely: returns a child of
   * the first root attached as nominal, nominal passive, clausal or clausal passive subject, or
   * the first root itself if there is no such child.
   *
   * @param graph the sentence graph
   * @return the subject or the first root; {@code null} if the graph is empty
   */
  public static IndexedWord getSubject(SemanticGraph graph) {
    if (graph.isEmpty()) {
      return null;
    }
    List<GrammaticalRelation> subjectRelations =
        Arrays.asList(
            EnglishGrammaticalRelations.NOMINAL_SUBJECT,
            EnglishGrammaticalRelations.NOMINAL_PASSIVE_SUBJECT,
            EnglishGrammaticalRelations.CLAUSAL_SUBJECT,
            EnglishGrammaticalRelations.CLAUSAL_PASSIVE_SUBJECT);
    IndexedWord root = graph.getFirstRoot();
    List<IndexedWord> subjects = graph.getChildrenWithRelns(root, subjectRelations);
    if (subjects == null || subjects.isEmpty()) {
      // in a subject-less sentence, the root is as good as the subject
      return root;
    }
    // not really dangerous. But we need to change our implementation to return a list,
    // if there are more than one subject.
    assert subjects.size() == 1;
    return subjects.get(0);
  }
Пример #29
0
 /**
  * Determine if a tree is cyclic.
  *
  * <p>From every non-root vertex the chain of governors is followed upwards, always taking the
  * first incoming edge, until a vertex without incoming edges is reached or a vertex is visited
  * twice. A non-root vertex that has no incoming edge simply ends its chain immediately instead
  * of causing a {@link java.util.NoSuchElementException}.
  *
  * @param tree The tree to check.
  * @return True if the tree has at least one cycle in it.
  */
 public static boolean isCyclic(SemanticGraph tree) {
   for (IndexedWord vertex : tree.vertexSet()) {
     if (tree.getRoots().contains(vertex)) {
       continue;
     }
     Set<IndexedWord> seen = new HashSet<>();
     IndexedWord node = vertex;
     while (node != null) {
       // add() returns false when the vertex was already on this chain, i.e. we looped back
       if (!seen.add(node)) {
         return true;
       }
       // step to the governor of the first incoming edge, or stop at a parentless vertex
       node =
           tree.incomingEdgeIterator(node).hasNext()
               ? tree.incomingEdgeIterator(node).next().getGovernor()
               : null;
     }
   }
   return false;
 }
Пример #30
0
 /**
  * Get a {@link SemgrexMatcher} for this pattern over an aligned pair of graphs. Matching starts
  * at the first root of {@code hypGraph} with the hypothesis flag set to {@code true}, no
  * pre-assigned nodes or relations, and fresh variable strings.
  *
  * @param hypGraph the graph to match on
  * @param alignment the alignment handed on to the matcher
  * @param txtGraph the graph handed on as the aligned graph
  * @param ignoreCase whether to ignore case when matching a pattern with a node
  * @return a SemgrexMatcher
  */
 public SemgrexMatcher matcher(
     SemanticGraph hypGraph, Alignment alignment, SemanticGraph txtGraph, boolean ignoreCase) {
   IndexedWord startNode = hypGraph.getFirstRoot();
   return matcher(
       hypGraph,
       alignment,
       txtGraph,
       true,
       startNode,
       Generics.<String, IndexedWord>newHashMap(),
       Generics.<String, String>newHashMap(),
       new VariableStrings(),
       ignoreCase);
 }