/** * The basic method for splitting off a clause of a tree. This modifies the tree in place. * * @param tree The tree to split a clause from. * @param toKeep The edge representing the clause to keep. */ static void splitToChildOfEdge(SemanticGraph tree, SemanticGraphEdge toKeep) { Queue<IndexedWord> fringe = new LinkedList<>(); List<IndexedWord> nodesToRemove = new ArrayList<>(); // Find nodes to remove // (from the root) for (IndexedWord root : tree.getRoots()) { nodesToRemove.add(root); for (SemanticGraphEdge out : tree.outgoingEdgeIterable(root)) { if (!out.equals(toKeep)) { fringe.add(out.getDependent()); } } } // (recursively) while (!fringe.isEmpty()) { IndexedWord node = fringe.poll(); nodesToRemove.add(node); for (SemanticGraphEdge out : tree.outgoingEdgeIterable(node)) { if (!out.equals(toKeep)) { fringe.add(out.getDependent()); } } } // Remove nodes nodesToRemove.forEach(tree::removeVertex); // Set new root tree.setRoot(toKeep.getDependent()); }
/** * Strip away case edges, if the incoming edge is a preposition. This replicates the behavior of * the old Stanford dependencies on universal dependencies. * * @param tree The tree to modify in place. */ public static void stripPrepCases(SemanticGraph tree) { // Find incoming case edges that have an 'nmod' incoming edge List<SemanticGraphEdge> toClean = new ArrayList<>(); for (SemanticGraphEdge edge : tree.edgeIterable()) { if ("case".equals(edge.getRelation().toString())) { boolean isPrepTarget = false; for (SemanticGraphEdge incoming : tree.incomingEdgeIterable(edge.getGovernor())) { if ("nmod".equals(incoming.getRelation().getShortName())) { isPrepTarget = true; break; } } if (isPrepTarget && !tree.outgoingEdgeIterator(edge.getDependent()).hasNext()) { toClean.add(edge); } } } // Delete these edges for (SemanticGraphEdge edge : toClean) { tree.removeEdge(edge); tree.removeVertex(edge.getDependent()); assert isTree(tree); } }
/**
 * The basic method for splitting off a clause of a tree. This modifies the tree in place. This
 * method additionally follows ref edges.
 *
 * @param tree The tree to split a clause from.
 * @param toKeep The edge representing the clause to keep.
 */
@SuppressWarnings("unchecked")
private void simpleClause(SemanticGraph tree, SemanticGraphEdge toKeep) {
  // Prune everything outside the clause headed by toKeep's dependent.
  splitToChildOfEdge(tree, toKeep);

  // Follow 'ref' edges
  Map<IndexedWord, IndexedWord> refReplaceMap = new HashMap<>();
  // (find replacements)
  // For each remaining vertex, look up its extra (non-tree) edges; a 'ref' edge whose
  // governor is no longer in the pruned tree marks a referent we should re-attach.
  for (IndexedWord vertex : tree.vertexSet()) {
    for (SemanticGraphEdge edge : extraEdgesByDependent.get(vertex)) {
      if ("ref".equals(edge.getRelation().toString()) && // it's a ref edge...
          !tree.containsVertex(
              edge.getGovernor())) { // ...that doesn't already exist in the tree.
        refReplaceMap.put(vertex, edge.getGovernor());
      }
    }
  }
  // (do replacements)
  // Replace each vertex with its referent: detach the vertex, then graft the referent's
  // subtree (taken from the original full tree, this.tree) in its place under the same
  // governor and relation.
  for (Map.Entry<IndexedWord, IndexedWord> entry : refReplaceMap.entrySet()) {
    Iterator<SemanticGraphEdge> iter = tree.incomingEdgeIterator(entry.getKey());
    if (!iter.hasNext()) {
      // Vertex has no parent (e.g. it is the root); nothing to re-attach under.
      continue;
    }
    SemanticGraphEdge incomingEdge = iter.next();
    IndexedWord governor = incomingEdge.getGovernor();
    tree.removeVertex(entry.getKey());
    addSubtree(
        tree,
        governor,
        incomingEdge.getRelation().toString(),
        this.tree,
        entry.getValue(),
        this.tree.incomingEdgeList(tree.getFirstRoot()));
  }
}
private String findNextParagraphSpeaker( List<CoreMap> paragraph, int paragraphOffset, Dictionaries dict) { CoreMap lastSent = paragraph.get(paragraph.size() - 1); String speaker = ""; for (CoreLabel w : lastSent.get(CoreAnnotations.TokensAnnotation.class)) { if (w.get(CoreAnnotations.LemmaAnnotation.class).equals("report") || w.get(CoreAnnotations.LemmaAnnotation.class).equals("say")) { String word = w.get(CoreAnnotations.TextAnnotation.class); SemanticGraph dependency = lastSent.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class); IndexedWord t = dependency.getNodeByWordPattern(word); for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(t)) { if (child.first().getShortName().equals("nsubj")) { int subjectIndex = child.second().index(); // start from 1 IntTuple headPosition = new IntTuple(2); headPosition.set(0, paragraph.size() - 1 + paragraphOffset); headPosition.set(1, subjectIndex - 1); if (mentionheadPositions.containsKey(headPosition) && mentionheadPositions.get(headPosition).nerString.startsWith("PER")) { speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID); } } } } } return speaker; }
public static void main(String[] args) { SentenceDAO sentenceDAO = new SentenceDAOImpl(); List<Sentence> sentences = sentenceDAO.findAll(); Properties props = new Properties(); props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); int i = 0; for (Sentence sentence : sentences) { if (sentence.getPredicate() == null) { try { System.out.println(i++); String text = sentence.getContent(); Annotation annotation = new Annotation(text); pipeline.annotate(annotation); for (CoreMap core : annotation.get(SentencesAnnotation.class)) { SemanticGraph graph = core.get(CollapsedCCProcessedDependenciesAnnotation.class); sentence.setPredicate(graph.getFirstRoot().lemma()); } sentenceDAO.save(sentence); } catch (Exception e) { e.printStackTrace(); } } } // System.out.println(sentence.getWords()); }
/**
 * Asserts that two semantic graphs are equivalent: same vertex set and same string rendering.
 *
 * @param expected The expected graph (may be null, in which case result must be null too).
 * @param result The graph produced by the code under test.
 */
private void verifyGraph(SemanticGraph expected, SemanticGraph result) {
  if (expected == null) {
    // Both must be null; assertEquals reports a useful message if result is not.
    assertEquals(expected, result);
    return;
  }
  assertEquals(expected.vertexSet(), result.vertexSet());
  // TODO: Fix the equals for the DirectedMultiGraph so we can compare the two graphs directly
  assertEquals(expected.toString(), result.toString());
}
/**
 * Smoke test: parses a fixed sentence and pretty-prints each resulting dependency graph.
 */
private static void testDependencyTree() {
  final String testSentence =
      "Now is the time for all good men to come to the aid of their country.";
  CoreNlpParser parser = new CoreNlpParser();
  List<SemanticGraph> graphs = parser.getTextDependencyTree(testSentence);
  graphs.forEach(SemanticGraph::prettyPrint);
}
/**
 * Decides whether this word has a direct object whose part-of-speech tag is "NN".
 *
 * @param word the word to analyse
 * @param graph the sentence to which this word belongs
 * @return TRUE, if a direct object with POS tag "NN" is present for this verb
 */
static boolean hasDirectObjectNP(IndexedWord word, SemanticGraph graph) {
  GrammaticalRelation reln =
      edu.stanford.nlp.trees.GrammaticalRelation.getRelation(
          edu.stanford.nlp.trees.EnglishGrammaticalRelations.DirectObjectGRAnnotation.class);
  if (graph.hasChildWithReln(word, reln)) {
    String pos = graph.getChildWithReln(word, reln).get(PartOfSpeechAnnotation.class);
    // Constant-first comparison: avoids an NPE when the child carries no POS annotation
    // (the original called pos.equalsIgnoreCase(...) and would crash on a null tag).
    return "NN".equalsIgnoreCase(pos);
  }
  return false;
}
/**
 * A helper to add a single word to a given dependency tree.
 *
 * @param toModify The tree to add the word to.
 * @param root The root of the tree where we should be adding the word.
 * @param rel The relation to add the word with.
 * @param coreLabel The word to add.
 */
@SuppressWarnings("UnusedDeclaration")
private static void addWord(
    SemanticGraph toModify, IndexedWord root, String rel, CoreLabel coreLabel) {
  IndexedWord newChild = new IndexedWord(coreLabel);
  toModify.addVertex(newChild);
  GrammaticalRelation relation = GrammaticalRelation.valueOf(Language.English, rel);
  // Weight is irrelevant here; NEGATIVE_INFINITY marks the edge as synthetic.
  toModify.addEdge(root, newChild, relation, Double.NEGATIVE_INFINITY, false);
}
/** * Thread safety note: nothing special is done to ensure the thread safety of the * GrammaticalStructureFactory. However, both the EnglishGrammaticalStructureFactory and the * ChineseGrammaticalStructureFactory are thread safe. */ public static void fillInParseAnnotations( boolean verbose, boolean buildGraphs, GrammaticalStructureFactory gsf, CoreMap sentence, Tree tree) { // make sure all tree nodes are CoreLabels // TODO: why isn't this always true? something fishy is going on ParserAnnotatorUtils.convertToCoreLabels(tree); // index nodes, i.e., add start and end token positions to all nodes // this is needed by other annotators down stream, e.g., the NFLAnnotator tree.indexSpans(0); sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree); if (verbose) { System.err.println("Tree is:"); tree.pennPrint(System.err); } if (buildGraphs) { String docID = sentence.get(CoreAnnotations.DocIDAnnotation.class); if (docID == null) { docID = ""; } Integer sentenceIndex = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class); int index = sentenceIndex == null ? 0 : sentenceIndex; // generate the dependency graph SemanticGraph deps = SemanticGraphFactory.generateCollapsedDependencies( gsf.newGrammaticalStructure(tree), docID, index); SemanticGraph uncollapsedDeps = SemanticGraphFactory.generateUncollapsedDependencies( gsf.newGrammaticalStructure(tree), docID, index); SemanticGraph ccDeps = SemanticGraphFactory.generateCCProcessedDependencies( gsf.newGrammaticalStructure(tree), docID, index); if (verbose) { System.err.println("SDs:"); System.err.println(deps.toString("plain")); } sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps); sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps); sentence.set( SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps); } setMissingTags(sentence, tree); }
/**
 * Strips aux and mark edges when we are splitting into a clause.
 *
 * @param toModify The tree we are stripping the edges from.
 */
private void stripAuxMark(SemanticGraph toModify) {
  IndexedWord clauseRoot = toModify.getFirstRoot();
  // Gather leaf-dependent 'aux'/'mark' edges off the root; defer removal so we
  // don't mutate the graph while iterating over it.
  List<SemanticGraphEdge> doomed = new ArrayList<>();
  for (SemanticGraphEdge edge : toModify.outgoingEdgeIterable(clauseRoot)) {
    String relation = edge.getRelation().toString();
    boolean isAuxOrMark = "aux".equals(relation) || "mark".equals(relation);
    if (isAuxOrMark && !toModify.outgoingEdgeIterator(edge.getDependent()).hasNext()) {
      doomed.add(edge);
    }
  }
  for (SemanticGraphEdge edge : doomed) {
    toModify.removeEdge(edge);
    toModify.removeVertex(edge.getDependent());
  }
}
/**
 * Parse a CoNLL formatted tree into a SemanticGraph object (along with a list of tokens).
 *
 * @param conll The CoNLL formatted tree.
 * @return A pair of a SemanticGraph and a token list, corresponding to the parse of the sentence
 *     and to tokens in the sentence.
 */
protected Pair<SemanticGraph, List<CoreLabel>> mkTree(String conll) {
  List<CoreLabel> sentence = new ArrayList<>();
  SemanticGraph tree = new SemanticGraph();
  // First pass: create a token and a graph vertex for every non-empty line.
  // Expected columns: 0=index, 1=word, 2=head index, 3=relation, then optional
  // 4=POS tag, 5=NER tag, 6=lemma.
  for (String line : conll.split("\n")) {
    if (line.trim().equals("")) {
      continue;
    }
    String[] fields = line.trim().split("\\s+");
    int index = Integer.parseInt(fields[0]);
    String word = fields[1];
    CoreLabel label = IETestUtils.mkWord(word, index);
    sentence.add(label);
    // A head index of 0 marks the root token.
    if (fields[2].equals("0")) {
      tree.addRoot(new IndexedWord(label));
    } else {
      tree.addVertex(new IndexedWord(label));
    }
    if (fields.length > 4) {
      label.setTag(fields[4]);
    }
    if (fields.length > 5) {
      label.setNER(fields[5]);
    }
    if (fields.length > 6) {
      label.setLemma(fields[6]);
    }
  }
  // Second pass: add the dependency edges, now that every vertex exists.
  // i counts non-empty lines and is therefore the 0-based token position.
  int i = 0;
  for (String line : conll.split("\n")) {
    if (line.trim().equals("")) {
      continue;
    }
    String[] fields = line.trim().split("\\s+");
    int parent = Integer.parseInt(fields[2]);
    String reln = fields[3];
    if (parent > 0) {
      tree.addEdge(
          new IndexedWord(sentence.get(parent - 1)),
          new IndexedWord(sentence.get(i)),
          new GrammaticalRelation(Language.UniversalEnglish, reln, null, null),
          1.0,
          false);
    }
    i += 1;
  }
  return Pair.makePair(tree, sentence);
}
/**
 * Generate the training features from the CoNLL input file.
 *
 * <p>Coreferent (gold-cluster) mentions are labeled "1"; predicted mentions whose head does not
 * appear among the gold mention heads are treated as singletons and labeled "0". Verbal
 * mentions, and mentions whose head cannot be found in the dependency graph, are skipped.
 *
 * @param props Properties used to build the dictionaries and the mention extractor.
 * @return Dataset of feature vectors
 * @throws Exception if dictionary construction or document extraction fails
 */
public GeneralDataset<String, String> generateFeatureVectors(Properties props) throws Exception {
  GeneralDataset<String, String> dataset = new Dataset<>();
  Dictionaries dict = new Dictionaries(props);
  MentionExtractor mentionExtractor = new CoNLLMentionExtractor(dict, props, new Semantics(dict));
  Document document;
  while ((document = mentionExtractor.nextDoc()) != null) {
    setTokenIndices(document);
    document.extractGoldCorefClusters();
    Map<Integer, CorefCluster> entities = document.goldCorefClusters;
    // Generate features for coreferent mentions with class label 1
    for (CorefCluster entity : entities.values()) {
      for (Mention mention : entity.getCorefMentions()) {
        // Ignore verbal mentions
        if (mention.headWord.tag().startsWith("V")) continue;
        IndexedWord head = mention.dependency.getNodeByIndexSafe(mention.headWord.index());
        if (head == null) continue;
        ArrayList<String> feats = mention.getSingletonFeatures(dict);
        dataset.add(new BasicDatum<>(feats, "1"));
      }
    }
    // Generate features for singletons with class label 0
    // (a predicted mention is a singleton iff its head word is not a gold mention head)
    ArrayList<CoreLabel> gold_heads = new ArrayList<>();
    for (Mention gold_men : document.allGoldMentions.values()) {
      gold_heads.add(gold_men.headWord);
    }
    for (Mention predicted_men : document.allPredictedMentions.values()) {
      SemanticGraph dep = predicted_men.dependency;
      IndexedWord head = dep.getNodeByIndexSafe(predicted_men.headWord.index());
      if (head == null) continue;
      // Ignore verbal mentions
      if (predicted_men.headWord.tag().startsWith("V")) continue;
      // If the mention is in the gold set, it is not a singleton and thus ignore
      if (gold_heads.contains(predicted_men.headWord)) continue;
      dataset.add(new BasicDatum<>(predicted_men.getSingletonFeatures(dict), "0"));
    }
  }
  dataset.summaryStatistics();
  return dataset;
}
public static DependencyParse parse(String text) { if (pipeline == null) { loadModels(); } DependencyParse parse = new DependencyParse(); Annotation document = new Annotation(text); pipeline.annotate(document); List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); IndexedWord root = dependencies.getFirstRoot(); parse.setHeadNode(root.index()); List<SemanticGraphEdge> edges = dependencies.edgeListSorted(); // System.out.println(edges); for (SemanticGraphEdge t : edges) { String dep = t.getDependent().originalText(); int depIndex = t.getDependent().index(); String depPOS = t.getDependent().tag(); int depStart = t.getDependent().beginPosition(); int depEnd = t.getDependent().endPosition(); String gov = t.getGovernor().originalText(); int govIndex = t.getGovernor().index(); String govPOS = t.getGovernor().tag(); int govStart = t.getGovernor().beginPosition(); int govEnd = t.getGovernor().endPosition(); parse.addNode(govIndex, gov, govPOS, govStart, govEnd); parse.addNode(depIndex, dep, depPOS, depStart, depEnd); parse.addEdge(depIndex, govIndex, t.getRelation().getShortName()); } } return parse; }
@Override public SemgrexMatcher matcher( SemanticGraph sg, Alignment alignment, SemanticGraph sg_align, boolean hyp, IndexedWord node, Map<String, IndexedWord> namesToNodes, Map<String, String> namesToRelations, VariableStrings variableStrings, boolean ignoreCase) { // System.err.println("making matcher: " + // ((reln.equals(GraphRelation.ALIGNED_ROOT)) ? false : hyp)); return new NodeMatcher( this, sg, alignment, sg_align, (reln.equals(GraphRelation.ALIGNED_ROOT)) ? false : hyp, (reln.equals(GraphRelation.ALIGNED_ROOT)) ? sg_align.getFirstRoot() : node, namesToNodes, namesToRelations, variableStrings, ignoreCase); }
/**
 * Returns the phrasal-verb particle attached to this verb, if any.
 *
 * @param verb the verb whose particle is wanted
 * @param graph the sentence dependency graph containing the verb
 * @return the particle child of the verb, or null if there is none
 */
static IndexedWord getParticle(final IndexedWord verb, final SemanticGraph graph) {
  final GrammaticalRelation particleRelation =
      edu.stanford.nlp.trees.GrammaticalRelation.getRelation(
          edu.stanford.nlp.trees.EnglishGrammaticalRelations.PhrasalVerbParticleGRAnnotation
              .class);
  return graph.getChildWithReln(verb, particleRelation);
}
/**
 * Decides whether this word has a prepositional modifier child.
 *
 * @param word the word to analyse
 * @param graph the sentence dependency graph containing the word
 * @return true if the word governs a prepositional-modifier relation
 */
static boolean hasPrepMod(IndexedWord word, SemanticGraph graph) {
  GrammaticalRelation prepModRelation =
      edu.stanford.nlp.trees.GrammaticalRelation.getRelation(
          edu.stanford.nlp.trees.EnglishGrammaticalRelations.PrepositionalModifierGRAnnotation
              .class);
  return graph.hasChildWithReln(word, prepModRelation);
}
/**
 * Decides whether this word has a phrasal-verb particle child.
 *
 * @param word the word to analyse
 * @param graph the sentence dependency graph containing the word
 * @return true if the word governs a phrasal-verb-particle relation
 */
static boolean hasParticle(IndexedWord word, SemanticGraph graph) {
  // Return the primitive boolean rather than the boxed Boolean of the original,
  // matching the sibling hasPrepMod; source-compatible for all callers.
  GrammaticalRelation reln =
      edu.stanford.nlp.trees.GrammaticalRelation.getRelation(
          edu.stanford.nlp.trees.EnglishGrammaticalRelations.PhrasalVerbParticleGRAnnotation
              .class);
  return graph.hasChildWithReln(word, reln);
}
/**
 * Create a searcher manually, supplying a dependency tree, an optional classifier for when to
 * split clauses, and a featurizer for that classifier. You almost certainly want to use {@link
 * ClauseSplitter#load(String)} instead of this constructor.
 *
 * @param tree The dependency tree to search over.
 * @param assumedTruth The assumed truth of the tree (relevant for natural logic inference). If in
 *     doubt, pass in true.
 * @param isClauseClassifier The classifier for whether a given dependency arc should be a new
 *     clause. If this is not given, all arcs are treated as clause separators.
 * @param featurizer The featurizer for the classifier. If no featurizer is given, one should be
 *     given in {@link ClauseSplitterSearchProblem#search(java.util.function.Predicate,
 *     Classifier, Map, java.util.function.Function, int)}, or else the classifier will be
 *     useless.
 * @see ClauseSplitter#load(String)
 */
protected ClauseSplitterSearchProblem(
    SemanticGraph tree,
    boolean assumedTruth,
    Optional<Classifier<ClauseSplitter.ClauseClassifierLabel, String>> isClauseClassifier,
    Optional<
            Function<
                Triple<
                    ClauseSplitterSearchProblem.State,
                    ClauseSplitterSearchProblem.Action,
                    ClauseSplitterSearchProblem.State>,
                Counter<String>>>
        featurizer) {
  // Defensive copy: the searcher mutates its tree, so never alias the caller's graph.
  this.tree = new SemanticGraph(tree);
  this.assumedTruth = assumedTruth;
  this.isClauseClassifier = isClauseClassifier;
  this.featurizer = featurizer;
  // Index edges
  this.tree.edgeIterable().forEach(edgeToIndex::addToIndex);
  // Get length
  // (vertexListSorted is index-ordered, so the last vertex carries the sentence length)
  List<IndexedWord> sortedVertices = tree.vertexListSorted();
  sentenceLength = sortedVertices.get(sortedVertices.size() - 1).index();
  // Register extra edges
  // (pre-seed empty lists so lookups below never return null)
  for (IndexedWord vertex : sortedVertices) {
    extraEdgesByGovernor.put(vertex, new ArrayList<>());
    extraEdgesByDependent.put(vertex, new ArrayList<>());
  }
  // Util.cleanTree strips non-tree edges from the copy and returns them; record each
  // removed edge under both of its endpoints for later lookup (e.g. 'ref' following).
  List<SemanticGraphEdge> extraEdges = Util.cleanTree(this.tree);
  assert Util.isTree(this.tree);
  for (SemanticGraphEdge edge : extraEdges) {
    extraEdgesByGovernor.get(edge.getGovernor()).add(edge);
    extraEdgesByDependent.get(edge.getDependent()).add(edge);
  }
}
@SuppressWarnings("unchecked") public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean ignoreCase) { // System.out.println(node.word()); if (isRoot) return (negDesc ? !sg.getRoots().contains(node) : sg.getRoots().contains(node)); // System.out.println("not root"); if (isEmpty) return (negDesc ? !node.equals(IndexedWord.NO_WORD) : node.equals(IndexedWord.NO_WORD)); // System.err.println("Attributes are: " + attributes); for (Map.Entry<String, Pattern> attr : attributes.entrySet()) { String key = attr.getKey(); // System.out.println(key); String nodeValue; // if (key.equals("idx")) // nodeValue = Integer.toString(node.index()); // else { Class c = Env.lookupAnnotationKey(env, key); // find class for the key Object value = node.get(c); if (value == null) nodeValue = null; else nodeValue = value.toString(); // } // System.out.println(nodeValue); if (nodeValue == null) return negDesc; Pattern valuePattern = attr.getValue(); boolean matches = false; if (ignoreCase) { if (Pattern.compile(valuePattern.pattern(), Pattern.CASE_INSENSITIVE) .matcher(nodeValue) .matches()) matches = true; } else { if (nodeValue.matches(valuePattern.pattern())) matches = true; } if (!matches) { // System.out.println("doesn't match"); // System.out.println(""); return negDesc; } } // System.out.println("matches"); // System.out.println(""); return !negDesc; }
/**
 * A little utility function to make sure a SemanticGraph is a tree.
 *
 * @param tree The tree to check.
 * @return True if this {@link edu.stanford.nlp.semgraph.SemanticGraph} is a tree (versus a DAG,
 *     or Graph).
 */
public static boolean isTree(SemanticGraph tree) {
  for (IndexedWord vertex : tree.vertexSet()) {
    // Check one and only one incoming edge
    if (tree.getRoots().contains(vertex)) {
      // A root must have no incoming edges.
      if (tree.incomingEdgeIterator(vertex).hasNext()) {
        return false;
      }
    } else {
      // A non-root must have exactly one incoming edge: at least one...
      Iterator<SemanticGraphEdge> iter = tree.incomingEdgeIterator(vertex);
      if (!iter.hasNext()) {
        return false;
      }
      iter.next();
      // ...and at most one.
      if (iter.hasNext()) {
        return false;
      }
    }
    // Check incoming and outgoing edges match
    // (every outgoing edge must appear -- by identity -- among its dependent's incoming edges)
    for (SemanticGraphEdge edge : tree.outgoingEdgeIterable(vertex)) {
      boolean foundReverse = false;
      for (SemanticGraphEdge reverse : tree.incomingEdgeIterable(edge.getDependent())) {
        if (reverse == edge) {
          foundReverse = true;
        }
      }
      if (!foundReverse) {
        return false;
      }
    }
    // (and symmetrically for incoming edges vs. the governor's outgoing edges)
    for (SemanticGraphEdge edge : tree.incomingEdgeIterable(vertex)) {
      boolean foundReverse = false;
      for (SemanticGraphEdge reverse : tree.outgoingEdgeIterable(edge.getGovernor())) {
        if (reverse == edge) {
          foundReverse = true;
        }
      }
      if (!foundReverse) {
        return false;
      }
    }
  }
  // Check for cycles
  if (isCyclic(tree)) {
    return false;
  }
  // Check topological sort -- sometimes fails?
  // try {
  //   tree.topologicalSort();
  // } catch (Exception e) {
  //   e.printStackTrace();
  //   return false;
  // }
  return true;
}
/** * This method decides whether a given <code>verb</code> has a passive subject or a passive * auxiliary. * * @param verb * @param graph * @return */ static boolean isPassive(IndexedWord verb, SemanticGraph graph) { // Examples: // "Dole was defeated by Clinton" nsubjpass(defeated, Dole) GrammaticalRelation nsubjpass = GrammaticalRelation.getRelation(NominalPassiveSubjectGRAnnotation.class); // "That she lied was suspected by everyone" csubjpass(suspected, lied) GrammaticalRelation csubjpass = GrammaticalRelation.getRelation(ClausalPassiveSubjectGRAnnotation.class); // "Kennedy was killed" auxpass(killed, was) GrammaticalRelation auxrel = GrammaticalRelation.getRelation(EnglishGrammaticalRelations.AuxPassiveGRAnnotation.class); Boolean passive = false; passive = passive || graph.hasChildWithReln(verb, nsubjpass); passive = passive || graph.hasChildWithReln(verb, csubjpass); passive = passive || graph.hasChildWithReln(verb, auxrel); return passive; }
/** * Gets the prepositions for this word in this sentence * * @param startingWord * @param graph Sentence * @param preposition A seeding expression. Ex: "in" or "in_front_of" * @return */ public static List<IndexedWord> getPrepRelations( IndexedWord startingWord, SemanticGraph graph, String preposition) { // GrammaticalRelation prepreln = // GrammaticalRelation.getRelation(PrepositionalModifierGRAnnotation.class); GrammaticalRelation prepreln = EnglishGrammaticalRelations.getPrep(preposition); // checking rule: root->prep_in->det return graph.getChildrenWithReln(startingWord, prepreln); }
/**
 * Scans tokens [startIndex, endIndex) of the given sentence for a reporting verb and, if one is
 * found, records its nsubj child as the speaker for the given utterance.
 *
 * @param utterNum The utterance number to register a speaker for.
 * @param sentNum Index of the sentence to scan within {@code sentences}.
 * @param sentences All sentences of the document.
 * @param startIndex First token index (inclusive) to scan.
 * @param endIndex Last token index (exclusive) to scan.
 * @param dict Dictionaries providing the set of reporting verbs.
 * @return true if a speaker was found and recorded, false otherwise.
 */
private boolean findSpeaker(
    int utterNum,
    int sentNum,
    List<CoreMap> sentences,
    int startIndex,
    int endIndex,
    Dictionaries dict) {
  List<CoreLabel> sent = sentences.get(sentNum).get(CoreAnnotations.TokensAnnotation.class);
  for (int i = startIndex; i < endIndex; i++) {
    // Only consider narration tokens (utterance 0), not quoted speech.
    if (sent.get(i).get(CoreAnnotations.UtteranceAnnotation.class) != 0) continue;
    String lemma = sent.get(i).get(CoreAnnotations.LemmaAnnotation.class);
    String word = sent.get(i).get(CoreAnnotations.TextAnnotation.class);
    if (dict.reportVerb.contains(lemma)) {
      // find subject
      SemanticGraph dependency =
          sentences
              .get(sentNum)
              .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
      IndexedWord w = dependency.getNodeByWordPattern(word);
      if (w != null) {
        for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(w)) {
          if (child.first().getShortName().equals("nsubj")) {
            String subjectString = child.second().word();
            int subjectIndex = child.second().index(); // start from 1
            // Head positions are (sentence, 0-based token) pairs.
            IntTuple headPosition = new IntTuple(2);
            headPosition.set(0, sentNum);
            headPosition.set(1, subjectIndex - 1);
            String speaker;
            // Prefer a known mention id over the raw subject string.
            if (mentionheadPositions.containsKey(headPosition)) {
              speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID);
            } else {
              speaker = subjectString;
            }
            speakers.put(utterNum, speaker);
            return true;
          }
        }
      } else {
        SieveCoreferenceSystem.logger.warning("Cannot find node in dependency for word " + word);
      }
    }
  }
  return false;
}
/**
 * Get a {@link SemgrexMatcher} for this pattern in this graph, with some initial conditions on
 * the variable assignments.
 */
public SemgrexMatcher matcher(SemanticGraph sg, Map<String, IndexedWord> variables) {
  // Delegate to the full matcher: start at the first root, case-sensitive,
  // with no pre-bound relation names.
  Map<String, String> relationBindings = Generics.newHashMap();
  return matcher(sg, sg.getFirstRoot(), variables, relationBindings, new VariableStrings(), false);
}
/**
 * Get a {@link SemgrexMatcher} for this pattern in this graph.
 *
 * @param sg the SemanticGraph to match on
 * @param ignoreCase will ignore case for matching a pattern with a node; not implemented by
 *     Coordination Pattern
 * @return a SemgrexMatcher
 */
public SemgrexMatcher matcher(SemanticGraph sg, boolean ignoreCase) {
  // Delegate to the full matcher starting from the first root with empty bindings.
  Map<String, IndexedWord> nodeBindings = Generics.newHashMap();
  Map<String, String> relationBindings = Generics.newHashMap();
  return matcher(
      sg, sg.getFirstRoot(), nodeBindings, relationBindings, new VariableStrings(), ignoreCase);
}
private void testParseTree() { try { Properties props = new Properties(); props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); // read some text in the text variable String text = "Give me a list of all bandleaders that play trumpet."; // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and has values with custom // types List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods // this is the parse tree of the current sentence Tree tree = sentence.get(TreeAnnotation.class); // this is the Stanford dependency graph of the current sentence SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); Set<IndexedWord> vertices = dependencies.vertexSet(); List<SemanticGraphEdge> edges = dependencies.edgeListSorted(); for (SemanticGraphEdge e : edges) {} for (IndexedWord i : vertices) { System.out.println(i.toString()); } } } catch (Exception e) { } }
/** * Returns the primary thing this sentence is talking about. More precisely: Returns any subject * or a passive subject of the sentence, or the root if none applies. * * @param graph * @return */ public static IndexedWord getSubject(SemanticGraph graph) { if (graph.isEmpty()) { return null; } GrammaticalRelation[] subjects = { EnglishGrammaticalRelations.NOMINAL_SUBJECT, EnglishGrammaticalRelations.NOMINAL_PASSIVE_SUBJECT, EnglishGrammaticalRelations.CLAUSAL_SUBJECT, EnglishGrammaticalRelations.CLAUSAL_PASSIVE_SUBJECT }; IndexedWord firstRoot = graph.getFirstRoot(); List<IndexedWord> children = graph.getChildrenWithRelns(firstRoot, Arrays.asList(subjects)); if (children != null && children.size() > 0) { assert children.size() == 1; // not really dangerous. But we need to change our implementation to return a list, // if there are more than one subject. return children.get(0); } // return null; return graph.getFirstRoot(); // in a subject-less sentence, the root is as good as the subject }
/**
 * Determine if a tree is cyclic.
 *
 * @param tree The tree to check.
 * @return True if the tree has at least one cycle in it.
 */
public static boolean isCyclic(SemanticGraph tree) {
  // From every non-root vertex, walk the chain of governors; revisiting a vertex
  // on the way up means the chain loops.
  for (IndexedWord start : tree.vertexSet()) {
    if (tree.getRoots().contains(start)) {
      continue;
    }
    Set<IndexedWord> visited = new HashSet<>();
    visited.add(start);
    IndexedWord ancestor = tree.incomingEdgeIterator(start).next().getGovernor();
    while (ancestor != null) {
      // Set.add returns false when the element was already present, i.e. a cycle.
      if (!visited.add(ancestor)) {
        return true;
      }
      Iterator<SemanticGraphEdge> upward = tree.incomingEdgeIterator(ancestor);
      ancestor = upward.hasNext() ? upward.next().getGovernor() : null;
    }
  }
  return false;
}
/**
 * Get a {@link SemgrexMatcher} for this pattern over an aligned pair of graphs, starting from
 * the hypothesis graph's first root with empty bindings.
 */
public SemgrexMatcher matcher(
    SemanticGraph hypGraph, Alignment alignment, SemanticGraph txtGraph, boolean ignoreCase) {
  IndexedWord startNode = hypGraph.getFirstRoot();
  return matcher(
      hypGraph,
      alignment,
      txtGraph,
      true, // matching on the hypothesis side
      startNode,
      Generics.<String, IndexedWord>newHashMap(),
      Generics.<String, String>newHashMap(),
      new VariableStrings(),
      ignoreCase);
}