public static DependencyParse parse(String text) { if (pipeline == null) { loadModels(); } DependencyParse parse = new DependencyParse(); Annotation document = new Annotation(text); pipeline.annotate(document); List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); IndexedWord root = dependencies.getFirstRoot(); parse.setHeadNode(root.index()); List<SemanticGraphEdge> edges = dependencies.edgeListSorted(); // System.out.println(edges); for (SemanticGraphEdge t : edges) { String dep = t.getDependent().originalText(); int depIndex = t.getDependent().index(); String depPOS = t.getDependent().tag(); int depStart = t.getDependent().beginPosition(); int depEnd = t.getDependent().endPosition(); String gov = t.getGovernor().originalText(); int govIndex = t.getGovernor().index(); String govPOS = t.getGovernor().tag(); int govStart = t.getGovernor().beginPosition(); int govEnd = t.getGovernor().endPosition(); parse.addNode(govIndex, gov, govPOS, govStart, govEnd); parse.addNode(depIndex, dep, depPOS, depStart, depEnd); parse.addEdge(depIndex, govIndex, t.getRelation().getShortName()); } } return parse; }
private void testParseTree() { try { Properties props = new Properties(); props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); // read some text in the text variable String text = "Give me a list of all bandleaders that play trumpet."; // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and has values with custom // types List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods // this is the parse tree of the current sentence Tree tree = sentence.get(TreeAnnotation.class); // this is the Stanford dependency graph of the current sentence SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); Set<IndexedWord> vertices = dependencies.vertexSet(); List<SemanticGraphEdge> edges = dependencies.edgeListSorted(); for (SemanticGraphEdge e : edges) {} for (IndexedWord i : vertices) { System.out.println(i.toString()); } } } catch (Exception e) { } }