/**
 * Thread safety note: nothing special is done to ensure the thread safety of the
 * GrammaticalStructureFactory.  However, both the EnglishGrammaticalStructureFactory and the
 * ChineseGrammaticalStructureFactory are thread safe.
 */
public static void fillInParseAnnotations(boolean verbose, boolean buildGraphs,
                                          GrammaticalStructureFactory gsf,
                                          CoreMap sentence, Tree tree) {
  // make sure all tree nodes are CoreLabels
  // TODO: why isn't this always true? something fishy is going on
  ParserAnnotatorUtils.convertToCoreLabels(tree);

  // index nodes, i.e., add start and end token positions to all nodes
  // this is needed by other annotators downstream, e.g., the NFLAnnotator
  tree.indexSpans(0);

  sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
  if (verbose) {
    System.err.println("Tree is:");
    tree.pennPrint(System.err);
  }

  if (buildGraphs) {
    String docID = sentence.get(CoreAnnotations.DocIDAnnotation.class);
    if (docID == null) {
      docID = "";
    }

    Integer sentenceIndex = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
    int index = sentenceIndex == null ? 0 : sentenceIndex;

    // generate the dependency graph
    SemanticGraph deps = SemanticGraphFactory.generateCollapsedDependencies(
        gsf.newGrammaticalStructure(tree), docID, index);
    SemanticGraph uncollapsedDeps = SemanticGraphFactory.generateUncollapsedDependencies(
        gsf.newGrammaticalStructure(tree), docID, index);
    SemanticGraph ccDeps = SemanticGraphFactory.generateCCProcessedDependencies(
        gsf.newGrammaticalStructure(tree), docID, index);
    if (verbose) {
      System.err.println("SDs:");
      System.err.println(deps.toString("plain"));
    }
    sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps);
    sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps);
    sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps);
  }

  setMissingTags(sentence, tree);
}
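/*
 * Illustrative only, not part of the original class: one plausible call site for
 * fillInParseAnnotations, assuming the caller already has a parsed Tree and the sentence CoreMap
 * it belongs to. The factory comes from PennTreebankLanguagePack (the English case covered by the
 * thread-safety note above); the method name annotateParsedSentence is an assumption made for
 * this sketch.
 */
private static void annotateParsedSentence(CoreMap sentence, Tree tree) {
  // EnglishGrammaticalStructureFactory, which the note above says is safe to share across threads
  GrammaticalStructureFactory gsf = new PennTreebankLanguagePack().grammaticalStructureFactory();
  // verbose = false, buildGraphs = true: store the tree plus the basic, collapsed,
  // and CC-processed dependency graphs on the sentence
  fillInParseAnnotations(false, true, gsf, sentence, tree);
}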
@Override
public Document nextDoc() throws Exception {
  List<List<CoreLabel>> allWords = new ArrayList<List<CoreLabel>>();
  List<Tree> allTrees = new ArrayList<Tree>();

  CoNLL2011DocumentReader.Document conllDoc = reader.getNextDocument();
  if (conllDoc == null) {
    return null;
  }

  Annotation anno = conllDoc.getAnnotation();
  List<CoreMap> sentences = anno.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    if (!Constants.USE_GOLD_PARSES && !replicateCoNLL) {
      // Remove the tree from the annotation so it is replaced with a parse from the Stanford parser
      sentence.remove(TreeCoreAnnotations.TreeAnnotation.class);
    } else {
      Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
      // generate the dependency graph
      try {
        SemanticGraph deps = SemanticGraphFactory.makeFromTree(
            tree, SemanticGraphFactory.Mode.COLLAPSED, includeExtras, lemmatize, threadSafe);
        SemanticGraph basicDeps = SemanticGraphFactory.makeFromTree(
            tree, SemanticGraphFactory.Mode.BASIC, includeExtras, lemmatize, threadSafe);
        sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basicDeps);
        sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps);
      } catch (Exception e) {
        logger.log(Level.WARNING,
            "Exception caught during extraction of Stanford dependencies. Will ignore and continue...",
            e);
      }
    }
  }

  String preSpeaker = null;
  int utterance = -1;
  for (CoreLabel token : anno.get(CoreAnnotations.TokensAnnotation.class)) {
    if (!token.containsKey(CoreAnnotations.SpeakerAnnotation.class)) {
      token.set(CoreAnnotations.SpeakerAnnotation.class, "");
    }
    String curSpeaker = token.get(CoreAnnotations.SpeakerAnnotation.class);
    if (!curSpeaker.equals(preSpeaker)) {
      utterance++;
      preSpeaker = curSpeaker;
    }
    token.set(CoreAnnotations.UtteranceAnnotation.class, utterance);
  }

  // Run pipeline
  stanfordProcessor.annotate(anno);

  for (CoreMap sentence : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
    allWords.add(sentence.get(CoreAnnotations.TokensAnnotation.class));
    allTrees.add(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
  }

  // Initialize gold mentions
  List<List<Mention>> allGoldMentions = extractGoldMentions(conllDoc);

  List<List<Mention>> allPredictedMentions;
  if (Constants.USE_GOLD_MENTIONS) {
    // allPredictedMentions = allGoldMentions;
    // Make a copy of the gold mentions, since mentions may later be merged and mention IDs changed
    allPredictedMentions = makeCopy(allGoldMentions);
  } else if (Constants.USE_GOLD_MENTION_BOUNDARIES) {
    allPredictedMentions = ((RuleBasedCorefMentionFinder) mentionFinder)
        .filterPredictedMentions(allGoldMentions, anno, dictionaries);
  } else {
    allPredictedMentions = mentionFinder.extractPredictedMentions(anno, maxID, dictionaries);
  }

  try {
    recallErrors(allGoldMentions, allPredictedMentions, anno);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  Document doc = arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
  doc.conllDoc = conllDoc;
  return doc;
}
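/*
 * Illustrative only, not part of the original reader: a minimal sketch of the usual
 * drain-until-null loop around nextDoc(), written as a hypothetical helper on the same class.
 * The method name countDocuments is an assumption made for this example.
 */
private int countDocuments() throws Exception {
  int numDocs = 0;
  // nextDoc() signals end of input by returning null; each returned Document already carries
  // the annotated sentences plus the gold and predicted mentions
  while (nextDoc() != null) {
    numDocs++;
  }
  return numDocs;
}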
/**
 * Prints out all matches of a semgrex pattern on a file of dependencies. <br>
 * Usage:<br>
 * java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args] <br>
 * See the help() function for a list of possible arguments to provide.
 */
public static void main(String[] args) throws IOException {
  Map<String, Integer> flagMap = Generics.newHashMap();

  flagMap.put(PATTERN, 1);
  flagMap.put(TREE_FILE, 1);
  flagMap.put(MODE, 1);
  flagMap.put(EXTRAS, 1);
  flagMap.put(CONLLU_FILE, 1);
  flagMap.put(OUTPUT_FORMAT_OPTION, 1);

  Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);
  args = argsMap.get(null);

  // TODO: allow patterns to be extracted from a file
  if (!(argsMap.containsKey(PATTERN)) || argsMap.get(PATTERN).length == 0) {
    help();
    System.exit(2);
  }
  SemgrexPattern semgrex = SemgrexPattern.compile(argsMap.get(PATTERN)[0]);

  String modeString = DEFAULT_MODE;
  if (argsMap.containsKey(MODE) && argsMap.get(MODE).length > 0) {
    modeString = argsMap.get(MODE)[0].toUpperCase();
  }
  SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.valueOf(modeString);

  String outputFormatString = DEFAULT_OUTPUT_FORMAT;
  if (argsMap.containsKey(OUTPUT_FORMAT_OPTION) && argsMap.get(OUTPUT_FORMAT_OPTION).length > 0) {
    outputFormatString = argsMap.get(OUTPUT_FORMAT_OPTION)[0].toUpperCase();
  }
  OutputFormat outputFormat = OutputFormat.valueOf(outputFormatString);

  boolean useExtras = true;
  if (argsMap.containsKey(EXTRAS) && argsMap.get(EXTRAS).length > 0) {
    useExtras = Boolean.valueOf(argsMap.get(EXTRAS)[0]);
  }

  List<SemanticGraph> graphs = Generics.newArrayList();
  // TODO: allow other sources of graphs, such as dependency files
  if (argsMap.containsKey(TREE_FILE) && argsMap.get(TREE_FILE).length > 0) {
    for (String treeFile : argsMap.get(TREE_FILE)) {
      System.err.println("Loading file " + treeFile);
      MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer());
      treebank.loadPath(treeFile);
      for (Tree tree : treebank) {
        // TODO: allow other languages... this defaults to English
        SemanticGraph graph = SemanticGraphFactory.makeFromTree(tree, mode,
            useExtras ? GrammaticalStructure.Extras.MAXIMAL : GrammaticalStructure.Extras.NONE,
            true);
        graphs.add(graph);
      }
    }
  }

  if (argsMap.containsKey(CONLLU_FILE) && argsMap.get(CONLLU_FILE).length > 0) {
    CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
    for (String conlluFile : argsMap.get(CONLLU_FILE)) {
      System.err.println("Loading file " + conlluFile);
      Iterator<SemanticGraph> it = reader.getIterator(IOUtils.readerFromString(conlluFile));
      while (it.hasNext()) {
        SemanticGraph graph = it.next();
        graphs.add(graph);
      }
    }
  }

  for (SemanticGraph graph : graphs) {
    SemgrexMatcher matcher = semgrex.matcher(graph);
    if (!(matcher.find())) {
      continue;
    }

    if (outputFormat == OutputFormat.LIST) {
      System.err.println("Matched graph:");
      System.err.println(graph.toString(SemanticGraph.OutputFormat.LIST));
      boolean found = true;
      while (found) {
        System.err.println("Matches at: " + matcher.getMatch().value() + "-" +
                           matcher.getMatch().index());
        List<String> nodeNames = Generics.newArrayList();
        nodeNames.addAll(matcher.getNodeNames());
        Collections.sort(nodeNames);
        for (String name : nodeNames) {
          System.err.println("  " + name + ": " + matcher.getNode(name).value() + "-" +
                             matcher.getNode(name).index());
        }
        System.err.println();
        found = matcher.find();
      }
    } else if (outputFormat == OutputFormat.OFFSET) {
      if (graph.vertexListSorted().isEmpty()) {
        continue;
      }
      System.out.printf("+%d %s%n",
          graph.vertexListSorted().get(0).get(CoreAnnotations.LineNumberAnnotation.class),
          argsMap.get(CONLLU_FILE)[0]);
    }
  }
}
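/*
 * Illustrative only, not part of the original file: the programmatic counterpart of the matcher
 * loop in main(), run on a single in-memory graph. The toy graph string, the relation name dobj,
 * and the method name demoMatch are assumptions made for this sketch; the find()/getNode() calls
 * mirror what the LIST output above does for each match.
 */
private static void demoMatch() {
  // a small dependency graph built from its bracketed string form
  SemanticGraph graph = SemanticGraph.valueOf("[ate subj>Bill dobj>[muffins compound>blueberry]]");
  // match any governor with a dobj dependent, binding both nodes to names
  SemgrexPattern pattern = SemgrexPattern.compile("{}=gov >dobj {}=dep");
  SemgrexMatcher matcher = pattern.matcher(graph);
  while (matcher.find()) {
    System.err.println(matcher.getNode("gov").value() + " -> " + matcher.getNode("dep").value());
  }
}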