/**
 * Tokenizes the class-level {@code paragraph} text with a {@link PTBTokenizer} and prints every
 * token to standard output on a single line.
 *
 * <p>Each token is written as {@code originalText [begin-end];} where {@code begin} and {@code
 * end} are the values reported by {@code beginPosition()} and {@code endPosition()}. A line
 * terminator is printed once all tokens have been consumed.
 */
public static void standfordNLP() {
  CoreLabelTokenFactory labelFactory = new CoreLabelTokenFactory();
  StringReader source = new StringReader(paragraph);

  // "invertible=true" is passed through to the tokenizer as its options string.
  for (PTBTokenizer<CoreLabel> tokenizer =
          new PTBTokenizer<>(source, labelFactory, "invertible=true");
      tokenizer.hasNext(); ) {
    CoreLabel token = tokenizer.next();
    String span = " [" + token.beginPosition() + "-" + token.endPosition() + "];";
    System.out.print(token.originalText() + span);
  }

  System.out.println();
}
/**
 * Runs a Stanford CoreNLP coreference pipeline over {@code text} and appends a rewritten version
 * of its sentences to the class-level {@code resolvedSentences} accumulator.
 *
 * <p>Every coreference mention is grouped by its sentence number. Within each such sentence, a
 * mention's first token has its original text overwritten with the chain's representative
 * mention span when all of the following hold:
 *
 * <ul>
 *   <li>the mention span differs (ignoring case) from the representative span, and neither
 *       span contains the other;
 *   <li>the lower-cased representative span is not in {@code replacementList};
 *   <li>the representative span splits into fewer than three whitespace-separated parts;
 *   <li>the lower-cased mention span is in {@code replacementList}.
 * </ul>
 *
 * <p>When the mention is "her" or "his" (ignoring case) the replacement gets an {@code 's}
 * suffix. Each processed sentence is then rendered as its tokens' original text, each followed
 * by a single space, plus a trailing newline.
 *
 * <p>NOTE(review): only sentences that own at least one recorded mention are appended; a
 * sentence with no mentions is skipped. Confirm this is intended.
 *
 * @param text the raw text to annotate
 */
public static void resolveAnaphora(String text) {
  // The Redwood logging configuration is swapped while the pipeline is built and run, then
  // cleared afterwards. Presumably this is to silence pipeline start-up chatter; confirm.
  RedwoodConfiguration.empty().capture(System.err).apply();

  Annotation document = new Annotation(text);
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
  props.setProperty("dcoref.female", "female.unigram.txt");
  props.setProperty("dcoref.male", "male.unigram.txt");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  pipeline.annotate(document);

  RedwoodConfiguration.current().clear().apply();

  Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
  List<CoreMap> stnfrdSentences = document.get(SentencesAnnotation.class);

  // Group every (chain, mention) pair under the number of the sentence the mention occurs in.
  ImmutableMultimap.Builder<Integer, Pair<CorefChain, CorefMention>> records =
      ImmutableMultimap.builder();
  graph.forEach(
      (key, value) ->
          value
              .getMentionMap()
              .forEach(
                  (intPair, corefSet) ->
                      corefSet.forEach(
                          mention -> records.put(mention.sentNum, Pair.of(value, mention)))));

  // Visit sentences in ascending sentence-number order. naturalOrder() replaces the former
  // "o1 - o2" comparator, which is an overflow-prone way to compare integers.
  records
      .orderKeysBy(Comparator.<Integer>naturalOrder())
      .build()
      .asMap()
      .forEach(
          (sentNum, mentionList) -> {
            // sentNum and startIndex are used with a -1 offset to index the token/sentence lists.
            CoreMap sentence = stnfrdSentences.get(sentNum - 1);
            List<CoreLabel> stnfrdtokens = sentence.get(TokensAnnotation.class);

            mentionList.forEach(
                pair -> {
                  CorefChain chain = pair.getLeft();
                  CorefMention mention = pair.getRight();
                  String span = mention.mentionSpan;
                  String root = chain.getRepresentativeMention().mentionSpan;

                  if (!span.equalsIgnoreCase(root)
                      && !root.contains(span)
                      && !span.contains(root)
                      && !replacementList.contains(root.toLowerCase())
                      && root.split("\\s").length < 3
                      && replacementList.contains(span.toLowerCase())) {
                    if (span.equalsIgnoreCase("her") || span.equalsIgnoreCase("his")) {
                      root += "'s";
                    }
                    // Only the first token of the mention is overwritten.
                    stnfrdtokens.get(mention.startIndex - 1).setOriginalText(root);
                  }
                });

            // Render the (possibly rewritten) sentence: every token followed by one space.
            StringBuilder sent = new StringBuilder();
            for (CoreLabel token : stnfrdtokens) {
              sent.append(token.originalText()).append(" ");
            }
            resolvedSentences += sent.append("\n").toString();
          });
}
/** ************************************************************ */

/**
 * Decides whether a space is needed before the given label by delegating to the overload of
 * {@code needSpaceBefore} that accepts the label's original text.
 *
 * @param label the token whose {@code originalText()} is inspected
 * @return whatever the text-based overload returns for the label's original text
 */
private boolean needSpaceBefore(CoreLabel label) {
  final String surfaceText = label.originalText();
  return needSpaceBefore(surfaceText);
}