/** * 29% in FactorTable.getValue() 28% in CRFCliqueTree.getCalibratedCliqueTree() 12.6% waiting for * threads * * <p>Single threaded: 15000 ms - 26000 ms Multi threaded: 4500 ms - 7000 ms * * <p>with 8 cpus, 3.3x - 3.7x speedup, around 800% utilization */ public static void benchmarkCRF() { Properties props = new Properties(); props.setProperty("macro", "true"); // use a generic CRF configuration props.setProperty("useIfInteger", "true"); props.setProperty("featureFactory", "edu.stanford.nlp.benchmarks.BenchmarkFeatureFactory"); props.setProperty("saveFeatureIndexToDisk", "false"); CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(props); Random r = new Random(42); List<List<CoreLabel>> data = new ArrayList<>(); for (int i = 0; i < 100; i++) { List<CoreLabel> sentence = new ArrayList<>(); for (int j = 0; j < 20; j++) { CoreLabel l = new CoreLabel(); l.setWord("j:" + j); boolean tag = j % 2 == 0 ^ (r.nextDouble() > 0.7); l.set(CoreAnnotations.AnswerAnnotation.class, "target:" + tag); sentence.add(l); } data.add(sentence); } long msStart = System.currentTimeMillis(); crf.train(data); long delay = System.currentTimeMillis() - msStart; System.out.println("Training took " + delay + " ms"); }
/**
 * Walks the tree rooted at {@code t} and appends to {@code l} the label of the first child of
 * every preterminal node, after storing the preterminal's own label on it under
 * {@code TagLabelAnnotation}.
 *
 * @param t root of the (sub)tree to traverse
 * @param l output list; labels are appended in child order
 */
private static void taggedLeafLabels(Tree t, List<CoreLabel> l) {
  if (!t.isPreTerminal()) {
    // Interior node: descend into each child in order.
    for (Tree child : t.children()) {
      taggedLeafLabels(child, l);
    }
    return;
  }

  // Preterminal: its first child carries the word label, which is expected to be a CoreLabel.
  CoreLabel leafLabel = (CoreLabel) t.getChild(0).label();
  leafLabel.set(TagLabelAnnotation.class, t.label());
  l.add(leafLabel);
}
/**
 * Rebuilds the text of {@code annotation} token by token, substituting the words of a coreference
 * chain's representative mention for tokens that belong to that chain but whose index lies
 * outside the representative mention's index range.
 *
 * <p>Tokens with no matching chain are copied unchanged. If the annotation carries no
 * coreference-chain map at all, every token is copied unchanged.
 *
 * <p>The result is also written to standard output, preceded by a blank line.
 *
 * @param annotation the document; its sentences and their tokens are read from it
 * @return the resolved words, each followed by a single space (so the string ends with a space
 *     when at least one word was emitted)
 */
public static final String doCorefResolution(Annotation annotation) {
  Map<Integer, CorefChain> corefs = annotation.get(CorefChainAnnotation.class);
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);

  // Accumulate directly into a builder instead of repeated String concatenation.
  StringBuilder resolved = new StringBuilder();

  for (CoreMap sentence : sentences) {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (CoreLabel token : tokens) {
      Integer corefClustId = token.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
      // A missing chain map is treated the same as "no chain for this token".
      CorefChain chain = (corefs == null) ? null : corefs.get(corefClustId);
      if (chain == null) {
        resolved.append(token.word()).append(' ');
        continue;
      }

      CorefMention reprMent = chain.getRepresentativeMention();
      int tokenIndex = token.index();
      // NOTE(review): this test treats endIndex as inclusive while the copy loop below treats it
      // as exclusive, and it does not compare sentence numbers - confirm against CorefMention's
      // index convention before changing either.
      boolean outsideRepresentative =
          tokenIndex < reprMent.startIndex || tokenIndex > reprMent.endIndex;
      if (!outsideRepresentative) {
        resolved.append(token.word()).append(' ');
        continue;
      }

      // sentNum and the mention indices are used as 1-based here, hence the "- 1" offsets.
      CoreMap corefSentence = sentences.get(reprMent.sentNum - 1);
      List<CoreLabel> corefSentenceTokens =
          corefSentence.get(CoreAnnotations.TokensAnnotation.class);
      for (int i = reprMent.startIndex; i < reprMent.endIndex; i++) {
        resolved.append(corefSentenceTokens.get(i - 1).word()).append(' ');
      }
    }
  }

  String resolvedStr = resolved.toString();
  System.out.println();
  System.out.println(resolvedStr);
  return resolvedStr;
}
/**
 * Builds a synthetic token that can be inserted into the dependency tree even though it does not
 * occur in the original sentence.
 *
 * <p>The result starts as a copy of {@code toCopy}; its word, lemma and value are then all set to
 * {@code word}, its NER tag to {@code "O"}, its part of speech to {@code POS}, and its index to
 * {@code sentenceLength + 5}.
 *
 * @param toCopy the CoreLabel whose remaining fields are inherited
 * @param word the surface form for the new node (also used as lemma and value)
 * @param POS the part-of-speech tag for the new node
 * @return the newly created CoreLabel
 */
@SuppressWarnings("UnusedDeclaration")
private CoreLabel mockNode(CoreLabel toCopy, String word, String POS) {
  final CoreLabel synthetic = new CoreLabel(toCopy);

  // Setter order is kept exactly as before.
  // NOTE(review): CoreLabel setters may interact (e.g. setting the word could reset the lemma) -
  // confirm before reordering.
  synthetic.setWord(word);
  synthetic.setLemma(word);
  synthetic.setValue(word);
  synthetic.setNER("O");
  synthetic.setTag(POS);
  synthetic.setIndex(sentenceLength + 5);

  return synthetic;
}