/** * 29% in FactorTable.getValue() 28% in CRFCliqueTree.getCalibratedCliqueTree() 12.6% waiting for * threads * * <p>Single threaded: 15000 ms - 26000 ms Multi threaded: 4500 ms - 7000 ms * * <p>with 8 cpus, 3.3x - 3.7x speedup, around 800% utilization */ public static void benchmarkCRF() { Properties props = new Properties(); props.setProperty("macro", "true"); // use a generic CRF configuration props.setProperty("useIfInteger", "true"); props.setProperty("featureFactory", "edu.stanford.nlp.benchmarks.BenchmarkFeatureFactory"); props.setProperty("saveFeatureIndexToDisk", "false"); CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(props); Random r = new Random(42); List<List<CoreLabel>> data = new ArrayList<>(); for (int i = 0; i < 100; i++) { List<CoreLabel> sentence = new ArrayList<>(); for (int j = 0; j < 20; j++) { CoreLabel l = new CoreLabel(); l.setWord("j:" + j); boolean tag = j % 2 == 0 ^ (r.nextDouble() > 0.7); l.set(CoreAnnotations.AnswerAnnotation.class, "target:" + tag); sentence.add(l); } data.add(sentence); } long msStart = System.currentTimeMillis(); crf.train(data); long delay = System.currentTimeMillis() - msStart; System.out.println("Training took " + delay + " ms"); }
/**
 * Builds an IndexedWord backed by the given label.
 *
 * <p>If {@code w} is already a {@link CoreLabel} it is used directly as the backing label (no
 * copy is made). Otherwise a new {@link CoreLabel} is constructed from {@code w}; if that new
 * label has no word, its word is set to its value.
 *
 * @param w A Label to initialize this IndexedWord from
 */
public IndexedWord(Label w) {
  if (!(w instanceof CoreLabel)) {
    CoreLabel converted = new CoreLabel(w);
    if (converted.word() == null) {
      // Fall back to the value so that the word is never left unset.
      converted.setWord(converted.value());
    }
    this.label = converted;
  } else {
    this.label = (CoreLabel) w;
  }
}
/**
 * Recursively collects the labels of the leaves below {@code t}, in traversal order, into
 * {@code l}. Each collected leaf label additionally gets the label of its preterminal parent
 * stored under {@code TagLabelAnnotation}.
 *
 * @param t The (sub)tree to walk
 * @param l The list that receives the leaf labels; it is appended to in place
 */
private static void taggedLeafLabels(Tree t, List<CoreLabel> l) {
  if (!t.isPreTerminal()) {
    // Interior node: descend into every child.
    for (Tree child : t.children()) {
      taggedLeafLabels(child, l);
    }
    return;
  }
  // Preterminal: its first child is the leaf whose label is collected.
  CoreLabel leafLabel = (CoreLabel) t.getChild(0).label();
  leafLabel.set(TagLabelAnnotation.class, t.label());
  l.add(leafLabel);
}
/**
 * Produces a copy of the annotated text in which tokens that belong to a coreference chain are
 * replaced by the tokens of that chain's representative mention. The result is printed to
 * standard output (preceded by an empty line) and returned.
 *
 * <p>Every emitted word is followed by a single space, so a non-empty result ends with a
 * trailing space.
 *
 * <p>If the annotation carries no coreference chain map, every token is emitted unchanged.
 *
 * @param annotation An annotation providing sentences, tokens, coreference cluster ids and
 *     (optionally) coreference chains
 * @return The space-separated resolved text
 */
public static final String doCorefResolution(Annotation annotation) {
  Map<Integer, CorefChain> corefs = annotation.get(CorefChainAnnotation.class);
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  List<String> resolved = new ArrayList<>();

  for (CoreMap sentence : sentences) {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (CoreLabel token : tokens) {
      Integer corefClustId = token.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
      // A missing chain map is treated as "no chain for this token" instead of failing.
      CorefChain chain = (corefs == null) ? null : corefs.get(corefClustId);
      if (chain == null) {
        resolved.add(token.word());
        continue;
      }

      CorefMention reprMent = chain.getRepresentativeMention();
      // sentNum is used as a 1-based sentence number here, hence the -1.
      CoreMap corefSentence = sentences.get(reprMent.sentNum - 1);
      List<CoreLabel> corefSentenceTokens =
          corefSentence.get(CoreAnnotations.TokensAnnotation.class);

      // NOTE(review): this compares only token indices and does not check that the token is
      // in the same sentence as the representative mention - confirm this is intended.
      boolean outsideRepresentative =
          token.index() < reprMent.startIndex || token.index() > reprMent.endIndex;
      if (!outsideRepresentative) {
        resolved.add(token.word());
        continue;
      }

      // Substitute the words of the representative mention; its indices are used as 1-based
      // positions into the sentence's token list, with endIndex treated as exclusive.
      for (int i = reprMent.startIndex; i < reprMent.endIndex; i++) {
        resolved.add(corefSentenceTokens.get(i - 1).word());
      }
    }
  }

  // StringBuilder avoids the quadratic cost of repeated String concatenation in a loop.
  StringBuilder joined = new StringBuilder();
  System.out.println();
  for (String str : resolved) {
    joined.append(str).append(" ");
  }
  String resolvedStr = joined.toString();
  System.out.println(resolvedStr);
  return resolvedStr;
}
/**
 * Verifies {@code SentenceUtils.listToString} on lists of {@link CoreLabel}: labels carrying only
 * a word or only a value must render as {@code expectedValueOnly}, and labels carrying a tag in
 * addition must render as {@code expectedTagged} when the separator form is used.
 */
public void testCoreLabelListToString() {
  List<CoreLabel> wordOnly = new ArrayList<>();
  List<CoreLabel> valueOnly = new ArrayList<>();
  List<CoreLabel> wordAndTag = new ArrayList<>();
  List<CoreLabel> valueAndTag = new ArrayList<>();

  int pos = 0;
  while (pos < words.length) {
    String token = words[pos];
    String pos_tag = tags[pos];

    // Word set, nothing else.
    CoreLabel withWord = new CoreLabel();
    withWord.setWord(token);
    wordOnly.add(withWord);

    // Value set, nothing else.
    CoreLabel withValue = new CoreLabel();
    withValue.setValue(token);
    valueOnly.add(withValue);

    // Word plus tag.
    CoreLabel withWordTag = new CoreLabel();
    withWordTag.setWord(token);
    withWordTag.setTag(pos_tag);
    wordAndTag.add(withWordTag);

    // Value plus tag.
    CoreLabel withValueTag = new CoreLabel();
    withValueTag.setValue(token);
    withValueTag.setTag(pos_tag);
    valueAndTag.add(withValueTag);

    ++pos;
  }

  assertEquals(expectedValueOnly, SentenceUtils.listToString(wordOnly, true));
  assertEquals(expectedValueOnly, SentenceUtils.listToString(valueOnly, true));
  assertEquals(expectedTagged, SentenceUtils.listToString(wordAndTag, false, separator));
  assertEquals(expectedTagged, SentenceUtils.listToString(valueAndTag, false, separator));
}
/**
 * Builds a synthetic node that can be added to the dependency tree even though it does not
 * correspond to a token of the original sentence.
 *
 * <p>The node starts out as a copy of {@code toCopy}; its word, lemma and value are then all set
 * to {@code word}, its NER tag to {@code "O"}, its POS tag to {@code POS}, and its index to
 * {@code sentenceLength + 5}.
 *
 * @param toCopy The CoreLabel to copy from initially.
 * @param word The new word to add.
 * @param POS The new part of speech to add.
 * @return A CoreLabel copying most fields from toCopy, but with a new word and POS tag (as well
 *     as a new index).
 */
@SuppressWarnings("UnusedDeclaration")
private CoreLabel mockNode(CoreLabel toCopy, String word, String POS) {
  CoreLabel synthetic = new CoreLabel(toCopy);
  // NOTE(review): the setter order is kept exactly as written; CoreLabel setters may affect
  // one another (e.g. word vs. lemma) - confirm before reordering.
  synthetic.setWord(word);
  synthetic.setLemma(word);
  synthetic.setValue(word);
  synthetic.setNER("O");
  synthetic.setTag(POS);
  // NOTE(review): the +5 offset presumably keeps the index clear of real token indices -
  // confirm against callers.
  synthetic.setIndex(sentenceLength + 5);
  return synthetic;
}
/**
 * Sets the original text by delegating to {@code label.setOriginalText}.
 *
 * @param originalText The value passed through to the backing label
 */
@Override public void setOriginalText(String originalText) { label.setOriginalText(originalText); }
/**
 * Returns the original text as reported by {@code label.originalText()}.
 *
 * @return The backing label's original text
 */
@Override public String originalText() { return label.originalText(); }
/**
 * Returns the word as reported by {@code label.word()}.
 *
 * @return The backing label's word
 */
@Override public String word() { return label.word(); }
/**
 * Returns the value as reported by {@code label.value()}.
 *
 * @return The backing label's value
 */
@Override public String value() { return label.value(); }
/**
 * Creates an IndexedWord whose backing label is a fresh {@link CoreLabel} carrying only a
 * document ID, a sentence index and a word index.
 *
 * @param docID The document ID (arbitrary string)
 * @param sentenceIndex The sentence number in the document (the original documentation states
 *     this is normally 0-based - TODO confirm)
 * @param index The index of the word in the sentence (the original documentation states this
 *     is normally 0-based - TODO confirm)
 */
public IndexedWord(String docID, int sentenceIndex, int index) {
  CoreLabel fresh = new CoreLabel();
  fresh.set(CoreAnnotations.DocIDAnnotation.class, docID);
  fresh.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex);
  fresh.set(CoreAnnotations.IndexAnnotation.class, index);
  this.label = fresh;
}
/**
 * Renders this object as {@code label.toString(format)} followed by the result of
 * {@code toPrimes()}.
 *
 * @param format The output format handed to the backing label's {@code toString}
 * @return The formatted label string with the {@code toPrimes()} suffix appended
 */
public String toString(CoreLabel.OutputFormat format) { return label.toString(format) + toPrimes(); }
/**
 * Sets the begin position by delegating to {@code label.setBeginPosition}.
 *
 * @param beginPos The value passed through to the backing label
 */
@Override public void setBeginPosition(int beginPos) { label.setBeginPosition(beginPos); }
/**
 * Returns the document ID as reported by {@code label.docID()}.
 *
 * @return The backing label's document ID
 */
@Override public String docID() { return label.docID(); }
/**
 * Sets the NER tag by delegating to {@code label.setNER}.
 *
 * @param ner The value passed through to the backing label
 */
@Override public void setNER(String ner) { label.setNER(ner); }
/**
 * Returns the NER tag as reported by {@code label.ner()}.
 *
 * @return The backing label's NER tag
 */
@Override public String ner() { return label.ner(); }
/**
 * Sets the lemma by delegating to {@code label.setLemma}.
 *
 * @param lemma The value passed through to the backing label
 */
@Override public void setLemma(String lemma) { label.setLemma(lemma); }
/**
 * Returns the lemma as reported by {@code label.lemma()}.
 *
 * @return The backing label's lemma
 */
@Override public String lemma() { return label.lemma(); }
/**
 * Sets the word by delegating to {@code label.setWord}.
 *
 * @param word The value passed through to the backing label
 */
@Override public void setWord(String word) { label.setWord(word); }
/**
 * Returns the begin position as reported by {@code label.beginPosition()}.
 *
 * @return The backing label's begin position
 */
@Override public int beginPosition() { return label.beginPosition(); }
/**
 * Returns the end position as reported by {@code label.endPosition()}.
 *
 * @return The backing label's end position
 */
@Override public int endPosition() { return label.endPosition(); }
/**
 * Sets the document ID by delegating to {@code label.setDocID}.
 *
 * @param docID The value passed through to the backing label
 */
@Override public void setDocID(String docID) { label.setDocID(docID); }
/**
 * Sets the end position by delegating to {@code label.setEndPosition}.
 *
 * @param endPos The value passed through to the backing label
 */
@Override public void setEndPosition(int endPos) { label.setEndPosition(endPos); }
/**
 * Returns the index as reported by {@code label.index()}.
 *
 * @return The backing label's index
 */
@Override public int index() { return label.index(); }
/**
 * Sets the value by delegating to {@code label.setValue}.
 *
 * @param value The value passed through to the backing label
 */
@Override public void setValue(String value) { label.setValue(value); }
/**
 * Returns the tag as reported by {@code label.tag()}.
 *
 * @return The backing label's tag
 */
@Override public String tag() { return label.tag(); }
/**
 * Returns the sentence index as reported by {@code label.sentIndex()}.
 *
 * @return The backing label's sentence index
 */
@Override public int sentIndex() { return label.sentIndex(); }
/**
 * Sets the sentence index by delegating to {@code label.setSentIndex}.
 *
 * @param sentIndex The value passed through to the backing label
 */
@Override public void setSentIndex(int sentIndex) { label.setSentIndex(sentIndex); }
/**
 * Sets the index by delegating to {@code label.setIndex}.
 *
 * @param index The value passed through to the backing label
 */
@Override public void setIndex(int index) { label.setIndex(index); }
/**
 * Sets the tag by delegating to {@code label.setTag}.
 *
 * @param tag The value passed through to the backing label
 */
@Override public void setTag(String tag) { label.setTag(tag); }