public static void main(String[] args) throws IOException { PrintWriter out; if (args.length > 1) { out = new PrintWriter(args[1]); } else { out = new PrintWriter(System.out); } PrintWriter xmlOut = null; if (args.length > 2) { xmlOut = new PrintWriter(args[2]); } Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma, ner,parse"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); Annotation annotation; if (args.length > 0) { annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[0])); } else { annotation = new Annotation( "Kosgi Santosh sent an email to Stanford University. He didn't get a reply."); } pipeline.annotate(annotation); pipeline.prettyPrint(annotation, out); }
/** * 29% in FactorTable.getValue() 28% in CRFCliqueTree.getCalibratedCliqueTree() 12.6% waiting for * threads * * <p>Single threaded: 15000 ms - 26000 ms Multi threaded: 4500 ms - 7000 ms * * <p>with 8 cpus, 3.3x - 3.7x speedup, around 800% utilization */ public static void benchmarkCRF() { Properties props = new Properties(); props.setProperty("macro", "true"); // use a generic CRF configuration props.setProperty("useIfInteger", "true"); props.setProperty("featureFactory", "edu.stanford.nlp.benchmarks.BenchmarkFeatureFactory"); props.setProperty("saveFeatureIndexToDisk", "false"); CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(props); Random r = new Random(42); List<List<CoreLabel>> data = new ArrayList<>(); for (int i = 0; i < 100; i++) { List<CoreLabel> sentence = new ArrayList<>(); for (int j = 0; j < 20; j++) { CoreLabel l = new CoreLabel(); l.setWord("j:" + j); boolean tag = j % 2 == 0 ^ (r.nextDouble() > 0.7); l.set(CoreAnnotations.AnswerAnnotation.class, "target:" + tag); sentence.add(l); } data.add(sentence); } long msStart = System.currentTimeMillis(); crf.train(data); long delay = System.currentTimeMillis() - msStart; System.out.println("Training took " + delay + " ms"); }