/**
 * Runs a CoreNLP pipeline (tokenize, ssplit, pos, lemma, ner, parse) over some text and
 * pretty-prints the resulting annotation.
 *
 * <p>Command-line arguments, all optional:
 *
 * <ul>
 *   <li>{@code args[0]} - path of a file whose contents are annotated; when absent a built-in
 *       two-sentence sample is used.
 *   <li>{@code args[1]} - path of the file that receives the pretty-printed output; when absent
 *       the output goes to {@code System.out}.
 *   <li>{@code args[2]} - path opened as a second writer ({@code xmlOut}). Nothing is written to
 *       it in this method. NOTE(review): presumably meant for XML output - confirm.
 * </ul>
 *
 * @param args command-line arguments as described above
 * @throws IOException if one of the output files cannot be opened
 */
public static void main(String[] args) throws IOException {
    final boolean writesToFile = args.length > 1;
    PrintWriter out = writesToFile ? new PrintWriter(args[1]) : new PrintWriter(System.out);
    PrintWriter xmlOut = null;
    try {
      if (args.length > 2) {
        xmlOut = new PrintWriter(args[2]);
      }

      Properties props = new Properties();
      props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner,parse");
      StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

      Annotation annotation;
      if (args.length > 0) {
        annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[0]));
      } else {
        annotation =
            new Annotation(
                "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.");
      }

      pipeline.annotate(annotation);
      pipeline.prettyPrint(annotation, out);
    } finally {
      // These writers are created without auto-flush, so buffered output must be pushed out
      // explicitly instead of relying on the pipeline to do it.
      if (xmlOut != null) {
        xmlOut.close();
      }
      if (writesToFile) {
        out.close();
      } else {
        // Only flush: closing this wrapper would also close System.out for the whole JVM.
        out.flush();
      }
    }
  }
Example #2
0
  /**
   * Trains a CRF on a small synthetic data set and prints the elapsed wall-clock training time.
   *
   * <p>The data set consists of 100 sentences of 20 tokens each. The token at position {@code j}
   * gets the word {@code "j:" + j} and an answer label {@code "target:" + tag}, where {@code tag}
   * is "position is even" XOR-ed with a noise bit that is set whenever a draw from a
   * {@code Random} seeded with 42 exceeds 0.7.
   *
   * <p>Profiling notes kept from the original comment: 29% in FactorTable.getValue(), 28% in
   * CRFCliqueTree.getCalibratedCliqueTree(), 12.6% waiting for threads.
   *
   * <p>Single threaded: 15000 ms - 26000 ms. Multi threaded: 4500 ms - 7000 ms.
   *
   * <p>With 8 cpus, 3.3x - 3.7x speedup, around 800% utilization.
   */
  public static void benchmarkCRF() {
    final int sentenceCount = 100;
    final int tokensPerSentence = 20;

    Properties config = new Properties();
    config.setProperty("macro", "true"); // use a generic CRF configuration
    config.setProperty("useIfInteger", "true");
    config.setProperty("featureFactory", "edu.stanford.nlp.benchmarks.BenchmarkFeatureFactory");
    config.setProperty("saveFeatureIndexToDisk", "false");

    CRFClassifier<CoreLabel> classifier = new CRFClassifier<>(config);

    // Fixed seed so every run trains on exactly the same synthetic corpus.
    Random noise = new Random(42);

    List<List<CoreLabel>> corpus = new ArrayList<>();
    for (int s = 0; s < sentenceCount; s++) {
      List<CoreLabel> tokens = new ArrayList<>();
      for (int pos = 0; pos < tokensPerSentence; pos++) {
        boolean evenPosition = (pos % 2 == 0);
        boolean flipped = noise.nextDouble() > 0.7;

        CoreLabel token = new CoreLabel();
        token.setWord("j:" + pos);
        token.set(CoreAnnotations.AnswerAnnotation.class, "target:" + (evenPosition ^ flipped));
        tokens.add(token);
      }
      corpus.add(tokens);
    }

    // Time only the training call, not the data generation above.
    long startedAt = System.currentTimeMillis();
    classifier.train(corpus);
    long elapsedMs = System.currentTimeMillis() - startedAt;
    System.out.println("Training took " + elapsedMs + " ms");
  }