Example #1
0
  /**
   * Trains a CRF on a small synthetic corpus and prints how long training took.
   *
   * <p>The corpus consists of 100 sentences of 20 tokens each. Token {@code j} gets the word
   * {@code "j:" + j} and an answer label of {@code "target:true"} or {@code "target:false"}: the
   * label is the parity of {@code j} ({@code true} for even positions), flipped whenever a draw
   * from a fixed-seed {@link Random} exceeds 0.7. The seed is constant, so every run trains on the
   * same data.
   *
   * <p>Profiling notes: 29% in FactorTable.getValue(), 28% in
   * CRFCliqueTree.getCalibratedCliqueTree(), 12.6% waiting for threads.
   *
   * <p>Single threaded: 15000 ms - 26000 ms. Multi threaded: 4500 ms - 7000 ms.
   *
   * <p>With 8 cpus, 3.3x - 3.7x speedup, around 800% utilization.
   */
  public static void benchmarkCRF() {
    Properties props = new Properties();
    props.setProperty("macro", "true"); // use a generic CRF configuration
    props.setProperty("useIfInteger", "true");
    props.setProperty("featureFactory", "edu.stanford.nlp.benchmarks.BenchmarkFeatureFactory");
    props.setProperty("saveFeatureIndexToDisk", "false");

    CRFClassifier<CoreLabel> crf = new CRFClassifier<>(props);

    // Fixed seed keeps the generated corpus identical from run to run.
    Random r = new Random(42);

    final int numSentences = 100;
    final int tokensPerSentence = 20;

    List<List<CoreLabel>> data = new ArrayList<>(numSentences);
    for (int i = 0; i < numSentences; i++) {
      List<CoreLabel> sentence = new ArrayList<>(tokensPerSentence);
      for (int j = 0; j < tokensPerSentence; j++) {
        CoreLabel l = new CoreLabel();

        l.setWord("j:" + j);

        // Even positions are labelled true; XOR with the random draw flips the label as noise.
        // Exactly one draw is consumed per token, so the random sequence stays reproducible.
        boolean tag = (j % 2 == 0) ^ (r.nextDouble() > 0.7);
        l.set(CoreAnnotations.AnswerAnnotation.class, "target:" + tag);
        sentence.add(l);
      }
      data.add(sentence);
    }

    // System.nanoTime() is monotonic, so the measured interval cannot be distorted by wall-clock
    // adjustments the way System.currentTimeMillis() can.
    long nanoStart = System.nanoTime();
    crf.train(data);
    long delay = (System.nanoTime() - nanoStart) / 1_000_000L;
    System.out.println("Training took " + delay + " ms");
  }
 /**
  * Builds a synthetic node that can be attached to the dependency tree even though it does not
  * correspond to any token of the original sentence.
  *
  * @param toCopy The CoreLabel whose fields seed the new node.
  * @param word The text of the new node; it is also used as the node's lemma and value.
  * @param POS The part of speech tag of the new node.
  * @return A new CoreLabel constructed from toCopy, with its word, lemma, value, NER tag (set to
  *     "O"), POS tag and index overwritten.
  */
 @SuppressWarnings("UnusedDeclaration")
 private CoreLabel mockNode(CoreLabel toCopy, String word, String POS) {
   final CoreLabel node = new CoreLabel(toCopy);

   // The same string serves as surface form, lemma and value.
   // The setter order is kept deliberately in case the setters interact.
   node.setWord(word);
   node.setLemma(word);
   node.setValue(word);

   // "O" is presumably the "outside any entity" NER label -- verify against the NER tag set.
   node.setNER("O");
   node.setTag(POS);

   // NOTE(review): the +5 offset presumably keeps the index clear of real token indices -- confirm.
   final int mockIndex = sentenceLength + 5;
   node.setIndex(mockIndex);

   return node;
 }