예제 #1
0
  /**
   * Trains a CRF on a small synthetic data set and prints the elapsed training time to standard
   * output.
   *
   * <p>The data set consists of 100 sentences of 20 tokens each. Token {@code j} gets the word
   * {@code "j:" + j} and an answer label of {@code "target:true"} or {@code "target:false"}: the
   * label is the parity test {@code j % 2 == 0}, flipped whenever a draw from a {@link Random}
   * seeded with 42 exceeds 0.7.
   *
   * <p>Profiling notes recorded with this benchmark: 29% in FactorTable.getValue(), 28% in
   * CRFCliqueTree.getCalibratedCliqueTree(), 12.6% waiting for threads.
   *
   * <p>Single threaded: 15000 ms - 26000 ms. Multi threaded: 4500 ms - 7000 ms.
   *
   * <p>With 8 cpus, 3.3x - 3.7x speedup, around 800% utilization.
   */
  public static void benchmarkCRF() {
    Properties props = new Properties();
    props.setProperty("macro", "true"); // use a generic CRF configuration
    props.setProperty("useIfInteger", "true");
    props.setProperty("featureFactory", "edu.stanford.nlp.benchmarks.BenchmarkFeatureFactory");
    props.setProperty("saveFeatureIndexToDisk", "false");

    CRFClassifier<CoreLabel> crf = new CRFClassifier<>(props);

    // Fixed seed: every run draws the same sequence, so the training data is reproducible.
    Random r = new Random(42);

    final int sentenceCount = 100;
    final int tokensPerSentence = 20;

    List<List<CoreLabel>> data = new ArrayList<>(sentenceCount);
    for (int i = 0; i < sentenceCount; i++) {
      List<CoreLabel> sentence = new ArrayList<>(tokensPerSentence);
      for (int j = 0; j < tokensPerSentence; j++) {
        CoreLabel l = new CoreLabel();

        l.setWord("j:" + j);

        // Parity of the position, XOR-ed with a random flip (draw > 0.7).
        boolean tag = (j % 2 == 0) ^ (r.nextDouble() > 0.7);
        l.set(CoreAnnotations.AnswerAnnotation.class, "target:" + tag);
        sentence.add(l);
      }
      data.add(sentence);
    }

    // nanoTime is monotonic, so the measurement is immune to wall-clock adjustments that could
    // skew (or even make negative) a currentTimeMillis-based interval.
    long nanoStart = System.nanoTime();
    crf.train(data);
    long delay = (System.nanoTime() - nanoStart) / 1_000_000L;
    System.out.println("Training took " + delay + " ms");
  }
예제 #2
0
 /**
  * Appends to {@code l} the label of every leaf under {@code t}, after recording the label of the
  * leaf's pre-terminal parent on it.
  *
  * <p>The tree is walked recursively, visiting children in array order. At each pre-terminal
  * node the first child's label is cast to {@link CoreLabel}, the pre-terminal's own label is
  * stored on it under {@code TagLabelAnnotation}, and the result is added to the list.
  *
  * @param t the (sub)tree to traverse
  * @param l the list that receives the tagged leaf labels
  */
 private static void taggedLeafLabels(Tree t, List<CoreLabel> l) {
   if (!t.isPreTerminal()) {
     // Interior node: descend into each child in order.
     for (Tree child : t.children()) {
       taggedLeafLabels(child, l);
     }
     return;
   }

   CoreLabel leafLabel = (CoreLabel) t.getChild(0).label();
   leafLabel.set(TagLabelAnnotation.class, t.label());
   l.add(leafLabel);
 }
예제 #3
0
  /**
   * Rebuilds the text of an annotated document, substituting tokens that belong to a coreference
   * chain with the words of that chain's representative mention.
   *
   * <p>For each token, the coreference cluster id is looked up in the document's chain map. A
   * token with no chain is copied as is. A token with a chain is copied as is when its index lies
   * between the representative mention's {@code startIndex} and {@code endIndex}; otherwise the
   * words of the representative mention are emitted in its place.
   *
   * <p>The result is also printed to standard output, preceded by an empty line.
   *
   * @param annotation the annotated document; its sentences annotation must be present. If the
   *     coreference chain map is absent, every token is copied unchanged.
   * @return the resulting words, each followed by a single space (so a non-empty result ends with
   *     a trailing space)
   */
  public static final String doCorefResolution(Annotation annotation) {

    Map<Integer, CorefChain> corefs = annotation.get(CorefChainAnnotation.class);
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    List<String> resolved = new ArrayList<>();
    for (CoreMap sentence : sentences) {
      List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
      for (CoreLabel token : tokens) {
        Integer corefClustId = token.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
        // No chain map at all means there is nothing to resolve; treat it like "no chain".
        CorefChain chain = (corefs == null) ? null : corefs.get(corefClustId);
        if (chain == null) {
          resolved.add(token.word());
          continue;
        }

        CorefMention reprMent = chain.getRepresentativeMention();
        // sentNum is used as a 1-based sentence number here, hence the "- 1".
        CoreMap corefSentence = sentences.get(reprMent.sentNum - 1);
        List<CoreLabel> corefSentenceTokens = corefSentence.get(TokensAnnotation.class);

        // NOTE(review): this check treats endIndex as inclusive while the copy loop below treats
        // it as exclusive, and the check ignores which sentence the token is in. Behavior is kept
        // as is - confirm the intended span semantics against the CorefMention documentation.
        if (token.index() < reprMent.startIndex || token.index() > reprMent.endIndex) {
          for (int i = reprMent.startIndex; i < reprMent.endIndex; i++) {
            // Mention indices are used as 1-based positions into the 0-based token list.
            resolved.add(corefSentenceTokens.get(i - 1).word());
          }
        } else {
          resolved.add(token.word());
        }
      }
    }

    // StringBuilder avoids the quadratic copying of repeated String concatenation in a loop.
    StringBuilder joined = new StringBuilder();
    for (String str : resolved) {
      joined.append(str).append(" ");
    }
    String resolvedStr = joined.toString();

    System.out.println();
    System.out.println(resolvedStr);

    return resolvedStr;
  }
 /**
  * Builds a synthetic node that can be attached to the dependency tree even though it does not
  * correspond to any token of the original sentence.
  *
  * <p>The node starts as a copy of {@code toCopy}. Its word, lemma and value are then all set to
  * {@code word}, its NER tag to {@code "O"}, its POS tag to {@code POS}, and its index to
  * {@code sentenceLength + 5}.
  *
  * @param toCopy The CoreLabel whose fields seed the new node.
  * @param word The surface form for the new node; also used as its lemma and value.
  * @param POS The part-of-speech tag for the new node.
  * @return A new CoreLabel copied from {@code toCopy} with the word, lemma, value, NER tag, POS
  *     tag and index overwritten as described above.
  */
 @SuppressWarnings("UnusedDeclaration")
 private CoreLabel mockNode(CoreLabel toCopy, String word, String POS) {
   CoreLabel node = new CoreLabel(toCopy);
   // NOTE(review): setter order is kept exactly as written; CoreLabel setters may affect one
   // another (e.g. word vs. lemma) - confirm before reordering.
   node.setWord(word);
   node.setLemma(word);
   node.setValue(word);
   node.setNER("O");
   node.setTag(POS);
   // Presumably the offset keeps the index clear of real token indices - TODO confirm.
   node.setIndex(sentenceLength + 5);
   return node;
 }