Java AnnotationUtils.sentenceEntityMentionsToCoreLabels示例

编程语言: Java

命名空间/包名称: edu.stanford.nlp.ie.machinereading.structure

类/类型: AnnotationUtils

方法/功能: sentenceEntityMentionsToCoreLabels

hotexamples.com的示例: 2

Java AnnotationUtils.sentenceEntityMentionsToCoreLabels - 共找到 2 个示例。以下是从开源项目中提取的、评价最高的 edu.stanford.nlp.ie.machinereading.structure.AnnotationUtils.sentenceEntityMentionsToCoreLabels 真实 Java 代码示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

entityMentionsToCoreLabels(2)

sentenceEntityMentionsToCoreLabels(2)

示例#1

显示文件

文件： BasicEntityExtractor.java 项目： automenta/corenlp

  /**
   * Writes the sentences of {@code dataset} to CoNLL-style text files, one file per document id.
   *
   * <p>Every token becomes one line of the form {@code word POS label}, and a blank line follows
   * each sentence. The target file for a sentence is {@code dir/<docid>.conll}, where the docid
   * is read from the sentence's {@code DocIDAnnotation}. A new file is opened whenever the docid
   * differs from the docid of the previous sentence; because {@link FileOutputStream} truncates
   * an existing file, sentences of the same document should be contiguous in {@code dataset}.
   *
   * <p>When {@code alreadyBIO} is {@code false}, every label other than {@code "O"} is prefixed
   * with {@code "B-"} (first token of a run of identical labels) or {@code "I-"} (subsequent
   * tokens of that run).
   *
   * @param dir directory the {@code .conll} files are written to
   * @param dataset annotation holding the sentences to dump
   * @param useSubTypes forwarded to {@code AnnotationUtils.sentenceEntityMentionsToCoreLabels}
   * @param alreadyBIO forwarded to {@code AnnotationUtils.sentenceEntityMentionsToCoreLabels};
   *     when {@code true} labels are written unchanged
   * @throws IOException if an output file cannot be opened
   * @throws RuntimeException if a token would not produce exactly three whitespace-separated
   *     fields (e.g. an empty word, tag or label)
   */
  public static void saveCoNLLFiles(
      String dir, Annotation dataset, boolean useSubTypes, boolean alreadyBIO) throws IOException {
    List<CoreMap> sentences = dataset.get(CoreAnnotations.SentencesAnnotation.class);

    String docid = null;
    PrintStream os = null;
    // try/finally so the currently open file is closed even when a sentence fails validation.
    try {
      for (CoreMap sentence : sentences) {
        String myDocid = sentence.get(CoreAnnotations.DocIDAnnotation.class);
        // Null-safe comparison: a missing doc id must neither throw nor force a re-open (and thus
        // truncation) of the same file for every sentence.
        boolean sameDoc = (myDocid == null) ? (docid == null) : myDocid.equals(docid);
        if (os == null || !sameDoc) {
          if (os != null) {
            os.close();
            // Cleared so a failing open below does not leave a stale, already-closed stream.
            os = null;
          }
          docid = myDocid;
          os = new PrintStream(new FileOutputStream(dir + File.separator + docid + ".conll"));
        }
        List<CoreLabel> labeledSentence =
            AnnotationUtils.sentenceEntityMentionsToCoreLabels(
                sentence, true, null, null, useSubTypes, alreadyBIO);
        assert (labeledSentence != null);

        // Label of the previous token in this sentence; used to choose between B- and I-.
        String prev = null;
        for (CoreLabel word : labeledSentence) {
          // Whitespace inside a token would break the column format, so it is replaced.
          String w = word.word().replaceAll("[ \t\n]+", "_");
          String t = word.get(CoreAnnotations.PartOfSpeechAnnotation.class);
          String l = word.get(CoreAnnotations.AnswerAnnotation.class);
          String nl = l;
          if (!alreadyBIO && !l.equals("O")) {
            if (prev != null && l.equals(prev)) {
              nl = "I-" + l;
            } else {
              nl = "B-" + l;
            }
          }
          // Sanity check: the rendered line must split back into exactly three columns.
          String line = w + ' ' + t + ' ' + nl;
          String[] toks = line.split("[ \t\n]+");
          if (toks.length != 3) {
            throw new RuntimeException("INVALID LINE: \"" + line + '"');
          }
          os.printf("%s %s %s\n", w, t, nl);
          prev = l;
        }
        // Blank line marks the sentence boundary.
        os.println();
      }
    } finally {
      if (os != null) {
        os.close();
      }
    }
  }

示例#2

显示文件

文件： BasicEntityExtractor.java 项目： automenta/corenlp

  /**
   * Labels the entities in a sentence with the classifier and stores them on the sentence.
   * Assumes the classifier has already been trained.
   *
   * <p>The sentence is converted to core labels (without answer annotations), classified, and the
   * resulting per-token labels are grouped into entity mentions. Labels may be in BIO or plain IO
   * format; both are handled. The sentence's {@code EntityMentionsAnnotation} is replaced with
   * the predicted mentions, after which {@code postprocessSentence} is invoked. Relations are not
   * set by this method.
   *
   * @param sentence sentence to extract entities from; modified in place
   * @param sentCount forwarded to {@code makeEntityMention} and {@code postprocessSentence}
   *     (presumably the index of the sentence within its document; confirm against callers)
   * @return the same {@code sentence} instance, with its entity mentions replaced
   */
  private CoreMap extractEntities(CoreMap sentence, int sentCount) {
    // don't add answer annotations
    List<CoreLabel> testSentence =
        AnnotationUtils.sentenceEntityMentionsToCoreLabels(
            sentence, false, annotationsToSkip, null, useSubTypes, useBIO);

    // now label the sentence
    List<CoreLabel> annotatedSentence = this.classifier.classify(testSentence);
    if (logger.isLoggable(Level.FINEST)) {
      logger.finest("CLASSFIER OUTPUT: " + annotatedSentence);
    }

    List<EntityMention> extractedEntities = new ArrayList<>();
    // index of the token currently being inspected
    int i = 0;

    // variables which keep track of partially seen entities (i.e. we've seen
    // some but not all the words in them so far)
    String lastType = null;
    int startIndex = -1;

    //
    // note that labels may be in the BIO or just the IO format. we must handle both transparently
    //
    for (CoreLabel label : annotatedSentence) {
      String type = label.get(AnswerAnnotation.class);
      // Constant-first comparison: a token without an answer annotation is treated as background
      // instead of raising a NullPointerException.
      if (SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(type)) {
        type = null;
      }

      // this is an entity end boundary followed by O
      if (type == null && lastType != null) {
        makeEntityMention(sentence, startIndex, i, lastType, extractedEntities, sentCount);
        logger.info("Found entity: " + extractedEntities.get(extractedEntities.size() - 1));
        startIndex = -1;
      }

      // entity start preceded by an O
      else if (lastType == null && type != null) {
        startIndex = i;
      }

      // entity end followed by another entity of different type
      else if (lastType != null
          && type != null
          && (type.startsWith("B-")
              || (lastType.startsWith("I-") && type.startsWith("I-") && !lastType.equals(type))
              || (notBIO(lastType) && notBIO(type) && !lastType.equals(type)))) {
        makeEntityMention(sentence, startIndex, i, lastType, extractedEntities, sentCount);
        logger.info("Found entity: " + extractedEntities.get(extractedEntities.size() - 1));
        startIndex = i;
      }

      lastType = type;
      i++;
    }

    // An entity that extends to the last token is never closed inside the loop, because closing
    // only happens when a following label is seen. Flush it here; i is now the token count, i.e.
    // the same exclusive end index the in-loop calls use.
    if (lastType != null) {
      makeEntityMention(sentence, startIndex, i, lastType, extractedEntities, sentCount);
      logger.info("Found entity: " + extractedEntities.get(extractedEntities.size() - 1));
    }

    // replace the original annotation with the predicted entities
    sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, extractedEntities);
    // Level check hoisted out of the loop; the emitted log records are unchanged.
    if (logger.isLoggable(Level.FINEST)) {
      logger.finest("EXTRACTED ENTITIES: ");
      for (EntityMention e : extractedEntities) {
        logger.finest("\t" + e);
      }
    }

    postprocessSentence(sentence, sentCount);

    return sentence;
  }