Exemplo n.º 1
0
  /**
   * Labels the entities in a sentence with the (already trained) sequence classifier and stores
   * the predicted mentions on that sentence.
   *
   * <p>The classifier output may be in BIO or plain IO format; both are handled. Consecutive
   * non-background tokens are merged into one mention until a boundary is reached. A boundary is
   * a background token, a {@code B-} label, a change between two different {@code I-} labels, a
   * change between two different non-BIO labels, or the end of the sentence.
   *
   * <p>The sentence is modified in place: its {@code EntityMentionsAnnotation} is replaced by the
   * predicted mentions and {@code postprocessSentence} is then applied. Relations are not set by
   * this method.
   *
   * @param sentence sentence that we want to extract entities from
   * @param sentCount forwarded unchanged to {@code makeEntityMention} and {@code
   *     postprocessSentence}; presumably the index of the sentence within its document (TODO
   *     confirm against callers)
   * @return the same {@code sentence} instance, with its entity mentions replaced
   */
  private CoreMap extractEntities(CoreMap sentence, int sentCount) {
    // don't add answer annotations
    List<CoreLabel> testSentence =
        AnnotationUtils.sentenceEntityMentionsToCoreLabels(
            sentence, false, annotationsToSkip, null, useSubTypes, useBIO);

    // now label the sentence
    List<CoreLabel> annotatedSentence = this.classifier.classify(testSentence);
    if (logger.isLoggable(Level.FINEST)) {
      logger.finest("CLASSFIER OUTPUT: " + annotatedSentence);
    }

    List<EntityMention> extractedEntities = new ArrayList<>();
    int i = 0;

    // variables which keep track of partially seen entities (i.e. we've seen
    // some but not all the words in them so far)
    String lastType = null;
    int startIndex = -1;

    //
    // note that labels may be in the BIO or just the IO format. we must handle both transparently
    //
    for (CoreLabel label : annotatedSentence) {
      String type = label.get(AnswerAnnotation.class);
      // Constant-first equals: a token without an answer label is treated as background
      // instead of raising a NullPointerException.
      if (SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(type)) {
        type = null;
      }

      // this is an entity end boundary followed by O
      if (type == null && lastType != null) {
        makeEntityMention(sentence, startIndex, i, lastType, extractedEntities, sentCount);
        if (logger.isLoggable(Level.INFO)) {
          logger.info("Found entity: " + extractedEntities.get(extractedEntities.size() - 1));
        }
        startIndex = -1;
      }

      // entity start preceded by an O
      else if (lastType == null && type != null) {
        startIndex = i;
      }

      // entity end followed by another entity of different type
      else if (lastType != null
          && type != null
          && (type.startsWith("B-")
              || (lastType.startsWith("I-") && type.startsWith("I-") && !lastType.equals(type))
              || (notBIO(lastType) && notBIO(type) && !lastType.equals(type)))) {
        makeEntityMention(sentence, startIndex, i, lastType, extractedEntities, sentCount);
        if (logger.isLoggable(Level.INFO)) {
          logger.info("Found entity: " + extractedEntities.get(extractedEntities.size() - 1));
        }
        startIndex = i;
      }

      lastType = type;
      i++;
    }

    // An entity that runs through the last token has no following label to close it, so it
    // must be flushed here. At this point i is the token count, i.e. the exclusive end index,
    // matching how the end index is passed inside the loop.
    if (lastType != null) {
      makeEntityMention(sentence, startIndex, i, lastType, extractedEntities, sentCount);
      if (logger.isLoggable(Level.INFO)) {
        logger.info("Found entity: " + extractedEntities.get(extractedEntities.size() - 1));
      }
    }

    // replace the original annotation with the predicted entities
    sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, extractedEntities);
    if (logger.isLoggable(Level.FINEST)) {
      logger.finest("EXTRACTED ENTITIES: ");
      for (EntityMention e : extractedEntities) {
        logger.finest("\t" + e);
      }
    }

    postprocessSentence(sentence, sentCount);

    return sentence;
  }