コード例 #1
0
 public DeterministicCorefAnnotator(Properties props) {
   try {
     corefSystem = new SieveCoreferenceSystem(props);
     mentionExtractor = new MentionExtractor(corefSystem.dictionaries(), corefSystem.semantics());
     OLD_FORMAT = Boolean.parseBoolean(props.getProperty("oldCorefFormat", "false"));
   } catch (Exception e) {
     System.err.println("ERROR: cannot create DeterministicCorefAnnotator!");
     e.printStackTrace();
     throw new RuntimeException(e);
   }
 }
コード例 #2
0
  public void annotate(Annotation annotation) {
    try {
      List<Tree> trees = new ArrayList<Tree>();
      List<List<CoreLabel>> sentences = new ArrayList<List<CoreLabel>>();

      // extract trees and sentence words
      // we are only supporting the new annotation standard for this Annotator!
      if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        int sentNum = 0;
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
          List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
          sentences.add(tokens);
          Tree tree = sentence.get(TreeAnnotation.class);
          trees.add(tree);

          MentionExtractor.mergeLabels(tree, tokens);
          MentionExtractor.initializeUtterance(tokens);
        }
      } else {
        System.err.println(
            "ERROR: this coreference resolution system requires SentencesAnnotation!");
        return;
      }

      // extract all possible mentions
      RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder();
      List<List<Mention>> allUnprocessedMentions =
          finder.extractPredictedMentions(annotation, 0, corefSystem.dictionaries());

      // add the relevant info to mentions and order them for coref
      Document document =
          mentionExtractor.arrange(annotation, sentences, trees, allUnprocessedMentions);
      List<List<Mention>> orderedMentions = document.getOrderedMentions();
      if (VERBOSE) {
        for (int i = 0; i < orderedMentions.size(); i++) {
          System.err.printf("Mentions in sentence #%d:\n", i);
          for (int j = 0; j < orderedMentions.get(i).size(); j++) {
            System.err.println(
                "\tMention #" + j + ": " + orderedMentions.get(i).get(j).spanToString());
          }
        }
      }

      Map<Integer, CorefChain> result = corefSystem.coref(document);
      annotation.set(CorefChainAnnotation.class, result);

      // for backward compatibility
      if (OLD_FORMAT) {
        List<Pair<IntTuple, IntTuple>> links = SieveCoreferenceSystem.getLinks(result);

        if (VERBOSE) {
          System.err.printf("Found %d coreference links:\n", links.size());
          for (Pair<IntTuple, IntTuple> link : links) {
            System.err.printf(
                "LINK (%d, %d) -> (%d, %d)\n",
                link.first.get(0), link.first.get(1), link.second.get(0), link.second.get(1));
          }
        }

        //
        // save the coref output as CorefGraphAnnotation
        //
        List<List<CoreLabel>> sents = new ArrayList<List<CoreLabel>>();
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
          List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
          sents.add(tokens);
        }
        // this graph is stored in CorefGraphAnnotation -- the raw links found by the coref system
        List<Pair<IntTuple, IntTuple>> graph = new ArrayList<Pair<IntTuple, IntTuple>>();

        for (Pair<IntTuple, IntTuple> link : links) {
          //
          // Note: all offsets in the graph start at 1 (not at 0!)
          //       we do this for consistency reasons, as indices for syntactic dependencies start
          // at 1
          //
          int srcSent = link.first.get(0);
          int srcTok = orderedMentions.get(srcSent - 1).get(link.first.get(1) - 1).headIndex + 1;
          int dstSent = link.second.get(0);
          int dstTok = orderedMentions.get(dstSent - 1).get(link.second.get(1) - 1).headIndex + 1;
          IntTuple dst = new IntTuple(2);
          dst.set(0, dstSent);
          dst.set(1, dstTok);
          IntTuple src = new IntTuple(2);
          src.set(0, srcSent);
          src.set(1, srcTok);
          graph.add(new Pair<IntTuple, IntTuple>(src, dst));
        }
        annotation.set(CorefGraphAnnotation.class, graph);

        for (CorefChain corefChain : result.values()) {
          if (corefChain.getCorefMentions().size() < 2) continue;
          Set<CoreLabel> coreferentTokens = new HashSet<CoreLabel>();
          Set<CoreLabel> cyclicCoreferentTokens = new HashSet<CoreLabel>();
          for (CorefMention mention : corefChain.getCorefMentions()) {
            CoreMap sentence = annotation.get(SentencesAnnotation.class).get(mention.sentNum - 1);
            CoreLabel token = sentence.get(TokensAnnotation.class).get(mention.headIndex - 1);

            // this stuff is so things will mostly work without us replacing all
            // tokens with CyclicCoreLabels whenever coref is run (which maybe
            // wouldn't be a terrible thing?)
            coreferentTokens.add(token);
            cyclicCoreferentTokens.add(new CyclicCoreLabel(token));
          }
          for (CoreLabel token : coreferentTokens) {
            token.set(CorefClusterAnnotation.class, cyclicCoreferentTokens);
          }
          for (CoreLabel token : cyclicCoreferentTokens) {
            token.set(CorefClusterAnnotation.class, cyclicCoreferentTokens);
          }
        }
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }