Пример #1
0
  /**
   * Checks whether one mention is the speaker of the other mention.
   *
   * <p>Returns {@code true} only when all of the following hold:
   *
   * <ul>
   *   <li>the lower-cased span of {@code ant} is contained in {@code dict.firstPersonPronouns},
   *       {@code ant} is not plural, and {@code ant} is in the same sentence as {@code m};
   *   <li>exactly one quotation-mark token ({@code ``} or {@code ''}) lies strictly between the
   *       head words of the two mentions;
   *   <li>a node carrying the head word text of {@code m} exists in {@code m.dependency} and is
   *       attached through an {@code nsubj} relation to a parent whose lemma is contained in
   *       {@code dict.reportVerb}.
   * </ul>
   *
   * @param m the candidate speaker mention
   * @param ant the candidate first-person pronoun mention
   * @param dict dictionaries providing {@code firstPersonPronouns} and {@code reportVerb}
   * @return {@code true} if all conditions above are satisfied, {@code false} otherwise
   */
  public static boolean isSpeaker(Mention m, Mention ant, Dictionaries dict) {

    if (!dict.firstPersonPronouns.contains(ant.spanToString().toLowerCase())
        || ant.number == Number.PLURAL
        || ant.sentNum != m.sentNum) {
      return false;
    }

    // Count the quotation-mark tokens strictly between the two head words. Both mentions are in
    // the same sentence at this point, so m.sentenceWords can be indexed by either head index.
    int countQuotationMark = 0;
    final int firstBetween = Math.min(m.headIndex, ant.headIndex) + 1;
    final int endBetween = Math.max(m.headIndex, ant.headIndex);
    for (int i = firstBetween; i < endBetween; i++) {
      String word = m.sentenceWords.get(i).get(CoreAnnotations.TextAnnotation.class);
      if (word.equals("``") || word.equals("''")) {
        countQuotationMark++;
      }
    }
    if (countQuotationMark != 1) {
      return false;
    }

    // getNodeByWordPattern interprets its argument as a regular expression. The head word is
    // literal text, so it is quoted: otherwise a token such as "?" or "*" raises a
    // PatternSyntaxException and a token such as "." matches unrelated nodes.
    // NOTE(review): the lookup is by word text, so with repeated words it may return a node
    // other than the one at m.headIndex -- confirm whether an index-based lookup is preferable.
    String headText = m.sentenceWords.get(m.headIndex).get(CoreAnnotations.TextAnnotation.class);
    IndexedWord w = m.dependency.getNodeByWordPattern(java.util.regex.Pattern.quote(headText));
    if (w == null) {
      return false;
    }

    for (Pair<GrammaticalRelation, IndexedWord> parent : m.dependency.parentPairs(w)) {
      if (parent.first().getShortName().equals("nsubj")
          && dict.reportVerb.contains(parent.second().get(CoreAnnotations.LemmaAnnotation.class))) {
        return true;
      }
    }
    return false;
  }
Пример #2
0
 /**
  * Collects speaker information for the document in two passes.
  *
  * <p>First, for every predicted mention that has a predicate nominative whose lower-cased span
  * is {@code "i"}, the mention's ID is stored in {@code speakers} under the utterance index of
  * the mention's head word.
  *
  * <p>Second, the sentences of {@code annotation} are grouped into runs that share the utterance
  * index of their first token. Each finished run is handed to {@code findParagraphSpeaker}
  * together with its utterance index, the value returned by the previous call (initially the
  * empty string) and the number of sentences that preceded the run.
  *
  * @param dict dictionaries forwarded to {@code findParagraphSpeaker}
  */
 private void findSpeakersInConversation(Dictionaries dict) {
   // Pass 1: mentions linked to "I" through a predicate nominative.
   for (List<Mention> sentenceMentions : predictedOrderedMentionsBySentence) {
     for (Mention mention : sentenceMentions) {
       if (mention.predicateNominatives != null) {
         for (Mention nominative : mention.predicateNominatives) {
           String loweredSpan = nominative.spanToString().toLowerCase();
           if (loweredSpan.equals("i")) {
             speakers.put(
                 mention.headWord.get(CoreAnnotations.UtteranceAnnotation.class),
                 Integer.toString(mention.mentionID));
           }
         }
       }
     }
   }

   // Pass 2: walk the sentences, flushing the collected run whenever the utterance index changes.
   String carriedSpeaker = "";
   int runUtterance = 0;
   int sentencesBeforeRun = 0;
   List<CoreMap> run = new ArrayList<CoreMap>();
   for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
     int sentenceUtterance =
         sentence
             .get(CoreAnnotations.TokensAnnotation.class)
             .get(0)
             .get(CoreAnnotations.UtteranceAnnotation.class);
     boolean utteranceChanged = sentenceUtterance != runUtterance;
     if (utteranceChanged) {
       carriedSpeaker =
           findParagraphSpeaker(run, runUtterance, carriedSpeaker, sentencesBeforeRun, dict);
       sentencesBeforeRun += run.size();
       runUtterance = sentenceUtterance;
       run = new ArrayList<CoreMap>();
     }
     run.add(sentence);
   }

   // Flush the last run; its return value is not needed.
   findParagraphSpeaker(run, runUtterance, carriedSpeaker, sentencesBeforeRun, dict);
 }
Пример #3
0
  /**
   * Marks twin mentions using strict matching: a predicted mention is paired with a gold mention
   * only when both their start and end indices are equal.
   *
   * <p>For each sentence, gold mentions are indexed by their (start, end) span. Every predicted
   * mention whose span has an unconsumed gold mention takes over that gold mention's ID, and
   * both are flagged as not twinless. Each gold mention is paired at most once. Predicted
   * mentions that remain twinless afterwards get 10000 added to their mention ID so they are
   * easy to recognize.
   */
  private void findTwinMentionsStrict() {
    for (int sentNum = 0; sentNum < goldOrderedMentionsBySentence.size(); sentNum++) {
      List<Mention> golds = goldOrderedMentionsBySentence.get(sentNum);
      List<Mention> predicts = predictedOrderedMentionsBySentence.get(sentNum);

      // For CoNLL training there are some documents with gold mentions with the same position
      // offsets
      // See
      // /scr/nlp/data/conll-2011/v2/data/train/data/english/annotations/nw/wsj/09/wsj_0990.v2_auto_conll
      //  (Packwood - Roth)
      CollectionValuedMap<IntPair, Mention> goldMentionPositions =
          new CollectionValuedMap<IntPair, Mention>();
      for (Mention g : golds) {
        IntPair ip = new IntPair(g.startIndex, g.endIndex);
        if (goldMentionPositions.containsKey(ip)) {
          StringBuilder existingMentions = new StringBuilder();
          for (Mention eg : goldMentionPositions.get(ip)) {
            if (existingMentions.length() > 0) {
              existingMentions.append(",");
            }
            existingMentions.append(eg.mentionID);
          }
          SieveCoreferenceSystem.logger.warning(
              "WARNING: gold mentions with the same offsets: "
                  + ip
                  + " mentions="
                  + g.mentionID
                  + ","
                  + existingMentions
                  + ", "
                  + g.spanToString());
        }
        // Duplicate spans are tolerated (only warned about above), so no assertion here.
        goldMentionPositions.add(ip, g);
      }

      for (Mention p : predicts) {
        IntPair pos = new IntPair(p.startIndex, p.endIndex);
        if (!goldMentionPositions.containsKey(pos)) {
          continue;
        }
        Collection<Mention> cm = goldMentionPositions.get(pos);
        // Consuming a gold mention below empties the collection but leaves the key in the map.
        // A later predicted mention with the same span must therefore be skipped instead of
        // calling next() on an exhausted iterator.
        if (cm == null || cm.isEmpty()) {
          continue;
        }
        Mention g = cm.iterator().next();
        cm.remove(g);
        p.mentionID = g.mentionID;
        p.twinless = false;
        g.twinless = false;
      }

      // temp: for making easy to recognize twinless mention
      for (Mention p : predicts) {
        if (p.twinless) {
          p.mentionID += 10000;
        }
      }
    }
  }
Пример #4
0
  /**
   * Initializes positions and corefClusters, putting each predicted mention in a cluster of its
   * own.
   *
   * <p>For the mention at index {@code j} of sentence {@code i} this method:
   *
   * <ul>
   *   <li>logs three warnings if its ID is already present in {@code allPredictedMentions} (and
   *       fails an assertion when assertions are enabled), then registers it under that ID;
   *   <li>stores the tuple (i, j) in {@code positions} and sets the mention's {@code sentNum};
   *   <li>creates a single-element {@code CorefCluster} whose ID equals the mention ID and
   *       records that ID as the mention's {@code corefClusterID};
   *   <li>stores the mention in {@code mentionheadPositions} under the tuple (i, headIndex).
   * </ul>
   */
  private void initializeCorefCluster() {
    for (int sentIdx = 0; sentIdx < predictedOrderedMentionsBySentence.size(); sentIdx++) {
      List<Mention> sentenceMentions = predictedOrderedMentionsBySentence.get(sentIdx);
      for (int mentionIdx = 0; mentionIdx < sentenceMentions.size(); mentionIdx++) {
        Mention mention = sentenceMentions.get(mentionIdx);
        final int id = mention.mentionID;

        // A duplicate ID is reported before the assertion below trips.
        final boolean alreadyRegistered = allPredictedMentions.containsKey(id);
        if (alreadyRegistered) {
          Mention previous = allPredictedMentions.get(id);
          SieveCoreferenceSystem.logger.warning("WARNING: Already contain mention " + id);
          SieveCoreferenceSystem.logger.warning(
              "OLD mention: "
                  + previous.spanToString()
                  + "["
                  + previous.startIndex
                  + ","
                  + previous.endIndex
                  + "]");
          SieveCoreferenceSystem.logger.warning(
              "NEW mention: "
                  + mention.spanToString()
                  + "["
                  + mention.startIndex
                  + ","
                  + mention.endIndex
                  + "]");
        }
        assert !alreadyRegistered;
        allPredictedMentions.put(id, mention);

        // (sentence index, index within sentence)
        IntTuple mentionPosition = new IntTuple(2);
        mentionPosition.set(0, sentIdx);
        mentionPosition.set(1, mentionIdx);
        positions.put(mention, mentionPosition);
        mention.sentNum = sentIdx;

        // Every mention starts out alone in a cluster that shares its ID.
        assert !corefClusters.containsKey(id);
        corefClusters.put(id, new CorefCluster(id, Generics.newHashSet(Arrays.asList(mention))));
        mention.corefClusterID = id;

        // (sentence index, head word index)
        IntTuple headKey = new IntTuple(2);
        headKey.set(0, sentIdx);
        headKey.set(1, mention.headIndex);
        mentionheadPositions.put(headKey, mention);
      }
    }
  }
Пример #5
0
    public CorefMention(Mention m, IntTuple pos) {
      mentionType = m.mentionType;
      number = m.number;
      gender = m.gender;
      animacy = m.animacy;
      startIndex = m.startIndex + 1;
      endIndex = m.endIndex + 1;
      headIndex = m.headIndex + 1;
      corefClusterID = m.corefClusterID;
      sentNum = m.sentNum + 1;
      mentionID = m.mentionID;
      mentionSpan = m.spanToString();

      // index starts from 1
      position = new IntTuple(2);
      position.set(0, pos.get(0) + 1);
      position.set(1, pos.get(1) + 1);

      m.headWord.set(CorefCoreAnnotations.CorefClusterIdAnnotation.class, corefClusterID);
    }