Java Mention.originalRef примеры использования

Язык программирования: Java

Класс/Тип: Mention

Метод/Функция: originalRef

Примеров на hotexamples.com: 3

Java Mention.originalRef - 3 примера найдено. Это лучшие примеры Java кода для Mention.originalRef, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

mentionID(6)

context(5)

spanToString(5)

offset(5)

name(5)

length(5)

contextAroundMention(5)

key(4)

startIndex(3)

dependency(3)

endIndex(3)

originalRef(3)

goldCorefClusterID(3)

senses(2)

originalSpan(2)

markCoreferent(2)

twinless(2)

markSingleton(1)

headWord(1)

headToken(1)

gloss(1)

getSingletonFeatures(1)

paragraph(1)

generic(1)

sentNum(1)

equals(1)

speakerInfo(1)

corefClusterID(1)

Пример #1

Показать файл

Файл: CoNLLMentionExtractor.java Проект: nipengadmaster/CoreNLP

  public List<List<Mention>> extractGoldMentions(CoNLL2011DocumentReader.Document conllDoc) {
    List<CoreMap> sentences =
        conllDoc.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class);
    List<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>();
    CollectionValuedMap<String, CoreMap> corefChainMap = conllDoc.getCorefChainMap();
    for (int i = 0; i < sentences.size(); i++) {
      allGoldMentions.add(new ArrayList<Mention>());
    }
    int maxCorefClusterId = -1;
    for (String corefIdStr : corefChainMap.keySet()) {
      int id = Integer.parseInt(corefIdStr);
      if (id > maxCorefClusterId) {
        maxCorefClusterId = id;
      }
    }
    int newMentionID = maxCorefClusterId + 1;
    for (String corefIdStr : corefChainMap.keySet()) {
      int id = Integer.parseInt(corefIdStr);
      int clusterMentionCnt = 0;
      for (CoreMap m : corefChainMap.get(corefIdStr)) {
        clusterMentionCnt++;
        Mention mention = new Mention();

        mention.goldCorefClusterID = id;
        if (clusterMentionCnt == 1) {
          // First mention in cluster
          mention.mentionID = id;
          mention.originalRef = -1;
        } else {
          mention.mentionID = newMentionID;
          mention.originalRef = id;
          newMentionID++;
        }
        if (maxID < mention.mentionID) maxID = mention.mentionID;
        int sentIndex = m.get(CoreAnnotations.SentenceIndexAnnotation.class);
        CoreMap sent = sentences.get(sentIndex);
        mention.startIndex =
            m.get(CoreAnnotations.TokenBeginAnnotation.class)
                - sent.get(CoreAnnotations.TokenBeginAnnotation.class);
        mention.endIndex =
            m.get(CoreAnnotations.TokenEndAnnotation.class)
                - sent.get(CoreAnnotations.TokenBeginAnnotation.class);

        // will be set by arrange
        mention.originalSpan = m.get(CoreAnnotations.TokensAnnotation.class);

        // Mention dependency is collapsed dependency for sentence
        mention.dependency =
            sentences
                .get(sentIndex)
                .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);

        allGoldMentions.get(sentIndex).add(mention);
      }
    }
    return allGoldMentions;
  }

Пример #2

Показать файл

Файл: MUCMentionExtractor.java Проект: taarraas/matrixfactorization

  @Override
  public Document nextDoc() throws Exception {
    List<List<CoreLabel>> allWords = new ArrayList<List<CoreLabel>>();
    List<Tree> allTrees = new ArrayList<Tree>();
    List<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>();
    List<List<Mention>> allPredictedMentions;
    List<CoreMap> allSentences = new ArrayList<CoreMap>();
    Annotation docAnno = new Annotation("");

    Pattern docPattern =
        Pattern.compile("<DOC>(.*?)</DOC>", Pattern.DOTALL + Pattern.CASE_INSENSITIVE);
    Pattern sentencePattern =
        Pattern.compile(
            "(<s>|<hl>|<dd>|<DATELINE>)(.*?)(</s>|</hl>|</dd>|</DATELINE>)",
            Pattern.DOTALL + Pattern.CASE_INSENSITIVE);
    Matcher docMatcher = docPattern.matcher(fileContents);
    if (!docMatcher.find(currentOffset)) return null;

    currentOffset = docMatcher.end();
    String doc = docMatcher.group(1);
    Matcher sentenceMatcher = sentencePattern.matcher(doc);
    String ner = null;

    // Maintain current document ID.
    Pattern docIDPattern =
        Pattern.compile("<DOCNO>(.*?)</DOCNO>", Pattern.DOTALL + Pattern.CASE_INSENSITIVE);
    Matcher docIDMatcher = docIDPattern.matcher(doc);
    if (docIDMatcher.find()) currentDocumentID = docIDMatcher.group(1);
    else currentDocumentID = "documentAfter " + currentDocumentID;

    while (sentenceMatcher.find()) {
      String sentenceString = sentenceMatcher.group(2);
      List<CoreLabel> words =
          tokenizerFactory.getTokenizer(new StringReader(sentenceString)).tokenize();

      // FIXING TOKENIZATION PROBLEMS
      for (int i = 0; i < words.size(); i++) {
        CoreLabel w = words.get(i);
        if (i > 0 && w.word().equals("$")) {
          if (!words.get(i - 1).word().endsWith("PRP") && !words.get(i - 1).word().endsWith("WP"))
            continue;
          words.get(i - 1).set(CoreAnnotations.TextAnnotation.class, words.get(i - 1).word() + "$");
          words.remove(i);
          i--;
        } else if (w.word().equals("\\/")) {
          if (words.get(i - 1).word().equals("</COREF>")) continue;
          w.set(
              CoreAnnotations.TextAnnotation.class,
              words.get(i - 1).word() + "\\/" + words.get(i + 1).word());
          words.remove(i + 1);
          words.remove(i - 1);
        }
      }
      // END FIXING TOKENIZATION PROBLEMS

      List<CoreLabel> sentence = new ArrayList<CoreLabel>();
      // MUC accepts embedded coref mentions, so we need to keep a stack for the mentions currently
      // open
      Stack<Mention> stack = new Stack<Mention>();
      List<Mention> mentions = new ArrayList<Mention>();

      allWords.add(sentence);
      allGoldMentions.add(mentions);

      for (CoreLabel word : words) {
        String w = word.get(CoreAnnotations.TextAnnotation.class);
        // found regular token: WORD/POS
        if (!w.startsWith("<") && w.contains("\\/") && w.lastIndexOf("\\/") != w.length() - 2) {
          int i = w.lastIndexOf("\\/");
          String w1 = w.substring(0, i);
          // we do NOT set POS info here. We take the POS tags from the parser!
          word.set(CoreAnnotations.TextAnnotation.class, w1);
          word.remove(CoreAnnotations.OriginalTextAnnotation.class);
          if (Constants.USE_GOLD_NE) {
            if (ner != null) {
              word.set(CoreAnnotations.NamedEntityTagAnnotation.class, ner);
            } else {
              word.set(CoreAnnotations.NamedEntityTagAnnotation.class, "O");
            }
          }
          sentence.add(word);
        }
        // found the start SGML tag for a NE, e.g., "<ORGANIZATION>"
        else if (w.startsWith("<") && !w.startsWith("<COREF") && !w.startsWith("</")) {
          Pattern nerPattern = Pattern.compile("<(.*?)>");
          Matcher m = nerPattern.matcher(w);
          m.find();
          ner = m.group(1);
        }
        // found the end SGML tag for a NE, e.g., "</ORGANIZATION>"
        else if (w.startsWith("</") && !w.startsWith("</COREF")) {
          Pattern nerPattern = Pattern.compile("</(.*?)>");
          Matcher m = nerPattern.matcher(w);
          m.find();
          String ner1 = m.group(1);
          if (ner != null && !ner.equals(ner1))
            throw new RuntimeException("Unmatched NE labels in MUC file: " + ner + " v. " + ner1);
          ner = null;
        }
        // found the start SGML tag for a coref mention
        else if (w.startsWith("<COREF")) {
          Mention mention = new Mention();
          // position of this mention in the sentence
          mention.startIndex = sentence.size();

          // extract GOLD info about this coref chain. needed for eval
          Pattern idPattern = Pattern.compile("ID=\"(.*?)\"");
          Pattern refPattern = Pattern.compile("REF=\"(.*?)\"");

          Matcher m = idPattern.matcher(w);
          m.find();
          mention.mentionID = Integer.parseInt(m.group(1));

          m = refPattern.matcher(w);
          if (m.find()) {
            mention.originalRef = Integer.parseInt(m.group(1));
          }

          // open mention. keep track of all open mentions using the stack
          stack.push(mention);
        }
        // found the end SGML tag for a coref mention
        else if (w.equals("</COREF>")) {
          Mention mention = stack.pop();
          mention.endIndex = sentence.size();

          // this is a closed mention. add it to the final list of mentions
          // System.err.printf("Found MENTION: ID=%d, REF=%d\n", mention.mentionID,
          // mention.originalRef);
          mentions.add(mention);
        } else {
          word.remove(CoreAnnotations.OriginalTextAnnotation.class);
          if (Constants.USE_GOLD_NE) {
            if (ner != null) {
              word.set(CoreAnnotations.NamedEntityTagAnnotation.class, ner);
            } else {
              word.set(CoreAnnotations.NamedEntityTagAnnotation.class, "O");
            }
          }
          sentence.add(word);
        }
      }
      StringBuilder textContent = new StringBuilder();
      for (int i = 0; i < sentence.size(); i++) {
        CoreLabel w = sentence.get(i);
        w.set(CoreAnnotations.IndexAnnotation.class, i + 1);
        w.set(CoreAnnotations.UtteranceAnnotation.class, 0);
        if (i > 0) textContent.append(" ");
        textContent.append(w.getString(CoreAnnotations.TextAnnotation.class));
      }
      CoreMap sentCoreMap = new Annotation(textContent.toString());
      allSentences.add(sentCoreMap);
      sentCoreMap.set(CoreAnnotations.TokensAnnotation.class, sentence);
    }

    // assign goldCorefClusterID
    Map<Integer, Mention> idMention = Generics.newHashMap(); // temporary use
    for (List<Mention> goldMentions : allGoldMentions) {
      for (Mention m : goldMentions) {
        idMention.put(m.mentionID, m);
      }
    }
    for (List<Mention> goldMentions : allGoldMentions) {
      for (Mention m : goldMentions) {
        if (m.goldCorefClusterID == -1) {
          if (m.originalRef == -1) m.goldCorefClusterID = m.mentionID;
          else {
            int ref = m.originalRef;
            while (true) {
              Mention m2 = idMention.get(ref);
              if (m2.goldCorefClusterID != -1) {
                m.goldCorefClusterID = m2.goldCorefClusterID;
                break;
              } else if (m2.originalRef == -1) {
                m2.goldCorefClusterID = m2.mentionID;
                m.goldCorefClusterID = m2.goldCorefClusterID;
                break;
              } else {
                ref = m2.originalRef;
              }
            }
          }
        }
      }
    }

    docAnno.set(CoreAnnotations.SentencesAnnotation.class, allSentences);
    stanfordProcessor.annotate(docAnno);

    if (allSentences.size() != allWords.size())
      throw new IllegalStateException("allSentences != allWords");
    for (int i = 0; i < allSentences.size(); i++) {
      List<CoreLabel> annotatedSent =
          allSentences.get(i).get(CoreAnnotations.TokensAnnotation.class);
      List<CoreLabel> unannotatedSent = allWords.get(i);
      List<Mention> mentionInSent = allGoldMentions.get(i);
      for (Mention m : mentionInSent) {
        m.dependency =
            allSentences
                .get(i)
                .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
      }
      if (annotatedSent.size() != unannotatedSent.size()) {
        throw new IllegalStateException("annotatedSent != unannotatedSent");
      }
      for (int j = 0, sz = annotatedSent.size(); j < sz; j++) {
        CoreLabel annotatedWord = annotatedSent.get(j);
        CoreLabel unannotatedWord = unannotatedSent.get(j);
        if (!annotatedWord
            .get(CoreAnnotations.TextAnnotation.class)
            .equals(unannotatedWord.get(CoreAnnotations.TextAnnotation.class))) {
          throw new IllegalStateException("annotatedWord != unannotatedWord");
        }
      }
      allWords.set(i, annotatedSent);
      allTrees.add(allSentences.get(i).get(TreeCoreAnnotations.TreeAnnotation.class));
    }

    // extract predicted mentions
    if (Constants.USE_GOLD_MENTIONS) allPredictedMentions = allGoldMentions;
    else
      allPredictedMentions = mentionFinder.extractPredictedMentions(docAnno, maxID, dictionaries);

    // add the relevant fields to mentions and order them for coref
    return arrange(docAnno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
  }

Пример #3

Показать файл

Файл: Document.java Проект: taarraas/matrixfactorization

  /** Extract gold coref link information */
  protected void extractGoldLinks() {
    //    List<List<Mention>> orderedMentionsBySentence = this.getOrderedMentions();
    List<Pair<IntTuple, IntTuple>> links = new ArrayList<Pair<IntTuple, IntTuple>>();

    // position of each mention in the input matrix, by id
    Map<Integer, IntTuple> positions = Generics.newHashMap();
    // positions of antecedents
    Map<Integer, List<IntTuple>> antecedents = Generics.newHashMap();
    for (int i = 0; i < goldOrderedMentionsBySentence.size(); i++) {
      for (int j = 0; j < goldOrderedMentionsBySentence.get(i).size(); j++) {
        Mention m = goldOrderedMentionsBySentence.get(i).get(j);
        int id = m.mentionID;
        IntTuple pos = new IntTuple(2);
        pos.set(0, i);
        pos.set(1, j);
        positions.put(id, pos);
        antecedents.put(id, new ArrayList<IntTuple>());
      }
    }

    //    SieveCoreferenceSystem.debugPrintMentions(System.err, "", goldOrderedMentionsBySentence);
    for (List<Mention> mentions : goldOrderedMentionsBySentence) {
      for (Mention m : mentions) {
        int id = m.mentionID;
        IntTuple src = positions.get(id);

        assert (src != null);
        if (m.originalRef >= 0) {
          IntTuple dst = positions.get(m.originalRef);
          if (dst == null) {
            throw new RuntimeException("Cannot find gold mention with ID=" + m.originalRef);
          }

          // to deal with cataphoric annotation
          while (dst.get(0) > src.get(0) || (dst.get(0) == src.get(0) && dst.get(1) > src.get(1))) {
            Mention dstMention = goldOrderedMentionsBySentence.get(dst.get(0)).get(dst.get(1));
            m.originalRef = dstMention.originalRef;
            dstMention.originalRef = id;

            if (m.originalRef < 0) break;
            dst = positions.get(m.originalRef);
          }
          if (m.originalRef < 0) continue;

          // A B C: if A<-B, A<-C => make a link B<-C
          for (int k = dst.get(0); k <= src.get(0); k++) {
            for (int l = 0; l < goldOrderedMentionsBySentence.get(k).size(); l++) {
              if (k == dst.get(0) && l < dst.get(1)) continue;
              if (k == src.get(0) && l > src.get(1)) break;
              IntTuple missed = new IntTuple(2);
              missed.set(0, k);
              missed.set(1, l);
              if (links.contains(new Pair<IntTuple, IntTuple>(missed, dst))) {
                antecedents.get(id).add(missed);
                links.add(new Pair<IntTuple, IntTuple>(src, missed));
              }
            }
          }

          links.add(new Pair<IntTuple, IntTuple>(src, dst));

          assert (antecedents.get(id) != null);
          antecedents.get(id).add(dst);

          List<IntTuple> ants = antecedents.get(m.originalRef);
          assert (ants != null);
          for (IntTuple ant : ants) {
            antecedents.get(id).add(ant);
            links.add(new Pair<IntTuple, IntTuple>(src, ant));
          }
        }
      }
    }
    goldLinks = links;
  }