Beispiel #1
0
 public static void standfordNLP() {
   CoreLabelTokenFactory ctf = new CoreLabelTokenFactory();
   PTBTokenizer<CoreLabel> ptb =
       new PTBTokenizer<>(new StringReader(paragraph), ctf, "invertible=true");
   while (ptb.hasNext()) {
     CoreLabel cl = ptb.next();
     System.out.print(
         cl.originalText() + " [" + cl.beginPosition() + "-" + cl.endPosition() + "];");
   }
   System.out.println();
 }
  public static void resolveAnaphora(String text) {

    RedwoodConfiguration.empty().capture(System.err).apply();

    Annotation document = new Annotation(text);
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    props.put("dcoref.female", "female.unigram.txt");
    props.put("dcoref.male", "male.unigram.txt");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);

    RedwoodConfiguration.current().clear().apply();

    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
    List<CoreMap> stnfrdSentences = document.get(SentencesAnnotation.class);

    ImmutableMultimap.Builder<Integer, Pair<CorefChain, CorefMention>> records =
        ImmutableMultimap.builder();
    ImmutableMultimap.Builder<Integer, Pair<CorefChain, CorefMention>> recordsOrdered =
        ImmutableMultimap.builder();

    graph.forEach(
        (key, value) -> {
          value
              .getMentionMap()
              .forEach(
                  (intPair, corefSet) -> {
                    corefSet.forEach(
                        mention -> records.put(mention.sentNum, Pair.of(value, mention)));
                  });
        });

    recordsOrdered =
        records.orderKeysBy(
            new Comparator<Integer>() {
              @Override
              public int compare(Integer o1, Integer o2) {
                return o1 - o2;
              }
            });

    recordsOrdered
        .build()
        .asMap()
        .forEach(
            (sentNum, mentionList) -> {
              CoreMap sentence = stnfrdSentences.get(sentNum - 1);
              List<CoreLabel> stnfrdtokens = sentence.get(TokensAnnotation.class);

              mentionList.forEach(
                  pair -> {
                    CorefChain chain = pair.getLeft();
                    CorefMention mention = pair.getRight();
                    String root = chain.getRepresentativeMention().mentionSpan;

                    if (!mention.mentionSpan.equalsIgnoreCase(root)
                        && (!root.contains(mention.mentionSpan)
                            && !mention.mentionSpan.contains(root))
                        && (!replacementList.contains(root.toLowerCase()))
                        && (root.split("\\s").length < 3)
                        && (replacementList.contains(mention.mentionSpan.toLowerCase()))) {
                      if (mention.mentionSpan.equalsIgnoreCase("her")
                          || mention.mentionSpan.equalsIgnoreCase("his")) {
                        root += "'s";
                      }
                      stnfrdtokens.get(mention.startIndex - 1).setOriginalText(root);
                    }
                  });

              String sent = "";
              for (CoreLabel token : stnfrdtokens) {
                sent += token.originalText() + " ";
              }
              ;
              resolvedSentences += sent + "\n";
            });
  }
Beispiel #3
0
  /** ************************************************************ */
  private boolean needSpaceBefore(CoreLabel label) {

    return needSpaceBefore(label.originalText());
  }