/**
 * Annotates the text and collects the anaphor/referent pairs reported for its sentences.
 *
 * <p>The text's {@code toString()} form is annotated with the pipeline obtained from
 * {@code Container.getStanfordCoreNLP()}. The parse tree of every annotated sentence is then
 * wrapped, together with the noun phrases extracted from it, in a {@code Sentence} whose id is
 * the sentence's position in the annotation. Finally the per-sentence finder is asked for the
 * pairs of each sentence, receiving the complete sentence list on every call.
 *
 * @param text the input text; only its {@code toString()} form is used here
 * @return the pairs found for all sentences, concatenated in sentence order
 */
private static List<AnaphorWithReferent> parseText(InputText text) {
  Annotation document = new Annotation(text.toString());
  Container.getStanfordCoreNLP().annotate(document);

  List<CoreMap> annotatedSentences = document.get(CoreAnnotations.SentencesAnnotation.class);
  List<Tree> parseTrees = new ArrayList<>();
  for (CoreMap annotatedSentence : annotatedSentences) {
    parseTrees.add(annotatedSentence.get(TreeCoreAnnotations.TreeAnnotation.class));
  }

  // Every Sentence must exist before the lookup below starts: the finder is handed the full list.
  List<Sentence> sentences = new ArrayList<>();
  for (int index = 0; index < parseTrees.size(); index++) {
    Tree parseTree = parseTrees.get(index);
    sentences.add(
        new Sentence(
            index, parseTree, Container.getNPsFromParseTreeExtractor().extract(parseTree)));
  }

  return sentences
      .stream()
      .flatMap(
          sentence ->
              Container.getAllAnaphorWithReferentPerSentenceFinder()
                  .find(sentence, sentences)
                  .stream())
      .collect(Collectors.toList());
}
public List<Result> find(InputText inputText) { List<Result> results = new ArrayList<>(); // int validSentences = inputText.getSentences().stream().mapToInt(s -> s.isValid() ? 1 : // 0).sum(); // System.out.println(String.format("Total valid sentences to parse: %s/%s", // validSentences, inputText.getSentences().size())); List<AnaphorWithReferent> anaphorsWithReferents = parseText(inputText); List<Result> resultsForText; if (inputText.isTagged()) { resultsForText = compareWithInputText(inputText, anaphorsWithReferents); results.addAll(resultsForText); } else { resultsForText = generateResultsFromParsingResults(inputText, anaphorsWithReferents); results.addAll(resultsForText); } System.out.println("Total of " + results.size() + " results for text: " + inputText.getId()); return results; }
/**
 * Turns each parsed anaphor/referent pair into a {@code Result}, without any expected values.
 *
 * <p>Each result is keyed by the text id and the id of the anaphor's sentence. It records the
 * anaphor's sentence text, string form and start index as the pronoun, and the referent's start
 * index, string form and sentence id as the actual referent.
 *
 * @param inputText the text the pairs were parsed from; only its id is read
 * @param anaphorsWithReferents the pairs produced by parsing
 * @return one result per pair, in the order of {@code anaphorsWithReferents}
 */
private static List<Result> generateResultsFromParsingResults(
    InputText inputText, List<AnaphorWithReferent> anaphorsWithReferents) {
  return anaphorsWithReferents
      .stream()
      .map(
          pair -> {
            Result entry =
                new Result(inputText.getId(), pair.getAnaphor().getSentence().getId());

            // Pronoun side: taken from the anaphor.
            entry.setSentenceText(pair.getAnaphor().getSentence().getText());
            entry.setPronoun(pair.getAnaphor().toString());
            entry.setPronounTokenIndex(pair.getAnaphor().getStartIndex());

            // Actual-referent side: taken from the referent the parser paired with it.
            NpTreeInSentence referent = pair.getReferent();
            entry.setActualReferentStartIndex(referent.getStartIndex());
            entry.setActualReferentText(referent.toString());
            entry.setActualReferentSentenceId(referent.getSentence().getId());
            return entry;
          })
      .collect(Collectors.toCollection(ArrayList::new));
}
private static List<Result> compareWithInputText( InputText inputText, List<AnaphorWithReferent> anaphorsWithReferents) { List<Result> results = new ArrayList<>(); for (InputSentence inputSentence : inputText.getSentences()) { if (!inputSentence.isValid()) { continue; } List<Coref> pronominalCorefs = inputSentence.getPronominalCorefs(); Result result; if (pronominalCorefs.size() == 0) { result = new Result(inputText.getId(), inputSentence.getSentenceId()); result.setSentenceText(inputText.getSentenceById(inputSentence.getSentenceId()).toString()); result.setExpectedReferentSentenceId(null); result.setExpectedReferentStartIndex(null); result.setExpectedReferentText(null); // Assuming only one List<AnaphorWithReferent> anaphorsForSentence = getAnaphorsForSentence(anaphorsWithReferents, inputSentence.getSentenceId()); if (anaphorsForSentence.size() == 0) { continue; } AnaphorWithReferent anaphorForSentence = anaphorsForSentence.get(0); // TODO: check if the same anaphor NpTreeInSentence referent = anaphorForSentence.getReferent(); result.setActualReferentSentenceId(referent.getSentence().getId()); result.setActualReferentText(referent.toString()); result.setActualReferentStartIndex(referent.getStartIndex()); results.add(result); continue; } for (Coref inputPronominal : pronominalCorefs) { result = new Result(inputText.getId(), inputSentence.getSentenceId()); result.setSentenceText(inputText.getSentenceById(inputSentence.getSentenceId()).toString()); int sentenceId = inputPronominal.getSentenceId(); int pronominalIndex = inputPronominal.getStartIndex(); result.setPronoun(inputPronominal.toString()); result.setPronounTokenIndex(pronominalIndex); List<Coref> expectedReferentsForPronominal = inputText.getReferentsForPronominal(inputPronominal.getId()); if (expectedReferentsForPronominal.size() >= 1) { // We take the first referent from the sentence. 
List<String> corefs = expectedReferentsForPronominal .stream() .map(c -> c.toString()) .collect(Collectors.toList()); List<Integer> sentenceIndices = expectedReferentsForPronominal .stream() .map(c -> c.getSentenceId()) .collect(Collectors.toList()); List<Integer> startIndices = expectedReferentsForPronominal .stream() .map(c -> c.getStartIndex()) .collect(Collectors.toList()); result.setExpectedReferentSentenceId(sentenceIndices); result.setExpectedReferentText(corefs); result.setExpectedReferentStartIndex(startIndices); } else { result.setExpectedReferentSentenceId(null); result.setExpectedReferentText(null); result.setExpectedReferentStartIndex(null); } updateResultWithActualReferents( result, anaphorsWithReferents, sentenceId, pronominalIndex, inputPronominal); results.add(result); } } return results; }