/** Process discourse information */ protected void processDiscourse(Dictionaries dict) { docType = findDocType(dict); markQuotations(this.annotation.get(CoreAnnotations.SentencesAnnotation.class), false); findSpeakers(dict); // find 'speaker mention' for each mention for (Mention m : allPredictedMentions.values()) { int utter = m.headWord.get(CoreAnnotations.UtteranceAnnotation.class); String speaker = m.headWord.get(CoreAnnotations.SpeakerAnnotation.class); if (speaker != null) { // Populate speaker info SpeakerInfo speakerInfo = speakerInfoMap.get(speaker); if (speakerInfo == null) { speakerInfoMap.put(speaker, speakerInfo = new SpeakerInfo(speaker)); // span indicates this is the speaker if (Rules.mentionMatchesSpeaker(m, speakerInfo, true)) { m.speakerInfo = speakerInfo; } } if (NumberMatchingRegex.isDecimalInteger(speaker)) { try { int speakerMentionID = Integer.parseInt(speaker); if (utter != 0) { // Add pairs of mention id and the mention id of the speaker speakerPairs.add(new Pair<Integer, Integer>(m.mentionID, speakerMentionID)); // speakerPairs.add(new Pair<Integer, Integer>(speakerMentionID, // m.mentionID)); } } catch (Exception e) { // no mention found for the speaker // nothing to do } } } // set generic 'you' : e.g., you know in conversation if (docType != DocType.ARTICLE && m.person == Person.YOU && m.endIndex < m.sentenceWords.size() - 1 && m.sentenceWords .get(m.endIndex) .get(CoreAnnotations.TextAnnotation.class) .equalsIgnoreCase("know")) { m.generic = true; } } // now that we have identified the speakers, first pass to check if mentions should cluster with // the speakers for (Mention m : allPredictedMentions.values()) { if (m.speakerInfo == null) { for (SpeakerInfo speakerInfo : speakerInfoMap.values()) { if (speakerInfo.hasRealSpeakerName()) { // do loose match - assumes that there isn't that many speakers.... if (Rules.mentionMatchesSpeaker(m, speakerInfo, false)) { m.speakerInfo = speakerInfo; break; } } } } } }