/**
 * Sends the given email message via {@code EmailUtils.sendMailMessage}, using {@code _mailsession}.
 *
 * <p>A default recipient address is built from {@code defaultRecipientEmail} and {@code
 * defaultRecipientName} and passed along with the message. If an {@link
 * UnsupportedEncodingException} is raised while doing so, the failure is logged and the method
 * returns normally; the exception is not propagated to the caller.
 *
 * @param emailmessage email message to send
 * @param defaultRecipientEmail email address used to build the default recipient address
 * @param defaultRecipientName display name used to build the default recipient address
 */
private void sendMailMessage(
    EmailMessage emailmessage, String defaultRecipientEmail, String defaultRecipientName) {
  try {
    InternetAddress defaultAddress =
        new InternetAddress(defaultRecipientEmail, defaultRecipientName);
    EmailUtils.sendMailMessage(_mailsession, emailmessage, defaultAddress);
  } catch (UnsupportedEncodingException e) {
    // Pass the exception as the throwable argument (not as the message) so that the log entry
    // carries both a description of what failed and the full stack trace.
    _logger.error("Failed to send email message: unsupported encoding", e);
  }
}
/** * generates protovis string for group activity (in/out) chart + all names in the group. * normalized across all groups. optionally group members names are included. * * @return */ public static List<String> getProtovisForGroups( AddressBook addressBook, List<SimilarGroup<String>> groups, Collection<EmailDocument> allDocs, int nIntervals, int width, int height, boolean generateNames) { // compute in/out dates for each group List<Date>[] inDates = new ArrayList[groups.size()]; List<Date>[] outDates = new ArrayList[groups.size()]; for (int i = 0; i < groups.size(); i++) { inDates[i] = new ArrayList<Date>(); outDates[i] = new ArrayList<Date>(); } for (EmailDocument ed : allDocs) { List<String> rawEmailAddrs = ed.getParticipatingAddrsExcept(addressBook.getOwnAddrs()); List<String> canonicalEmailAddrs = addressBook.convertToCanonicalAddrs(rawEmailAddrs); Collections.sort(canonicalEmailAddrs); Group<String> emailGroup = new Group<String>(canonicalEmailAddrs); int x = Group.bestFit(groups, emailGroup); if (x != -1) { int sentOrReceived = ed.sentOrReceived(addressBook); if ((sentOrReceived & EmailDocument.RECEIVED_MASK) != 0) inDates[x].add(ed.date); if ((sentOrReceived & EmailDocument.SENT_MASK) != 0) outDates[x].add(ed.date); } } // find normalizing max int max = Integer.MIN_VALUE; Pair<Date, Date> p = EmailUtils.getFirstLast(allDocs); Date globalStart = p.getFirst(); Date globalEnd = p.getSecond(); List<Date> intervals = CalendarUtil.divideIntoIntervals(globalStart, globalEnd, nIntervals); for (int i = 0; i < groups.size(); i++) { int x = normalizingMax(inDates[i], outDates[i], intervals, /* inNOut */ true); if (x >= max) max = x; } // generate protovis List<String> result = new ArrayList<String>(); for (int i = 0; i < groups.size(); i++) { int[] inGram = CalendarUtil.computeHistogram(inDates[i], intervals); // double[] normalizedInGram = Util.normalizeHistogramToBase(inGram, max); int[] outGram = CalendarUtil.computeHistogram(outDates[i], intervals); // double[] 
normalizedOutGram = Util.normalizeHistogramToBase(outGram, max); String url = JSPHelper.getURLForGroupMessages(i); StringBuilder sb = new StringBuilder(); sb.append( getProtoVizBox( null, outDates[i].size(), inDates[i].size(), outGram, inGram, max, width, height, true, true, intervals.get(0), intervals.get(intervals.size() - 1), true /*focusOnly*/, "'" + url + "'")); // add names to the mark if needed if (generateNames) { sb.append("<br/><span style=\"font-size:small\">"); for (String str : groups.get(i).elements) sb.append(Util.strippedEmailAddress(str) + "<br/>"); sb.append( "<a href=\"" + url + "\" target=\"_new\"><img title=\"Messages\" src=\"/muse/images/email.jpg\" width=\"25\"/>" + "</a>"); sb.append("</span>"); } result.add(sb.toString()); } return result; }
/** Generates person names tests from the given archive. @throws IOException */ public void generatePersonNameQuestions( Archive archive, NERModel nerModel, Collection<EmailDocument> allDocs, Lexicon lex, int numClues) throws IOException, GeneralSecurityException, ClassNotFoundException, ReadContentsException, ParseException { this.archive = archive; questions = new ArrayList<>(); ArchiveCluer cluer = new ArchiveCluer(null, archive, nerModel, null, lex); tabooCluesSet = new LinkedHashSet<>(); archive.assignThreadIds(); List<ClueEvaluator> evaluators = getDefaultEvals(); List<Document> docs = archive.getAllDocs(); Multimap<Contact, EmailDocument> contactToMessages = LinkedHashMultimap.create(); Multimap<Contact, Long> contactToThreadIds = LinkedHashMultimap.create(); // sort by date Collections.sort(docs); Date earliestDate = null, latestDate = null; Map<Contact, Date> contactToLatestDate = new LinkedHashMap<>(); // compute earliest and latest date across all messages in corpus for (Document doc : docs) { EmailDocument ed = (EmailDocument) doc; if (earliestDate == null || ed.date.before(earliestDate)) earliestDate = ed.date; if (latestDate == null || ed.date.after(latestDate)) latestDate = ed.date; } JSPHelper.log.info( "===================\nStarting to generate person names memory questions from " + docs.size() + " messages with " + numClues + " questions" + ", earliest date = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(earliestDate) + " latest date = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(latestDate)); Set<Integer> tabooSentenceHashes = new LinkedHashSet<>(); // create hashes of all sentences seen at least twice (case insensitive, lower cased) { Set<Integer> hashesSeen = new LinkedHashSet<>(); for (Document d : docs) { String contents = archive.getContents(d, true); String cleanedContents = EmailUtils.cleanupEmailMessage(contents); SentenceTokenizer st = new SentenceTokenizer(cleanedContents); while (st.hasMoreSentences()) 
{ String sentence = st.nextSentence(); sentence = canonicalizeSentence(sentence); int hashCode = sentence.hashCode(); if (hashesSeen.contains(hashCode)) { tabooSentenceHashes.add(hashCode); log.info("Marking sentence as taboo: " + sentence); } else hashesSeen.add(hashCode); } } } // compute contactToLatestDate that contact has been seen on for (Document doc : docs) { EmailDocument ed = (EmailDocument) doc; // discard doc if it is not a sent mail if ((ed.sentOrReceived(archive.addressBook) & EmailDocument.SENT_MASK) == 0) continue; for (Contact c : ed.getParticipatingContactsExceptOwn(archive.addressBook)) { Date currentLatestDate = contactToLatestDate.get(c); if (currentLatestDate == null || currentLatestDate.before(ed.date)) contactToLatestDate.put(c, ed.date); contactToMessages.put(c, ed); contactToThreadIds.put(c, ed.threadID); } } log.info("We are considering " + contactToLatestDate.size() + " contacts"); Date currentDate = new Date(); List<Pair<Date, Date>> intervals = computeDateIntervals(earliestDate, currentDate); // go back from current date // intervals[0] is the most recent. JSPHelper.log.info("done computing " + intervals.size() + " intervals"); for (Pair<Date, Date> p : intervals) JSPHelper.log.info( "Interval: " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getFirst()) + " - " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getSecond())); int cluesPerInterval = (numClues > 0 && intervals.size() > 0) ? (numClues + intervals.size() - 1) / intervals.size() : 0; JSPHelper.log.info( "Will try to generate " + Util.pluralize(cluesPerInterval, "questions") + " per interval"); Multimap<Integer, Contact> intervalToContacts = LinkedHashMultimap.create(); // nSent is the number of sentences allowed in a clue text int nSent = 2; for (Contact c : contactToLatestDate.keySet()) { Date lastSeenDate = contactToLatestDate.get(c); // which interval does this date belong to? 
we'll assign this contact in that interval in the // intervalToContacts map int interval = -1; Date intervalStart = null, intervalEnd = null; { int i = 0; for (Pair<Date, Date> p : intervals) { intervalStart = p.getFirst(); intervalEnd = p.getSecond(); if ((intervalStart.before(lastSeenDate) && intervalEnd.after(lastSeenDate)) || intervalStart.equals(lastSeenDate) || intervalEnd.equals(lastSeenDate)) { interval = i; break; } i++; } } if (interval < 0 || interval == intervals.size()) { JSPHelper.log.info( "What, no interval!? for " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(lastSeenDate)); continue; } intervalToContacts.put(interval, c); } log.info("Interval information (interval 0 is the most recent):"); for (int interval = 0; interval < intervals.size(); interval++) { Collection<Contact> contacts = intervalToContacts.get(interval); int nContactsForThisInterval = (contacts == null) ? 0 : contacts.size(); log.info( "In interval " + interval + " there are " + Util.pluralize(nContactsForThisInterval, "candidate contact") + " who were last seen in this interval"); } for (int interval = 0; interval < intervals.size(); interval++) { Date intervalStart = intervals.get(interval).getFirst(); Date intervalEnd = intervals.get(interval).getSecond(); Collection<Contact> candidateContactsForThisInterval = intervalToContacts.get(interval); if (candidateContactsForThisInterval == null) { log.info("Skipping interval " + interval + " because there are no contacts"); continue; } Map<Clue, Contact> clueToContact = new LinkedHashMap<>(); log.info("=======\nGenerating questions for interval " + interval); outer: for (Contact c : candidateContactsForThisInterval) { String name = c.pickBestName(); if (name.length() < 2) // could also check if alphanumberic only continue outer; // ignore contact if name does not contain all alphabets. Even a period is not allowed. only // space is allowed. 
for (char ch : name.toCharArray()) { if (!Character.isAlphabetic(ch) && !Character.isSpaceChar(ch)) continue outer; } Clue clue = cluer.createPersonNameClue( c, evaluators, nerModel, intervalStart, intervalEnd, nSent, archive, tabooSentenceHashes); if (clue != null) clueToContact.put(clue, c); } List<Clue> clueList = new ArrayList(clueToContact.keySet()); Collections.sort(clueList); List<Clue> selectedClues = new ArrayList<>(); for (int i = 0; i < cluesPerInterval && i < clueList.size(); i++) { selectedClues.add(clueList.get(i)); } log.info( "For interval " + interval + " selected " + selectedClues.size() + " contacts out of " + clueList.size() + " possible candidates."); // for (Clue c: clueList) // log.info ("Clue candidate for " + clueToContact.get(c).pickBestName() + " // score = " + c.clueStats.finalScore+ " clue is " + c ); // for (Clue c: selectedClues) // log.info ("Selected clue: " + clueToContact.get(c).pickBestName() + " score = " // + c.clueStats.finalScore+ " clue is " + c); for (Clue selectedClue : selectedClues) { Contact c = clueToContact.get(selectedClue); String name = c.pickBestName(); List<Integer> lengthList = Crossword.convertToWord(name).getSecond(); String lengthDescr = ""; if (lengthList.size() > 1) lengthDescr += Integer.toString(lengthList.size()) + " words: "; for (Integer i : lengthList) { lengthDescr += Util.pluralize(i, "letter") + ", "; } lengthDescr = lengthDescr.substring(0, lengthDescr.length() - 2); // subtract the extra comma. 
ClueInfo ci = new ClueInfo(); ci.lastSeenDate = contactToLatestDate.get(c); ci.nMessages = contactToThreadIds.get(c).size(); ci.nThreads = contactToThreadIds.get(c).size(); questions.add(new MemoryQuestion(this, name, selectedClue, 1, lengthDescr)); } } log.info(questions.size() + " questions generated"); log.info("Top candidates are:"); // sort q's by clue score Collections.sort(questions); // log.info("Based on clue score, top answers:"); // for (MemoryQuestion mq: questions) // log.info (mq.correctAnswer + " times= clue=" + mq.clue.clue); int count = 0; for (MemoryQuestion mq : questions) { mq.setQuestionNum(count++); } // log the questions as well, just in case we don't get to the final point due to user fatigue // or crashes logStats("questions.final", false); }