/**
 * Exchanges the two position ranges {@code [i1, i2]} and {@code [j1, j2]} on the first component
 * of every entry in {@code sureAlignments}.
 *
 * <p>Entries whose first component lies outside {@code [i1, j2]} are kept unchanged. Inside that
 * window, positions of the first range are moved so the range ends at {@code j2}, positions of the
 * second range are moved so the range starts at {@code i1}, and positions strictly between the two
 * ranges are shifted by the difference of the range lengths. The second component of each entry is
 * carried over untouched. The field is replaced by a freshly built set.
 *
 * @param i1 first position of the first range (inclusive)
 * @param i2 last position of the first range (inclusive)
 * @param j1 first position of the second range (inclusive)
 * @param j2 last position of the second range (inclusive)
 */
public void swap(int i1, int i2, int j1, int j2) {
  final int firstRangeLength = i2 - i1 + 1;
  final int secondRangeLength = j2 - j1 + 1;
  final int gapShift = secondRangeLength - firstRangeLength;

  Set<Pair<Integer, Integer>> remapped = new HashSet<Pair<Integer, Integer>>();
  for (Pair<Integer, Integer> link : sureAlignments) {
    final int position = link.getFirst();

    // Anything outside the affected window is copied over as-is.
    if (position < i1 || position > j2) {
      remapped.add(link);
      continue;
    }

    // From here on i1 <= position <= j2, so the three cases below are exhaustive.
    final int moved;
    if (position <= i2) {
      // first range: slide right so that it ends at j2
      moved = position + j2 - i2;
    } else if (position < j1) {
      // gap between the ranges: compensate for the length difference
      moved = position + gapShift;
    } else {
      // second range: slide left so that it starts at i1
      moved = position - j1 + i1;
    }
    remapped.add(new Pair<Integer, Integer>(moved, link.getSecond()));
  }
  sureAlignments = remapped;
}
/**
 * Recursively combines the results of the children of node {@code u} stored in {@code g[u]}.
 *
 * <ul>
 *   <li>No children: returns a new pair with {@code x = 1} and {@code n = 1}.
 *   <li>One child: returns the child's pair with its {@code n} incremented by one.
 *   <li>Otherwise: only the first two children are visited; the result has {@code x} equal to the
 *       product of both children's {@code x} and {@code C[p + q][p]}, and {@code n = p + q + 1},
 *       where {@code p} and {@code q} are the children's {@code n} values.
 * </ul>
 *
 * <p>The returned object is the (mutated) pair produced for the last visited child.
 * NOTE(review): {@code C} is presumably a precomputed table of binomial coefficients - confirm.
 *
 * @param u index of the node to evaluate
 * @return the combined pair for the subtree at {@code u}
 */
Pair Calculate(int u) {
  final int childCount = g[u].size();

  if (childCount == 0) {
    Pair leaf = new Pair();
    leaf.x = BigInteger.ONE;
    leaf.n = 1;
    return leaf;
  }

  if (childCount == 1) {
    Pair single = Calculate((int) g[u].get(0));
    single.n++;
    return single;
  }

  // Capture the first child's values before descending into the second child.
  Pair first = Calculate((int) g[u].get(0));
  BigInteger firstWays = first.x;
  int firstSize = first.n;

  Pair second = Calculate((int) g[u].get(1));
  int secondSize = second.n;

  BigInteger combined = firstWays.multiply(second.x);
  combined = combined.multiply(C[firstSize + secondSize][firstSize]);

  // The second child's pair is reused as the result holder.
  second.x = combined;
  second.n = firstSize + secondSize + 1;
  return second;
}
/**
 * Collects the second components of all entries in {@code sureAlignments} whose first component
 * equals the given target position.
 *
 * @param targetPosition the position to match against the first component of each entry
 * @return a new, possibly empty set with the matching second components
 */
public Set<Integer> getAlignedSources(int targetPosition) {
  Set<Integer> matches = new HashSet<Integer>();
  for (Pair<Integer, Integer> link : sureAlignments) {
    // Integer vs. int: the boxed value is unboxed, so this is a numeric comparison.
    if (link.getFirst() != targetPosition) {
      continue;
    }
    matches.add(link.getSecond());
  }
  return matches;
}
/**
 * Looks up the first component of the first entry in {@code sureAlignments} (in iteration order)
 * whose second component equals the given source position.
 *
 * @param sourcePosition the position to match against the second component of each entry
 * @return the first component of the first matching entry, or {@code -999} (after printing a
 *     message to standard error) when no entry matches
 */
public int getAlignedTarget(int sourcePosition) {
  for (Pair<Integer, Integer> link : sureAlignments) {
    final boolean matches = link.getSecond() == sourcePosition;
    if (!matches) {
      continue;
    }
    return link.getFirst();
  }

  // Sentinel result: nothing in the set refers to this source position.
  System.err.println("nothing aligned with " + sourcePosition);
  return -999;
}
/**
 * Returns the next element of {@code output} and advances {@code cursor}.
 *
 * @return the element at the current cursor position with its valid flag set to {@code true}, or
 *     a placeholder {@code new Pair(0, 'a', false)} once the cursor has reached the end
 */
public Pair decode() {
  // Exhausted: hand back a placeholder built with a false flag, leaving the cursor untouched.
  if (cursor >= output.size()) {
    return new Pair(0, 'a', false);
  }

  Pair next = (Pair) output.get(cursor++);
  next.setValid(true);
  return next;
}
private void shiftAlignments(int targetPosition, boolean up, int changeto) { Set<Pair<Integer, Integer>> newAlignments = new HashSet<Pair<Integer, Integer>>(); for (Pair<Integer, Integer> alignment : sureAlignments) { int pos = alignment.getFirst(); // System.out.println("shift: "+pos); if (pos < targetPosition) { newAlignments.add(alignment); } else if (pos == targetPosition) { newAlignments.add( new Pair<Integer, Integer>((up ? pos + 1 : changeto), alignment.getSecond())); } else if (pos > targetPosition) { newAlignments.add( new Pair<Integer, Integer>((up ? pos + 1 : pos - 1), alignment.getSecond())); } } sureAlignments = newAlignments; }
/**
 * Orders two pairs by their {@code toString()} text, then by {@code idFile}, then by
 * {@code idWord}.
 *
 * @param string1 left-hand operand
 * @param string2 right-hand operand
 * @return {@code -1}, {@code 0} or {@code 1} when {@code string1} sorts before, equal to, or
 *     after {@code string2}
 */
public int compare(Pair string1, Pair string2) {
  final int textOrder = string1.toString().compareTo(string2.toString());
  if (textOrder < 0) {
    return -1;
  }
  if (textOrder > 0) {
    return 1;
  }

  // Same text: fall back to the file id ...
  if (string1.idFile < string2.idFile) {
    return -1;
  }
  if (string1.idFile > string2.idFile) {
    return 1;
  }

  // ... and finally to the word id.
  if (string1.idWord < string2.idWord) {
    return -1;
  }
  if (string1.idWord > string2.idWord) {
    return 1;
  }
  return 0;
}
/**
 * Generates a batch of ElGamal key pairs and writes each half of every pair to its own file.
 *
 * <p>For every index {@code i} in {@code [0, numKeys)} the public key is written to
 * {@code <i>public.key} and the private key to {@code <i>private.key} inside the output
 * directory. Each file contains the verbatim encoding of the key's s-expression. The output
 * directory is created if it does not exist.
 *
 * <p>Every failure path prints a diagnostic and terminates the JVM with
 * {@code System.exit(0)}. NOTE(review): exit status 0 on failure is kept for backward
 * compatibility with existing callers; a non-zero status would be more conventional.
 *
 * @param args generator string | number of keys | output directory
 */
public static void main(String[] args) {
  /* Ensure there is correct number of arguments */
  if (args.length != 3) {
    System.err.println("Usage:");
    System.err.println(
        "\tjava crypto.ElGamalKeyGenerator [generator string] [number of keys] [output directory]");
    System.exit(0);
  }

  /* Parse the second argument as the number of keys, expecting an integer */
  int numKeys = -1;
  try {
    numKeys = Integer.parseInt(args[1]);
  } catch (NumberFormatException e) {
    System.out.println("Expected integer for [number of keys], found \"" + args[1] + "\".");
    e.printStackTrace();
    System.exit(0);
  }

  /* Create the output directory named by args[2] if it does not already exist */
  File dir = new File(args[2]);
  try {
    // mkdirs() reports failure through its return value, so it has to be checked explicitly.
    if (!dir.isDirectory() && !dir.mkdirs()) {
      System.out.println("Expected path for [output directory], found \"" + args[2] + "\".");
      System.out.println("\tError: could not create directory");
      System.exit(0);
    }
  } catch (SecurityException e) {
    System.out.println("Expected path for [output directory], found \"" + args[2] + "\".");
    System.out.println("\tError: " + e.getMessage());
    System.exit(0);
  }

  /* Iterate over the number of keys to be generated */
  for (int i = 0; i < numKeys; i++) {
    /* Generate ElGamal Private and Public key Pair. */
    Pair<Key> keys = ElGamalCrypto.SINGLETON.generate(args[0]);
    Key publicKey = keys.get1();
    Key privateKey = keys.get2();

    /* Here we parse the keys into an s-expression */
    ASExpression pub = publicKey.toASE();
    ASExpression priv = privateKey.toASE();

    /* The generated keys are stored in <index>public.key and <index>private.key respectively. */
    File pubFile = new File(dir, i + "public.key");
    File privFile = new File(dir, i + "private.key");

    try {
      writeVerbatim(pubFile, pub);
      writeVerbatim(privFile, priv);
    } catch (IOException e) {
      System.err.println("Encountered error writing key files.");
      e.printStackTrace();
      System.exit(0);
    }
  }
}

/**
 * Writes the verbatim encoding of {@code expression} to {@code file}, closing the stream even when
 * the write fails.
 */
private static void writeVerbatim(File file, ASExpression expression) throws IOException {
  try (OutputStream out = new FileOutputStream(file)) {
    out.write(expression.toVerbatim());
    out.flush();
  }
}
/**
 * Reads {@code N} points from {@code sc} and prints {@code N} plus the number of additional grid
 * positions that lie on both a vertical and a horizontal span of the input.
 *
 * <p>A vertical span is built for every x that occurs on at least two points and runs from the
 * smallest to the largest y at that x; horizontal spans are built the same way per y. Every
 * position where a horizontal span crosses a vertical span, and that is not already recorded for
 * that x, is counted once and recorded.
 *
 * @throws Exception declared for the caller's benefit; propagates whatever reading the input
 *     throws
 */
public void solve() throws Exception {
  P[] ps = new P[N];
  // x -> every y already occupied on that vertical line (input points plus counted crossings)
  Map<Integer, Set<Integer>> map = new HashMap<Integer, Set<Integer>>();
  for (int i = 0; i < N; i++) {
    P p = new P();
    p.x = sc.nextInt();
    p.y = sc.nextInt();
    ps[i] = p;
    Set<Integer> ys = map.get(p.x);
    if (ys == null) {
      ys = new HashSet<Integer>();
      map.put(p.x, ys);
    }
    ys.add(p.y);
  }

  // Group by x (ties broken by y). Integer.compare avoids the overflow of "a - b" comparators.
  Arrays.sort(
      ps,
      new Comparator<P>() {
        @Override
        public int compare(P p1, P p2) {
          if (p1.x != p2.x) {
            return Integer.compare(p1.x, p2.x);
          }
          return Integer.compare(p1.y, p2.y);
        }
      });

  // One vertical span per x that holds at least two points.
  List<Pair> yp = new ArrayList<Pair>();
  for (int i = 0; i < ps.length - 1; i++) {
    if (ps[i].x != ps[i + 1].x) continue;
    int lidx = i + 1;
    while (lidx + 1 < ps.length && ps[i].x == ps[lidx + 1].x) {
      lidx++;
    }
    Pair pair = new Pair();
    pair.s = ps[i].y;
    pair.l = ps[lidx].y;
    pair.base = ps[i].x;
    yp.add(pair);
    i = lidx; // the loop increment then moves on to the next group
  }

  // Group by y (ties broken by x).
  Arrays.sort(
      ps,
      new Comparator<P>() {
        @Override
        public int compare(P p1, P p2) {
          if (p1.y != p2.y) {
            return Integer.compare(p1.y, p2.y);
          }
          return Integer.compare(p1.x, p2.x);
        }
      });

  // One horizontal span per y that holds at least two points.
  List<Pair> xp = new ArrayList<Pair>();
  for (int i = 0; i < ps.length - 1; i++) {
    if (ps[i].y != ps[i + 1].y) continue;
    int lidx = i + 1;
    while (lidx + 1 < ps.length && ps[i].y == ps[lidx + 1].y) {
      lidx++;
    }
    Pair pair = new Pair();
    pair.s = ps[i].x;
    pair.l = ps[lidx].x;
    pair.base = ps[i].y;
    xp.add(pair);
    i = lidx;
  }

  int ans = 0;
  for (int i = 0; i < yp.size(); i++) {
    int xnow = yp.get(i).base;
    int sy = yp.get(i).s;
    int ly = yp.get(i).l;

    // Every span base comes from an input point, so this set exists; the guard replaces the
    // former catch-all that silently ignored a missing entry.
    Set<Integer> occupied = map.get(xnow);
    if (occupied == null) continue;

    for (int j = 0; j < xp.size(); j++) {
      int y = xp.get(j).base;
      if (y < sy || ly < y) continue;
      if (xp.get(j).s > xnow || xp.get(j).l < xnow) continue;
      // Set.add returns true only for a position that was not recorded before.
      if (occupied.add(y)) {
        ans++;
      }
    }
  }
  ans += ps.length;
  out.println(ans);
}
/** Generates person names tests from the given archive. @throws IOException */ public void generatePersonNameQuestions( Archive archive, NERModel nerModel, Collection<EmailDocument> allDocs, Lexicon lex, int numClues) throws IOException, GeneralSecurityException, ClassNotFoundException, ReadContentsException, ParseException { this.archive = archive; questions = new ArrayList<>(); ArchiveCluer cluer = new ArchiveCluer(null, archive, nerModel, null, lex); tabooCluesSet = new LinkedHashSet<>(); archive.assignThreadIds(); List<ClueEvaluator> evaluators = getDefaultEvals(); List<Document> docs = archive.getAllDocs(); Multimap<Contact, EmailDocument> contactToMessages = LinkedHashMultimap.create(); Multimap<Contact, Long> contactToThreadIds = LinkedHashMultimap.create(); // sort by date Collections.sort(docs); Date earliestDate = null, latestDate = null; Map<Contact, Date> contactToLatestDate = new LinkedHashMap<>(); // compute earliest and latest date across all messages in corpus for (Document doc : docs) { EmailDocument ed = (EmailDocument) doc; if (earliestDate == null || ed.date.before(earliestDate)) earliestDate = ed.date; if (latestDate == null || ed.date.after(latestDate)) latestDate = ed.date; } JSPHelper.log.info( "===================\nStarting to generate person names memory questions from " + docs.size() + " messages with " + numClues + " questions" + ", earliest date = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(earliestDate) + " latest date = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(latestDate)); Set<Integer> tabooSentenceHashes = new LinkedHashSet<>(); // create hashes of all sentences seen at least twice (case insensitive, lower cased) { Set<Integer> hashesSeen = new LinkedHashSet<>(); for (Document d : docs) { String contents = archive.getContents(d, true); String cleanedContents = EmailUtils.cleanupEmailMessage(contents); SentenceTokenizer st = new SentenceTokenizer(cleanedContents); while (st.hasMoreSentences()) 
{ String sentence = st.nextSentence(); sentence = canonicalizeSentence(sentence); int hashCode = sentence.hashCode(); if (hashesSeen.contains(hashCode)) { tabooSentenceHashes.add(hashCode); log.info("Marking sentence as taboo: " + sentence); } else hashesSeen.add(hashCode); } } } // compute contactToLatestDate that contact has been seen on for (Document doc : docs) { EmailDocument ed = (EmailDocument) doc; // discard doc if it is not a sent mail if ((ed.sentOrReceived(archive.addressBook) & EmailDocument.SENT_MASK) == 0) continue; for (Contact c : ed.getParticipatingContactsExceptOwn(archive.addressBook)) { Date currentLatestDate = contactToLatestDate.get(c); if (currentLatestDate == null || currentLatestDate.before(ed.date)) contactToLatestDate.put(c, ed.date); contactToMessages.put(c, ed); contactToThreadIds.put(c, ed.threadID); } } log.info("We are considering " + contactToLatestDate.size() + " contacts"); Date currentDate = new Date(); List<Pair<Date, Date>> intervals = computeDateIntervals(earliestDate, currentDate); // go back from current date // intervals[0] is the most recent. JSPHelper.log.info("done computing " + intervals.size() + " intervals"); for (Pair<Date, Date> p : intervals) JSPHelper.log.info( "Interval: " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getFirst()) + " - " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getSecond())); int cluesPerInterval = (numClues > 0 && intervals.size() > 0) ? (numClues + intervals.size() - 1) / intervals.size() : 0; JSPHelper.log.info( "Will try to generate " + Util.pluralize(cluesPerInterval, "questions") + " per interval"); Multimap<Integer, Contact> intervalToContacts = LinkedHashMultimap.create(); // nSent is the number of sentences allowed in a clue text int nSent = 2; for (Contact c : contactToLatestDate.keySet()) { Date lastSeenDate = contactToLatestDate.get(c); // which interval does this date belong to? 
we'll assign this contact in that interval in the // intervalToContacts map int interval = -1; Date intervalStart = null, intervalEnd = null; { int i = 0; for (Pair<Date, Date> p : intervals) { intervalStart = p.getFirst(); intervalEnd = p.getSecond(); if ((intervalStart.before(lastSeenDate) && intervalEnd.after(lastSeenDate)) || intervalStart.equals(lastSeenDate) || intervalEnd.equals(lastSeenDate)) { interval = i; break; } i++; } } if (interval < 0 || interval == intervals.size()) { JSPHelper.log.info( "What, no interval!? for " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(lastSeenDate)); continue; } intervalToContacts.put(interval, c); } log.info("Interval information (interval 0 is the most recent):"); for (int interval = 0; interval < intervals.size(); interval++) { Collection<Contact> contacts = intervalToContacts.get(interval); int nContactsForThisInterval = (contacts == null) ? 0 : contacts.size(); log.info( "In interval " + interval + " there are " + Util.pluralize(nContactsForThisInterval, "candidate contact") + " who were last seen in this interval"); } for (int interval = 0; interval < intervals.size(); interval++) { Date intervalStart = intervals.get(interval).getFirst(); Date intervalEnd = intervals.get(interval).getSecond(); Collection<Contact> candidateContactsForThisInterval = intervalToContacts.get(interval); if (candidateContactsForThisInterval == null) { log.info("Skipping interval " + interval + " because there are no contacts"); continue; } Map<Clue, Contact> clueToContact = new LinkedHashMap<>(); log.info("=======\nGenerating questions for interval " + interval); outer: for (Contact c : candidateContactsForThisInterval) { String name = c.pickBestName(); if (name.length() < 2) // could also check if alphanumberic only continue outer; // ignore contact if name does not contain all alphabets. Even a period is not allowed. only // space is allowed. 
for (char ch : name.toCharArray()) { if (!Character.isAlphabetic(ch) && !Character.isSpaceChar(ch)) continue outer; } Clue clue = cluer.createPersonNameClue( c, evaluators, nerModel, intervalStart, intervalEnd, nSent, archive, tabooSentenceHashes); if (clue != null) clueToContact.put(clue, c); } List<Clue> clueList = new ArrayList(clueToContact.keySet()); Collections.sort(clueList); List<Clue> selectedClues = new ArrayList<>(); for (int i = 0; i < cluesPerInterval && i < clueList.size(); i++) { selectedClues.add(clueList.get(i)); } log.info( "For interval " + interval + " selected " + selectedClues.size() + " contacts out of " + clueList.size() + " possible candidates."); // for (Clue c: clueList) // log.info ("Clue candidate for " + clueToContact.get(c).pickBestName() + " // score = " + c.clueStats.finalScore+ " clue is " + c ); // for (Clue c: selectedClues) // log.info ("Selected clue: " + clueToContact.get(c).pickBestName() + " score = " // + c.clueStats.finalScore+ " clue is " + c); for (Clue selectedClue : selectedClues) { Contact c = clueToContact.get(selectedClue); String name = c.pickBestName(); List<Integer> lengthList = Crossword.convertToWord(name).getSecond(); String lengthDescr = ""; if (lengthList.size() > 1) lengthDescr += Integer.toString(lengthList.size()) + " words: "; for (Integer i : lengthList) { lengthDescr += Util.pluralize(i, "letter") + ", "; } lengthDescr = lengthDescr.substring(0, lengthDescr.length() - 2); // subtract the extra comma. 
ClueInfo ci = new ClueInfo(); ci.lastSeenDate = contactToLatestDate.get(c); ci.nMessages = contactToThreadIds.get(c).size(); ci.nThreads = contactToThreadIds.get(c).size(); questions.add(new MemoryQuestion(this, name, selectedClue, 1, lengthDescr)); } } log.info(questions.size() + " questions generated"); log.info("Top candidates are:"); // sort q's by clue score Collections.sort(questions); // log.info("Based on clue score, top answers:"); // for (MemoryQuestion mq: questions) // log.info (mq.correctAnswer + " times= clue=" + mq.clue.clue); int count = 0; for (MemoryQuestion mq : questions) { mq.setQuestionNum(count++); } // log the questions as well, just in case we don't get to the final point due to user fatigue // or crashes logStats("questions.final", false); }
/** * Generates list of questions and stores it in the current instance of MemoryStudy We handle two * kinds of questions namely, person names tests and non-person name tests. Non-person name test * is a fill in the blank kind where the blank is to be filled with the correct non-person entity * to complete the sentence person name test is to guess the person in correspondent list based on * some distinctive sentences in the mail * * @param maxInt - max. number of questions from a interval * @throws IOException */ public void generateQuestions( Archive archive, NERModel nerModel, Collection<EmailDocument> allDocs, Lexicon lex, int maxInt, boolean personTest) throws IOException, GeneralSecurityException, ClassNotFoundException, ReadContentsException, ParseException { this.archive = archive; if (allDocs == null) allDocs = (Collection) archive.getAllDocs(); questions = new ArrayList<>(); ArchiveCluer cluer = new ArchiveCluer(null, archive, nerModel, null, lex); Short[] itypes = new Short[] { FeatureDictionary.BUILDING, FeatureDictionary.PLACE, FeatureDictionary.RIVER, FeatureDictionary.ROAD, FeatureDictionary.UNIVERSITY, FeatureDictionary.MOUNTAIN, FeatureDictionary.AIRPORT, FeatureDictionary.ISLAND, FeatureDictionary.MUSEUM, FeatureDictionary.BRIDGE, FeatureDictionary.AIRLINE, FeatureDictionary.THEATRE, FeatureDictionary.LIBRARY, FeatureDictionary.LAWFIRM, FeatureDictionary.GOVAGENCY }; double CUTOFF = 0.001; tabooCluesSet = new LinkedHashSet<>(); archive.assignThreadIds(); List<Document> docs = archive.getAllDocs(); Map<String, Date> entityToLastDate = new LinkedHashMap<>(); Multimap<String, EmailDocument> entityToMessages = LinkedHashMultimap.create(); Multimap<String, Long> entityToThreads = LinkedHashMultimap.create(); Multimap<String, String> ceToDisplayEntity = LinkedHashMultimap.create(); int di = 0; // sort by date Collections.sort(docs); Set<String> ownerNames = archive.ownerNames; Date earliestDate = null, latestDate = null; Set<String> allEntities = new 
LinkedHashSet<>(); for (Document doc : docs) { EmailDocument ed = (EmailDocument) doc; if (earliestDate == null || ed.date.before(earliestDate)) earliestDate = ed.date; if (latestDate == null || ed.date.after(latestDate)) latestDate = ed.date; List<String> entities = new ArrayList<>(); if (!personTest) { entities.addAll( Arrays.asList(archive.getAllNamesInDoc(doc, true)) .stream() .filter(n -> n.typeScore > CUTOFF) .map(n -> n.text) .collect(Collectors.toList())); } else { // do not consider mailing lists if (ed.sentToMailingLists != null && ed.sentToMailingLists.length > 0) continue; // discard doc if it is not a sent mail if ((ed.sentOrReceived(archive.addressBook) & EmailDocument.SENT_MASK) == 0) continue; List<Address> addrs = new ArrayList<>(); if (ed.to != null) for (Address addr : ed.to) addrs.add(addr); List<String> names = new ArrayList<>(); for (Address addr : addrs) { Contact c = archive.addressBook.lookupByAddress(addr); names.add(c.pickBestName()); } for (String name : names) { if (!ownerNames.contains(name) && !DictUtils.hasDictionaryWord(name)) { entities.add(name); } } } allEntities.addAll(entities); // get entities for (String e : entities) { if (Util.nullOrEmpty(e)) continue; e = e.replaceAll("^\\W+|\\W+$", ""); if (e.length() > 10 && e.toUpperCase().equals(e)) continue; // all upper case, more than 10 letters, you're out. 
String ce = DictUtils.canonicalize(e); // canonicalize if (ce == null) { JSPHelper.log.info("Dropping entity: " + e); continue; } ceToDisplayEntity.put(ce, e); entityToLastDate.put(ce, ed.date); entityToMessages.put(ce, ed); entityToThreads.put(ce, ed.threadID); } if ((++di) % 1000 == 0) log.info(di + " of " + docs.size() + " messages processed...<br/>"); } log.info( "Considered #" + allEntities.size() + " unique entities and #" + ceToDisplayEntity.size() + " good ones in #" + docs.size() + " docs<br>"); log.info("Owner Names: " + ownerNames); JSPHelper.log.info( "Considered #" + allEntities.size() + " unique entities and #" + ceToDisplayEntity.size() + " good ones in #" + docs.size() + "docs"); JSPHelper.log.info( "earliest date = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(earliestDate)); JSPHelper.log.info( "latest date = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(latestDate)); Multimap<String, String> tokenToCE = LinkedHashMultimap.create(); for (String ce : ceToDisplayEntity.keySet()) { List<String> tokens = Util.tokenize(ce); for (String t : tokens) tokenToCE.put(t, ce); } // Compute date intervals int DAYS_PER_INTERVAL = 30; List<Pair<Date, Date>> intervals = new ArrayList<Pair<Date, Date>>(); { JSPHelper.log.info("computing time intervals"); Date closingDate = latestDate; JSPHelper.log.info( "closing = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(closingDate)); while (earliestDate.before(closingDate)) { Calendar cal = new GregorianCalendar(); cal.setTime(closingDate); // this is the time of the last sighting of the term // scroll to the beginning of this month cal.set(Calendar.HOUR_OF_DAY, 23); cal.set(Calendar.MINUTE, 59); cal.set(Calendar.SECOND, 59); Date endDate = cal.getTime(); cal.add( Calendar.DATE, (1 - DAYS_PER_INTERVAL)); // 1- because we want from 0:00 of first date to 23:59 of // last date cal.set(Calendar.HOUR_OF_DAY, 0); cal.set(Calendar.MINUTE, 0); cal.set(Calendar.SECOND, 0); Date 
startDate = cal.getTime(); intervals.add(new Pair<Date, Date>(startDate, endDate)); // ok we got an interval // closing date for the next interval is 1 day before endDate cal.add(Calendar.DATE, -1); closingDate = cal.getTime(); } JSPHelper.log.info("done computing intervals, #time intervals: " + intervals.size()); for (Pair<Date, Date> p : intervals) JSPHelper.log.info( "Interval: " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getFirst()) + " - " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getSecond())); } // initialize clueInfos to empty lists List<ClueInfo> clueInfos[] = new ArrayList[intervals.size()]; for (int i = 0; i < intervals.size(); i++) { clueInfos[i] = new ArrayList<ClueInfo>(); } Map<Integer, Integer> intervalCount = new LinkedHashMap<>(); // nSent is the number of sentences allowed in a clue text int nvalidclues = 0, nSent = 2; // generate clueInfos for each entity for (String ce : entityToLastDate.keySet()) { Date lastSeenDate = entityToLastDate.get(ce); // compute displayEntity (which has red for core words) and fullAnswer, which is a simple // string String fullAnswer = ""; { List<String> tokens = Util.tokenize(ceToDisplayEntity.get(ce).iterator().next()); for (String t : tokens) { if (EnglishDictionary.stopWords.contains(t.toLowerCase())) continue; fullAnswer += t + " "; } fullAnswer = fullAnswer.trim(); } // dont want the answer to be scored low just because it has extra non-word chars in the begin // or end fullAnswer = fullAnswer.replaceAll("^\\W+|\\W+$", ""); // which interval does this date belong to? 
int interval = -1; Date intervalStart = null, intervalEnd = null; { int i = 0; for (Pair<Date, Date> p : intervals) { intervalStart = p.getFirst(); intervalEnd = p.getSecond(); if ((intervalStart.before(lastSeenDate) && intervalEnd.after(lastSeenDate)) || intervalStart.equals(lastSeenDate) || intervalEnd.equals(lastSeenDate)) { interval = i; break; } i++; } } if (interval < 0 || interval == intervals.size()) JSPHelper.log.info( "What, no interval!? for " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(lastSeenDate)); if (!intervalCount.containsKey(interval)) intervalCount.put(interval, 0); if (intervalCount.get(interval) > maxInt) continue; intervalCount.put(interval, intervalCount.get(interval) + 1); List<Integer> lengthList = Crossword.convertToWord(fullAnswer).getSecond(); String lengthDescr = ""; if (lengthList.size() > 1) lengthDescr += Integer.toString(lengthList.size()) + " words: "; for (Integer i : lengthList) { lengthDescr += Util.pluralize(i, "letter") + ", "; } lengthDescr = lengthDescr.substring(0, lengthDescr.length() - 2); // subtract the extra comma. ClueInfo ci = new ClueInfo(); ci.link = "../browse?term=\"" + fullAnswer + "\"&sort_by=recent&searchType=original"; ci.lastSeenDate = lastSeenDate; ci.nMessages = entityToMessages.get(ce).size(); ci.nThreads = entityToThreads.get(ce).size(); // TODO: we are doing default initialisation of evaluators by setting it to null below, it is // more appropriate to consider it as an argument for this method Clue clue = cluer.createClue( fullAnswer, (personTest ? ArchiveCluer.QuestionType.GUESS_CORRESPONDENT : ArchiveCluer.QuestionType.FILL_IN_THE_BLANK), null, tabooCluesSet, null, intervalStart, intervalEnd, nSent, archive); if (clue != null) ci.clues = new Clue[] {clue}; if (ci.clues == null || ci.clues.length == 0 || clue == null) { JSPHelper.log.warn("Did not find any clue for: " + fullAnswer); } else { // is the times value of the clue important? 
questions.add(new MemoryQuestion(this, fullAnswer, clue, 1, lengthDescr)); nvalidclues++; // makes sure that the clue with the same statement is not generated again tabooCluesSet.add(clue.clue); } clueInfos[interval].add(ci); } log.info("Found valid clues for " + nvalidclues + " answers"); JSPHelper.log.info("Found valid clues for " + nvalidclues + " answers"); log.info("Top candidates are:"); for (MemoryQuestion mq : questions) log.info(mq.correctAnswer + " times=" + mq.stats.nMessagesWithAnswer); // sort q's by clue score Collections.sort(questions); // log.info("Based on clue score, top answers:"); // for (MemoryQuestion mq: questions) // log.info (mq.correctAnswer + " times= clue=" + mq.clue.clue); // now we have up to 2*N questions, sorted by cluescore. // drop ones that are prefix/suffix of another, and cap to N int prev_size = questions.size(); int new_size = questions.size(); // log.info ("#questions before prefix-suffix elim: " + prev_size + " after: " + new_size); int count = 0; for (MemoryQuestion mq : questions) { mq.setQuestionNum(count++); } // log the questions as well, just in case we don't get to the final point due to user fatigue // or crashes logStats("questions.final", false); }
/**
 * Writes out csv stats as an encrypted file in RESULTS_DIR/&lt;userid&gt;/filename, where
 * RESULTS_DIR is {@code <user.home>/results}, and also logs the same text.
 *
 * <p>The text consists of two STUDYSTATS lines (field names, then field values of the study,
 * index, address book and archive stats) followed by one QUESTIONSTATS header line and one
 * QUESTIONSTATS line per question.
 *
 * @param filename name of the file to create inside the per-user results directory
 * @param nullClues when true, the clue text of every question is set to {@code null} before its
 *     stats are written; this modifies the questions held by this instance
 */
public void logStats(String filename, boolean nullClues) {
  // Named so that it cannot be confused with the study-level this.stats field.
  Indexer.IndexStats indexerStats = archive.getIndexStats();
  StringBuilder statsLog = new StringBuilder();
  Pair<String, String> indexStats = Util.fieldsToCSV(indexerStats, true);
  Pair<String, String> addressBookStats =
      Util.fieldsToCSV(archive.addressBook.getStats(), true);
  // Previously this serialized the index stats a second time because a local variable shadowed
  // the field; the study stats are the ones held in this.stats.
  Pair<String, String> studyStats = Util.fieldsToCSV(this.stats, true);
  Pair<String, String> archiveStats = Util.fieldsToCSV(archive.stats, true);

  statsLog.append(
      "STUDYSTATS-1: "
          + studyStats.getFirst()
          + indexStats.getFirst()
          + addressBookStats.getFirst()
          + archiveStats.getFirst()
          + "\n");
  statsLog.append(
      "STUDYSTATS-2: "
          + studyStats.getSecond()
          + indexStats.getSecond()
          + addressBookStats.getSecond()
          + archiveStats.getSecond()
          + "\n");

  boolean headerWritten = false;
  for (MemoryQuestion mq : this.getQuestions()) {
    if (nullClues) mq.clue.clue = null;
    Pair<String, String> p = Util.fieldsToCSV(mq.clue.clueStats, true);
    Pair<String, String> p1 = Util.fieldsToCSV(mq.stats, true);

    // The header is emitted once, ahead of the first question row.
    // NOTE(review): the header names a trailing "clue" column that the rows below do not emit.
    if (!headerWritten) {
      statsLog.append(
          "QUESTIONSTATS-header: "
              + p.getFirst()
              + ','
              + p1.getFirst()
              + "correct answer, user answer, user answer before hint, clue"
              + "\n");
      headerWritten = true;
    }
    statsLog.append(
        "QUESTIONSTATS-2: "
            + p.getSecond()
            + ','
            + p1.getSecond()
            + mq.correctAnswer
            + ","
            + mq.userAnswer
            + ","
            + mq.userAnswerBeforeHint
            + "\n");
  }

  String RESULTS_DIR =
      System.getProperty("user.home")
          + File.separator
          + "results"
          + File.separator
          + this.stats.userid;
  new File(RESULTS_DIR).mkdirs();
  String file = RESULTS_DIR + File.separator + filename;
  try {
    CryptoUtils.writeEncryptedBytes(statsLog.toString().getBytes("UTF-8"), file);
  } catch (UnsupportedEncodingException e) {
    Util.print_exception(e, log);
  } catch (Exception e) {
    // Best effort: a failed write is reported but does not abort the caller.
    Util.print_exception("NOC ERROR: encryption failed!", e, log);
  }
  log.info(statsLog);
}
/**
 * Orders pairs by their second component in descending order.
 *
 * <p>Uses {@link Double#compare(double, double)} so that the ordering is total: a plain
 * {@code >}/{@code <} test reports every comparison involving NaN as "equal", which violates the
 * comparator contract and can make sorting fail. With this ordering NaN sorts first and
 * {@code 0.0} sorts before {@code -0.0}.
 *
 * @param o1 left-hand pair
 * @param o2 right-hand pair
 * @return a negative value when {@code o1} has the larger second component, a positive value
 *     when it has the smaller one, and zero when both are equal
 */
@Override
public int compare(Pair<Integer, Double> o1, Pair<Integer, Double> o2) {
  // Arguments are swapped on purpose to obtain descending order.
  return Double.compare(o2.getSecond(), o1.getSecond());
}
public static void main(String[] args) { ArrayList<Pair> values = new ArrayList<Pair>(); Pair pair = new Pair(); // set pair values: System.out.println("enter number of pairs: "); Scanner input = new Scanner(System.in); int numPair = input.nextInt(); for (int i = 0; i < numPair; i++) { pair.inputPair(); pair.sortPair(); values.add(new Pair(pair.getMin(), pair.getMax())); } for (Pair value : values) { System.out.println(" [" + value.getMin() + ", " + value.getMax() + "] "); } Collections.sort(values, FIRST_LIST); System.out.println("After sorting: "); for (Pair i : values) System.out.println("[" + i.getMin() + ", " + i.getMax() + "] "); }
/** * The core implementation of the search. * * @param root The root word to search from. Traditionally, this is the root of the sentence. * @param candidateFragments The callback for the resulting sentence fragments. This is a * predicate of a triple of values. The return value of the predicate determines whether we * should continue searching. The triple is a triple of * <ol> * <li>The log probability of the sentence fragment, according to the featurizer and the * weights * <li>The features along the path to this fragment. The last element of this is the * features from the most recent step. * <li>The sentence fragment. Because it is relatively expensive to compute the resulting * tree, this is returned as a lazy {@link Supplier}. * </ol> * * @param classifier The classifier for whether an arc should be on the path to a clause split, a * clause split itself, or neither. * @param featurizer The featurizer to use. Make sure this matches the weights! * @param actionSpace The action space we are allowed to take. Each action defines a means of * splitting a clause on a dependency boundary. */ protected void search( // The root to search from IndexedWord root, // The output specs final Predicate<Triple<Double, List<Counter<String>>, Supplier<SentenceFragment>>> candidateFragments, // The learning specs final Classifier<ClauseSplitter.ClauseClassifierLabel, String> classifier, Map<String, ? 
extends List<String>> hardCodedSplits, final Function<Triple<State, Action, State>, Counter<String>> featurizer, final Collection<Action> actionSpace, final int maxTicks) { // (the fringe) PriorityQueue<Pair<State, List<Counter<String>>>> fringe = new FixedPrioritiesPriorityQueue<>(); // (avoid duplicate work) Set<IndexedWord> seenWords = new HashSet<>(); State firstState = new State(null, null, -9000, null, x -> {}, true); // First state is implicitly "done" fringe.add(Pair.makePair(firstState, new ArrayList<>(0)), -0.0); int ticks = 0; while (!fringe.isEmpty()) { if (++ticks > maxTicks) { // System.err.println("WARNING! Timed out on search with " + ticks + " ticks"); return; } // Useful variables double logProbSoFar = fringe.getPriority(); assert logProbSoFar <= 0.0; Pair<State, List<Counter<String>>> lastStatePair = fringe.removeFirst(); State lastState = lastStatePair.first; List<Counter<String>> featuresSoFar = lastStatePair.second; IndexedWord rootWord = lastState.edge == null ? root : lastState.edge.getDependent(); // Register thunk if (lastState.isDone) { if (!candidateFragments.test( Triple.makeTriple( logProbSoFar, featuresSoFar, () -> { SemanticGraph copy = new SemanticGraph(tree); lastState .thunk .andThen( x -> { // Add the extra edges back in, if they don't break the tree-ness of the // extraction for (IndexedWord newTreeRoot : x.getRoots()) { if (newTreeRoot != null) { // what a strange thing to have happen... 
for (SemanticGraphEdge extraEdge : extraEdgesByGovernor.get(newTreeRoot)) { assert Util.isTree(x); //noinspection unchecked addSubtree( x, newTreeRoot, extraEdge.getRelation().toString(), tree, extraEdge.getDependent(), tree.getIncomingEdgesSorted(newTreeRoot)); assert Util.isTree(x); } } } }) .accept(copy); return new SentenceFragment(copy, assumedTruth, false); }))) { break; } } // Find relevant auxilliary terms SemanticGraphEdge subjOrNull = null; SemanticGraphEdge objOrNull = null; for (SemanticGraphEdge auxEdge : tree.outgoingEdgeIterable(rootWord)) { String relString = auxEdge.getRelation().toString(); if (relString.contains("obj")) { objOrNull = auxEdge; } else if (relString.contains("subj")) { subjOrNull = auxEdge; } } // Iterate over children // For each outgoing edge... for (SemanticGraphEdge outgoingEdge : tree.outgoingEdgeIterable(rootWord)) { // Prohibit indirect speech verbs from splitting off clauses // (e.g., 'said', 'think') // This fires if the governor is an indirect speech verb, and the outgoing edge is a ccomp if (outgoingEdge.getRelation().toString().equals("ccomp") && ((outgoingEdge.getGovernor().lemma() != null && INDIRECT_SPEECH_LEMMAS.contains(outgoingEdge.getGovernor().lemma())) || INDIRECT_SPEECH_LEMMAS.contains(outgoingEdge.getGovernor().word()))) { continue; } // Get some variables String outgoingEdgeRelation = outgoingEdge.getRelation().toString(); List<String> forcedArcOrder = hardCodedSplits.get(outgoingEdgeRelation); if (forcedArcOrder == null && outgoingEdgeRelation.contains(":")) { forcedArcOrder = hardCodedSplits.get( outgoingEdgeRelation.substring(0, outgoingEdgeRelation.indexOf(":")) + ":*"); } boolean doneForcedArc = false; // For each action... for (Action action : (forcedArcOrder == null ? actionSpace : orderActions(actionSpace, forcedArcOrder))) { // Check the prerequisite if (!action.prerequisitesMet(tree, outgoingEdge)) { continue; } if (forcedArcOrder != null && doneForcedArc) { break; } // 1. 
Compute the child state Optional<State> candidate = action.applyTo(tree, lastState, outgoingEdge, subjOrNull, objOrNull); if (candidate.isPresent()) { double logProbability; ClauseClassifierLabel bestLabel; Counter<String> features = featurizer.apply(Triple.makeTriple(lastState, action, candidate.get())); if (forcedArcOrder != null && !doneForcedArc) { logProbability = 0.0; bestLabel = ClauseClassifierLabel.CLAUSE_SPLIT; doneForcedArc = true; } else if (features.containsKey("__undocumented_junit_no_classifier")) { logProbability = Double.NEGATIVE_INFINITY; bestLabel = ClauseClassifierLabel.CLAUSE_INTERM; } else { Counter<ClauseClassifierLabel> scores = classifier.scoresOf(new RVFDatum<>(features)); if (scores.size() > 0) { Counters.logNormalizeInPlace(scores); } String rel = outgoingEdge.getRelation().toString(); if ("nsubj".equals(rel) || "dobj".equals(rel)) { scores.remove( ClauseClassifierLabel.NOT_A_CLAUSE); // Always at least yield on nsubj and dobj } logProbability = Counters.max(scores, Double.NEGATIVE_INFINITY); bestLabel = Counters.argmax(scores, (x, y) -> 0, ClauseClassifierLabel.CLAUSE_SPLIT); } if (bestLabel != ClauseClassifierLabel.NOT_A_CLAUSE) { Pair<State, List<Counter<String>>> childState = Pair.makePair( candidate.get().withIsDone(bestLabel), new ArrayList<Counter<String>>(featuresSoFar) { { add(features); } }); // 2. Register the child state if (!seenWords.contains(childState.first.edge.getDependent())) { // System.err.println(" pushing " + action.signature() + " with " + // argmax.first.edge); fringe.add(childState, logProbability); } } } } } seenWords.add(rootWord); } // System.err.println("Search finished in " + ticks + " ticks and " + classifierEvals + " // classifier evaluations."); }