public StringBuffer translate() { for (index = 0; index < line.length(); index++) { char c = line.charAt(index); if (Character.isDigit(c)) { dealWithOperand(); } else if (isOperator(c)) { dealWithOperator(c); } else if (c == '(') { stack.push(new Character(c)); } else if (c == ')') { dealWithCloser(); } else if (Character.isSpaceChar(c)) { // do nothing } else { System.out.println("Error: unknown character" + c); } } // pop and output all the operators left on the stack while (!stack.empty()) { out.append(popChar()); } return out; }
public static void main(String[] args) throws Exception { int size = Util.getPropertyInt("size", 100); double min = Util.getPropertyDouble("min", 0.01); double max = Util.getPropertyDouble("max", 0.9); Font font = new Font("serif", Font.PLAIN, size); String fpath = Util.getProperty("font", null); if (fpath != null) { font = Font.createFont(Font.TRUETYPE_FONT, new FileInputStream(fpath)); } for (char c = Character.MIN_VALUE + 1; c < Character.MAX_VALUE; ++c) { int type = Character.getType(c); if (type != Character.CONTROL && type != Character.FORMAT && type != Character.PRIVATE_USE && type != Character.SURROGATE && type != Character.UNASSIGNED && !Character.isMirrored(c) && !Character.isSpaceChar(c)) { String s = "" + c; if (Normalizer.normalize(s, NFKC).contains("\u0308")) continue; // TODO: adhoc UnigramMetrics m = new UnigramMetrics(s, size, false, true, font); if (min < m.getBlackness() && m.getBlackness() < max) { System.out.println("" + c + " " + (int) c); } } } }
/** * This is the write() method of the stream. All Writer subclasses implement this. All other * versions of write() are variants of this one */ public void write(char[] buffer, int index, int len) { synchronized (this.lock) { // Loop through all the characters passed to us for (int i = index; i < index + len; i++) { // If we haven't begun a page (or a new page), do that now. if (page == null) newpage(); // If the character is a line terminator, then begin new line, // unless it is a \n immediately after a \r. if (buffer[i] == '\n') { if (!last_char_was_return) newline(); continue; } if (buffer[i] == '\r') { newline(); last_char_was_return = true; continue; } else last_char_was_return = false; // If it some other non-printing character, ignore it. if (Character.isWhitespace(buffer[i]) && !Character.isSpaceChar(buffer[i]) && (buffer[i] != '\t')) continue; // If no more characters will fit on the line, start a new line. if (charnum >= chars_per_line) { newline(); if (page == null) newpage(); // and start a new page, if necessary } // Now print the character: // If it is a space, skip one space, without output. // If it is a tab, skip the necessary number of spaces. // Otherwise, print the character. // It is inefficient to draw only one character at a time, but // because our FontMetrics don't match up exactly to what the // printer uses we need to position each character individually. if (Character.isSpaceChar(buffer[i])) charnum++; else if (buffer[i] == '\t') charnum += 8 - (charnum % 8); else { page.drawChars( buffer, i, 1, x0 + charnum * charwidth, y0 + (linenum * lineheight) + lineascent); charnum++; } } } }
/**
 * Moves {@code index} forward past any run of blank characters in {@code s}.
 *
 * <p>A character counts as blank when {@link Character#isSpaceChar(char)} accepts it or when it
 * is a line feed, carriage return or tab. On return {@code index} is either at the first
 * non-blank character or equal to {@code s.length()}.
 */
void skipSpaces() {
  for (; index < s.length(); index++) {
    char ch = s.charAt(index);
    boolean blank = Character.isSpaceChar(ch) || ch == '\n' || ch == '\r' || ch == '\t';
    if (!blank) {
      break;
    }
  }
}
/** * Trims trailing spaces from input. * * @param input The input string. * @return A new string with trailing spaces trimmed. If there are no trailing spaces, returns * <CODE>input</CODE>. */ public static String rtrim(String input) { String retVal = input; if (input != null) { int lastCharIndex = input.length() - 1; int originalLastCharIndex = lastCharIndex; while ((lastCharIndex >= 0) && Character.isSpaceChar(input.charAt(lastCharIndex))) { lastCharIndex--; } if (lastCharIndex != originalLastCharIndex) { // We have characters to trim. retVal = input.substring(0, lastCharIndex + 1); } } return retVal; }
/** Generates person names tests from the given archive. @throws IOException */ public void generatePersonNameQuestions( Archive archive, NERModel nerModel, Collection<EmailDocument> allDocs, Lexicon lex, int numClues) throws IOException, GeneralSecurityException, ClassNotFoundException, ReadContentsException, ParseException { this.archive = archive; questions = new ArrayList<>(); ArchiveCluer cluer = new ArchiveCluer(null, archive, nerModel, null, lex); tabooCluesSet = new LinkedHashSet<>(); archive.assignThreadIds(); List<ClueEvaluator> evaluators = getDefaultEvals(); List<Document> docs = archive.getAllDocs(); Multimap<Contact, EmailDocument> contactToMessages = LinkedHashMultimap.create(); Multimap<Contact, Long> contactToThreadIds = LinkedHashMultimap.create(); // sort by date Collections.sort(docs); Date earliestDate = null, latestDate = null; Map<Contact, Date> contactToLatestDate = new LinkedHashMap<>(); // compute earliest and latest date across all messages in corpus for (Document doc : docs) { EmailDocument ed = (EmailDocument) doc; if (earliestDate == null || ed.date.before(earliestDate)) earliestDate = ed.date; if (latestDate == null || ed.date.after(latestDate)) latestDate = ed.date; } JSPHelper.log.info( "===================\nStarting to generate person names memory questions from " + docs.size() + " messages with " + numClues + " questions" + ", earliest date = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(earliestDate) + " latest date = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(latestDate)); Set<Integer> tabooSentenceHashes = new LinkedHashSet<>(); // create hashes of all sentences seen at least twice (case insensitive, lower cased) { Set<Integer> hashesSeen = new LinkedHashSet<>(); for (Document d : docs) { String contents = archive.getContents(d, true); String cleanedContents = EmailUtils.cleanupEmailMessage(contents); SentenceTokenizer st = new SentenceTokenizer(cleanedContents); while (st.hasMoreSentences()) 
{ String sentence = st.nextSentence(); sentence = canonicalizeSentence(sentence); int hashCode = sentence.hashCode(); if (hashesSeen.contains(hashCode)) { tabooSentenceHashes.add(hashCode); log.info("Marking sentence as taboo: " + sentence); } else hashesSeen.add(hashCode); } } } // compute contactToLatestDate that contact has been seen on for (Document doc : docs) { EmailDocument ed = (EmailDocument) doc; // discard doc if it is not a sent mail if ((ed.sentOrReceived(archive.addressBook) & EmailDocument.SENT_MASK) == 0) continue; for (Contact c : ed.getParticipatingContactsExceptOwn(archive.addressBook)) { Date currentLatestDate = contactToLatestDate.get(c); if (currentLatestDate == null || currentLatestDate.before(ed.date)) contactToLatestDate.put(c, ed.date); contactToMessages.put(c, ed); contactToThreadIds.put(c, ed.threadID); } } log.info("We are considering " + contactToLatestDate.size() + " contacts"); Date currentDate = new Date(); List<Pair<Date, Date>> intervals = computeDateIntervals(earliestDate, currentDate); // go back from current date // intervals[0] is the most recent. JSPHelper.log.info("done computing " + intervals.size() + " intervals"); for (Pair<Date, Date> p : intervals) JSPHelper.log.info( "Interval: " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getFirst()) + " - " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getSecond())); int cluesPerInterval = (numClues > 0 && intervals.size() > 0) ? (numClues + intervals.size() - 1) / intervals.size() : 0; JSPHelper.log.info( "Will try to generate " + Util.pluralize(cluesPerInterval, "questions") + " per interval"); Multimap<Integer, Contact> intervalToContacts = LinkedHashMultimap.create(); // nSent is the number of sentences allowed in a clue text int nSent = 2; for (Contact c : contactToLatestDate.keySet()) { Date lastSeenDate = contactToLatestDate.get(c); // which interval does this date belong to? 
we'll assign this contact in that interval in the // intervalToContacts map int interval = -1; Date intervalStart = null, intervalEnd = null; { int i = 0; for (Pair<Date, Date> p : intervals) { intervalStart = p.getFirst(); intervalEnd = p.getSecond(); if ((intervalStart.before(lastSeenDate) && intervalEnd.after(lastSeenDate)) || intervalStart.equals(lastSeenDate) || intervalEnd.equals(lastSeenDate)) { interval = i; break; } i++; } } if (interval < 0 || interval == intervals.size()) { JSPHelper.log.info( "What, no interval!? for " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(lastSeenDate)); continue; } intervalToContacts.put(interval, c); } log.info("Interval information (interval 0 is the most recent):"); for (int interval = 0; interval < intervals.size(); interval++) { Collection<Contact> contacts = intervalToContacts.get(interval); int nContactsForThisInterval = (contacts == null) ? 0 : contacts.size(); log.info( "In interval " + interval + " there are " + Util.pluralize(nContactsForThisInterval, "candidate contact") + " who were last seen in this interval"); } for (int interval = 0; interval < intervals.size(); interval++) { Date intervalStart = intervals.get(interval).getFirst(); Date intervalEnd = intervals.get(interval).getSecond(); Collection<Contact> candidateContactsForThisInterval = intervalToContacts.get(interval); if (candidateContactsForThisInterval == null) { log.info("Skipping interval " + interval + " because there are no contacts"); continue; } Map<Clue, Contact> clueToContact = new LinkedHashMap<>(); log.info("=======\nGenerating questions for interval " + interval); outer: for (Contact c : candidateContactsForThisInterval) { String name = c.pickBestName(); if (name.length() < 2) // could also check if alphanumberic only continue outer; // ignore contact if name does not contain all alphabets. Even a period is not allowed. only // space is allowed. 
for (char ch : name.toCharArray()) { if (!Character.isAlphabetic(ch) && !Character.isSpaceChar(ch)) continue outer; } Clue clue = cluer.createPersonNameClue( c, evaluators, nerModel, intervalStart, intervalEnd, nSent, archive, tabooSentenceHashes); if (clue != null) clueToContact.put(clue, c); } List<Clue> clueList = new ArrayList(clueToContact.keySet()); Collections.sort(clueList); List<Clue> selectedClues = new ArrayList<>(); for (int i = 0; i < cluesPerInterval && i < clueList.size(); i++) { selectedClues.add(clueList.get(i)); } log.info( "For interval " + interval + " selected " + selectedClues.size() + " contacts out of " + clueList.size() + " possible candidates."); // for (Clue c: clueList) // log.info ("Clue candidate for " + clueToContact.get(c).pickBestName() + " // score = " + c.clueStats.finalScore+ " clue is " + c ); // for (Clue c: selectedClues) // log.info ("Selected clue: " + clueToContact.get(c).pickBestName() + " score = " // + c.clueStats.finalScore+ " clue is " + c); for (Clue selectedClue : selectedClues) { Contact c = clueToContact.get(selectedClue); String name = c.pickBestName(); List<Integer> lengthList = Crossword.convertToWord(name).getSecond(); String lengthDescr = ""; if (lengthList.size() > 1) lengthDescr += Integer.toString(lengthList.size()) + " words: "; for (Integer i : lengthList) { lengthDescr += Util.pluralize(i, "letter") + ", "; } lengthDescr = lengthDescr.substring(0, lengthDescr.length() - 2); // subtract the extra comma. 
ClueInfo ci = new ClueInfo(); ci.lastSeenDate = contactToLatestDate.get(c); ci.nMessages = contactToThreadIds.get(c).size(); ci.nThreads = contactToThreadIds.get(c).size(); questions.add(new MemoryQuestion(this, name, selectedClue, 1, lengthDescr)); } } log.info(questions.size() + " questions generated"); log.info("Top candidates are:"); // sort q's by clue score Collections.sort(questions); // log.info("Based on clue score, top answers:"); // for (MemoryQuestion mq: questions) // log.info (mq.correctAnswer + " times= clue=" + mq.clue.clue); int count = 0; for (MemoryQuestion mq : questions) { mq.setQuestionNum(count++); } // log the questions as well, just in case we don't get to the final point due to user fatigue // or crashes logStats("questions.final", false); }