Пример #1
0
  /**
   * Scans {@code line} character by character and returns the accumulated {@code out} buffer.
   *
   * <p>Each character is dispatched by kind: a digit is handed to {@code dealWithOperand()}, an
   * operator (as decided by {@code isOperator}) to {@code dealWithOperator(char)}, an opening
   * parenthesis is pushed on {@code stack}, a closing parenthesis is handed to {@code
   * dealWithCloser()}, and space characters are skipped. Any other character is reported on
   * standard output and otherwise ignored. Once the scan is finished, everything still on the
   * stack is popped and appended to {@code out}.
   *
   * <p>NOTE(review): the loop counter {@code index} is not a local variable, so the helpers can
   * presumably advance it themselves (e.g. to consume a multi-digit operand) - confirm against
   * their definitions.
   *
   * @return the {@code out} buffer after the whole line has been processed
   */
  public StringBuffer translate() {
    for (index = 0; index < line.length(); index++) {
      char c = line.charAt(index);

      if (Character.isDigit(c)) {
        dealWithOperand();
      } else if (isOperator(c)) {
        dealWithOperator(c);
      } else if (c == '(') {
        // Character.valueOf replaces the deprecated-for-removal Character(char) constructor.
        stack.push(Character.valueOf(c));
      } else if (c == ')') {
        dealWithCloser();
      } else if (Character.isSpaceChar(c)) {
        // Space characters carry no meaning here; skip them.
      } else {
        System.out.println("Error: unknown character" + c);
      }
    }

    // pop and output all the operators left on the stack
    while (!stack.empty()) {
      out.append(popChar());
    }
    return out;
  }
Пример #2
0
  /**
   * Prints every candidate character whose glyph "blackness" lies strictly between two bounds.
   *
   * <p>Settings are read through {@code Util.getProperty*}: {@code size} (default 100), {@code
   * min} (default 0.01), {@code max} (default 0.9) and {@code font}, an optional path to a
   * TrueType font file. Without {@code font}, a plain "serif" font of the given size is used.
   *
   * <p>All {@code char} values from U+0001 up to but excluding {@code Character.MAX_VALUE} are
   * examined. A character is skipped when its general category is CONTROL, FORMAT, PRIVATE_USE,
   * SURROGATE or UNASSIGNED, when it is mirrored, when it is a space character, or when its NFKC
   * normalization contains U+0308. Each remaining character is measured with {@code
   * UnigramMetrics}; if its blackness is in range, the character and its numeric code are printed
   * on one line, separated by a space.
   *
   * <p>NOTE(review): a font loaded with {@code Font.createFont} is not derived to {@code size}
   * here; presumably {@code UnigramMetrics} applies the size itself - confirm.
   *
   * @param args unused
   * @throws Exception if the font file cannot be opened or parsed
   */
  public static void main(String[] args) throws Exception {
    int size = Util.getPropertyInt("size", 100);
    double min = Util.getPropertyDouble("min", 0.01);
    double max = Util.getPropertyDouble("max", 0.9);
    Font font = new Font("serif", Font.PLAIN, size);
    String fpath = Util.getProperty("font", null);
    if (fpath != null) {
      // Font.createFont does not close the stream it is given, so close it here.
      try (FileInputStream fontStream = new FileInputStream(fpath)) {
        font = Font.createFont(Font.TRUETYPE_FONT, fontStream);
      }
    }

    for (char c = Character.MIN_VALUE + 1; c < Character.MAX_VALUE; ++c) {
      int type = Character.getType(c);
      if (type != Character.CONTROL
          && type != Character.FORMAT
          && type != Character.PRIVATE_USE
          && type != Character.SURROGATE
          && type != Character.UNASSIGNED
          && !Character.isMirrored(c)
          && !Character.isSpaceChar(c)) {
        String s = "" + c;
        if (Normalizer.normalize(s, NFKC).contains("\u0308")) continue; // TODO: adhoc
        UnigramMetrics m = new UnigramMetrics(s, size, false, true, font);
        // Measure once; the value is compared against both bounds.
        double blackness = m.getBlackness();
        if (min < blackness && blackness < max) {
          System.out.println("" + c + " " + (int) c);
        }
      }
    }
  }
Пример #3
0
  /**
   * This is the write() method of the stream. All Writer subclasses implement this. All other
   * versions of write() are variants of this one.
   *
   * <p>The characters are processed one at a time while holding {@code this.lock}:
   *
   * <ul>
   *   <li>a page is started lazily whenever {@code page} is {@code null};
   *   <li>{@code '\r'} and {@code '\n'} each start a new line, except that a {@code '\n'}
   *       directly following a {@code '\r'} is treated as part of the same line terminator;
   *   <li>other whitespace that is neither a space character nor a tab is ignored;
   *   <li>when {@code charnum} has reached {@code chars_per_line} the text wraps to a new line;
   *   <li>a space advances one column, a tab advances to the next multiple of 8 columns, and
   *       any other character is drawn at the current column and line.
   * </ul>
   *
   * @param buffer the characters to write
   * @param index offset of the first character to write
   * @param len number of characters to write
   */
  public void write(char[] buffer, int index, int len) {
    synchronized (this.lock) {
      // Loop through all the characters passed to us
      for (int i = index; i < index + len; i++) {
        // If we haven't begun a page (or a new page), do that now.
        if (page == null) {
          newpage();
        }

        char c = buffer[i];

        // If the character is a line terminator, then begin new line,
        // unless it is a \n immediately after a \r.
        if (c == '\n') {
          if (!last_char_was_return) {
            newline();
          }
          // The \n completes any pending \r\n pair. Clear the flag so that a second \n
          // (as in "\r\n\n") is not swallowed as well.
          last_char_was_return = false;
          continue;
        }
        if (c == '\r') {
          newline();
          last_char_was_return = true;
          continue;
        }
        last_char_was_return = false;

        // If it is some other non-printing character, ignore it.
        if (Character.isWhitespace(c) && !Character.isSpaceChar(c) && c != '\t') {
          continue;
        }

        // If no more characters will fit on the line, start a new line.
        if (charnum >= chars_per_line) {
          newline();
          if (page == null) {
            newpage(); // and start a new page, if necessary
          }
        }

        // Now print the character:
        // If it is a space, skip one space, without output.
        // If it is a tab, skip the necessary number of spaces.
        // Otherwise, print the character.
        // It is inefficient to draw only one character at a time, but
        // because our FontMetrics don't match up exactly to what the
        // printer uses we need to position each character individually.
        if (Character.isSpaceChar(c)) {
          charnum++;
        } else if (c == '\t') {
          charnum += 8 - (charnum % 8);
        } else {
          page.drawChars(
              buffer, i, 1, x0 + charnum * charwidth, y0 + (linenum * lineheight) + lineascent);
          charnum++;
        }
      }
    }
  }
 /**
  * Advances {@code index} past any run of blank characters in {@code s}, stopping at the first
  * character that is neither a Unicode space character nor one of {@code '\n'}, {@code '\r'},
  * {@code '\t'}, or at the end of {@code s}.
  */
 void skipSpaces() {
   for (; index < s.length(); index++) {
     char ch = s.charAt(index);
     boolean blank = Character.isSpaceChar(ch) || ch == '\n' || ch == '\r' || ch == '\t';
     if (!blank) {
       return;
     }
   }
 }
Пример #5
0
  /**
   * Removes trailing space characters (as defined by {@link Character#isSpaceChar(char)}) from
   * the end of a string. Tabs and line terminators are not space characters in that sense and
   * are therefore left in place.
   *
   * @param input The string to trim; may be {@code null}.
   * @return {@code null} if <CODE>input</CODE> is {@code null}; <CODE>input</CODE> itself if it
   *     has no trailing spaces; otherwise a new string without the trailing spaces.
   */
  public static String rtrim(String input) {
    if (input == null) {
      return null;
    }

    // Exclusive end index of the part that is kept.
    int keepUntil = input.length();
    while (keepUntil > 0 && Character.isSpaceChar(input.charAt(keepUntil - 1))) {
      keepUntil--;
    }

    return (keepUntil == input.length()) ? input : input.substring(0, keepUntil);
  }
Пример #6
0
  /**
   * Generates person-name memory questions from the given archive and stores them in {@code
   * questions}.
   *
   * <p>Outline:
   *
   * <ol>
   *   <li>Sort all documents of the archive and find the earliest and latest message date.
   *   <li>Hash every canonicalized sentence; a sentence whose hash is seen a second time is
   *       marked as taboo.
   *   <li>For every sent message, record for each participating contact (other than the owner)
   *       the latest date seen, the messages and the thread ids.
   *   <li>Split the time from the earliest date up to now into intervals (interval 0 is the most
   *       recent) and assign each contact to the first interval containing its last-seen date.
   *   <li>Per interval, ask the cluer for a clue for every contact whose best name has at least
   *       two characters and consists only of alphabetic and space characters; sort the clues
   *       and turn the first {@code ceil(numClues / #intervals)} of them into questions.
   *   <li>Sort the questions, number them from 0 and log them via {@code logStats}.
   * </ol>
   *
   * <p>NOTE(review): an archive without documents leaves the earliest/latest dates {@code null};
   * whether {@code formatDateForDisplay} and {@code computeDateIntervals} accept that is not
   * visible here - confirm. Likewise every document is assumed to be an {@code EmailDocument}
   * with a non-null {@code date}.
   *
   * @param archive the archive whose documents and address book are used
   * @param nerModel the NER model handed to the cluer
   * @param allDocs not read by this method
   * @param lex the lexicon handed to the cluer
   * @param numClues the total number of questions aimed for across all intervals
   * @throws IOException declared by the helpers used while reading and analysing contents
   * @throws GeneralSecurityException declared by the helpers used here
   * @throws ClassNotFoundException declared by the helpers used here
   * @throws ReadContentsException declared by the helpers used here
   * @throws ParseException declared by the helpers used here
   */
  public void generatePersonNameQuestions(
      Archive archive,
      NERModel nerModel,
      Collection<EmailDocument> allDocs,
      Lexicon lex,
      int numClues)
      throws IOException, GeneralSecurityException, ClassNotFoundException, ReadContentsException,
          ParseException {
    this.archive = archive;
    questions = new ArrayList<>();
    ArchiveCluer cluer = new ArchiveCluer(null, archive, nerModel, null, lex);

    tabooCluesSet = new LinkedHashSet<>();
    archive.assignThreadIds();

    List<ClueEvaluator> evaluators = getDefaultEvals();

    List<Document> docs = archive.getAllDocs();
    Multimap<Contact, EmailDocument> contactToMessages = LinkedHashMultimap.create();
    Multimap<Contact, Long> contactToThreadIds = LinkedHashMultimap.create();

    // sort by date
    Collections.sort(docs);

    Date earliestDate = null, latestDate = null;
    Map<Contact, Date> contactToLatestDate = new LinkedHashMap<>();

    // compute earliest and latest date across all messages in corpus
    for (Document doc : docs) {
      EmailDocument ed = (EmailDocument) doc;

      if (earliestDate == null || ed.date.before(earliestDate)) {
        earliestDate = ed.date;
      }
      if (latestDate == null || ed.date.after(latestDate)) {
        latestDate = ed.date;
      }
    }
    JSPHelper.log.info(
        "===================\nStarting to generate person names memory questions from "
            + docs.size()
            + " messages with "
            + numClues
            + " questions"
            + ", earliest date = "
            + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(earliestDate)
            + " latest date = "
            + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(latestDate));

    Set<Integer> tabooSentenceHashes = new LinkedHashSet<>();

    // create hashes of all sentences seen at least twice (case insensitive, lower cased)
    {
      Set<Integer> hashesSeen = new LinkedHashSet<>();
      for (Document d : docs) {
        String contents = archive.getContents(d, true);
        String cleanedContents = EmailUtils.cleanupEmailMessage(contents);
        SentenceTokenizer st = new SentenceTokenizer(cleanedContents);
        while (st.hasMoreSentences()) {
          String sentence = st.nextSentence();
          sentence = canonicalizeSentence(sentence);
          int hashCode = sentence.hashCode();
          if (hashesSeen.contains(hashCode)) {
            tabooSentenceHashes.add(hashCode);
            log.info("Marking sentence as taboo: " + sentence);
          } else {
            hashesSeen.add(hashCode);
          }
        }
      }
    }

    // compute contactToLatestDate that contact has been seen on
    for (Document doc : docs) {
      EmailDocument ed = (EmailDocument) doc;
      // discard doc if it is not a sent mail
      if ((ed.sentOrReceived(archive.addressBook) & EmailDocument.SENT_MASK) == 0) {
        continue;
      }

      for (Contact c : ed.getParticipatingContactsExceptOwn(archive.addressBook)) {
        Date currentLatestDate = contactToLatestDate.get(c);
        if (currentLatestDate == null || currentLatestDate.before(ed.date)) {
          contactToLatestDate.put(c, ed.date);
        }
        contactToMessages.put(c, ed);
        contactToThreadIds.put(c, ed.threadID);
      }
    }

    log.info("We are considering " + contactToLatestDate.size() + " contacts");

    Date currentDate = new Date();
    List<Pair<Date, Date>> intervals =
        computeDateIntervals(earliestDate, currentDate); // go back from current date
    // intervals[0] is the most recent.
    JSPHelper.log.info("done computing " + intervals.size() + " intervals");
    for (Pair<Date, Date> p : intervals) {
      JSPHelper.log.info(
          "Interval: "
              + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getFirst())
              + " - "
              + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getSecond()));
    }

    // Ceiling division, so the per-interval quotas add up to at least numClues.
    int cluesPerInterval =
        (numClues > 0 && intervals.size() > 0)
            ? (numClues + intervals.size() - 1) / intervals.size()
            : 0;
    JSPHelper.log.info(
        "Will try to generate " + Util.pluralize(cluesPerInterval, "questions") + " per interval");

    Multimap<Integer, Contact> intervalToContacts = LinkedHashMultimap.create();

    // nSent is the number of sentences allowed in a clue text
    int nSent = 2;
    for (Contact c : contactToLatestDate.keySet()) {
      Date lastSeenDate = contactToLatestDate.get(c);

      // which interval does this date belong to? we'll assign this contact in that interval in the
      // intervalToContacts map
      int interval = -1;
      {
        int i = 0;
        for (Pair<Date, Date> p : intervals) {
          Date intervalStart = p.getFirst();
          Date intervalEnd = p.getSecond();

          // Both interval bounds are inclusive.
          if ((intervalStart.before(lastSeenDate) && intervalEnd.after(lastSeenDate))
              || intervalStart.equals(lastSeenDate)
              || intervalEnd.equals(lastSeenDate)) {
            interval = i;
            break;
          }
          i++;
        }
      }

      if (interval < 0) {
        JSPHelper.log.info(
            "What, no interval!? for "
                + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(lastSeenDate));
        continue;
      }

      intervalToContacts.put(interval, c);
    }

    log.info("Interval information (interval 0 is the most recent):");
    for (int interval = 0; interval < intervals.size(); interval++) {
      Collection<Contact> contacts = intervalToContacts.get(interval);
      int nContactsForThisInterval = (contacts == null) ? 0 : contacts.size();
      log.info(
          "In interval "
              + interval
              + " there are "
              + Util.pluralize(nContactsForThisInterval, "candidate contact")
              + " who were last seen in this interval");
    }

    for (int interval = 0; interval < intervals.size(); interval++) {
      Date intervalStart = intervals.get(interval).getFirst();
      Date intervalEnd = intervals.get(interval).getSecond();
      Collection<Contact> candidateContactsForThisInterval = intervalToContacts.get(interval);
      // Multimap.get returns an empty collection (never null) for a missing key, so the
      // emptiness check is the one that actually detects an interval without contacts.
      if (candidateContactsForThisInterval == null || candidateContactsForThisInterval.isEmpty()) {
        log.info("Skipping interval " + interval + " because there are no contacts");
        continue;
      }

      Map<Clue, Contact> clueToContact = new LinkedHashMap<>();
      log.info("=======\nGenerating questions for interval " + interval);

      outer:
      for (Contact c : candidateContactsForThisInterval) {
        String name = c.pickBestName();
        if (name.length() < 2) { // could also check if alphanumeric only
          continue outer;
        }

        // ignore contact if name does not contain all alphabets. Even a period is not allowed. only
        // space is allowed.
        for (char ch : name.toCharArray()) {
          if (!Character.isAlphabetic(ch) && !Character.isSpaceChar(ch)) {
            continue outer;
          }
        }

        Clue clue =
            cluer.createPersonNameClue(
                c,
                evaluators,
                nerModel,
                intervalStart,
                intervalEnd,
                nSent,
                archive,
                tabooSentenceHashes);
        if (clue != null) {
          clueToContact.put(clue, c);
        }
      }

      List<Clue> clueList = new ArrayList<>(clueToContact.keySet());
      Collections.sort(clueList);
      List<Clue> selectedClues = new ArrayList<>();
      for (int i = 0; i < cluesPerInterval && i < clueList.size(); i++) {
        selectedClues.add(clueList.get(i));
      }

      log.info(
          "For interval "
              + interval
              + " selected "
              + selectedClues.size()
              + " contacts out of "
              + clueList.size()
              + " possible candidates.");

      for (Clue selectedClue : selectedClues) {
        Contact c = clueToContact.get(selectedClue);
        String name = c.pickBestName();

        // Describe the answer's shape, e.g. "2 words: 4 letters, 5 letters". The separator is
        // only written between entries, so an empty list yields "" instead of throwing.
        List<Integer> lengthList = Crossword.convertToWord(name).getSecond();
        StringBuilder lengthDescr = new StringBuilder();
        if (lengthList.size() > 1) {
          lengthDescr.append(Integer.toString(lengthList.size())).append(" words: ");
        }
        String separator = "";
        for (Integer wordLength : lengthList) {
          lengthDescr.append(separator).append(Util.pluralize(wordLength, "letter"));
          separator = ", ";
        }

        // NOTE(review): ci is filled in but not attached to the question within this method.
        ClueInfo ci = new ClueInfo();
        ci.lastSeenDate = contactToLatestDate.get(c);
        ci.nMessages = contactToMessages.get(c).size();
        ci.nThreads = contactToThreadIds.get(c).size();

        questions.add(new MemoryQuestion(this, name, selectedClue, 1, lengthDescr.toString()));
      }
    }

    log.info(questions.size() + " questions generated");

    log.info("Top candidates are:");

    // sort q's by clue score
    Collections.sort(questions);

    int count = 0;
    for (MemoryQuestion mq : questions) {
      mq.setQuestionNum(count++);
    }

    // log the questions as well, just in case we don't get to the final point due to user fatigue
    // or crashes
    logStats("questions.final", false);
  }