Beispiel #1
0
  public void swap(int i1, int i2, int j1, int j2) {

    int ilength = i2 - i1 + 1;
    int jlength = j2 - j1 + 1;
    int diff = jlength - ilength;

    Set<Pair<Integer, Integer>> newAlignments = new HashSet<Pair<Integer, Integer>>();

    for (Pair<Integer, Integer> alignment : sureAlignments) {
      int pos = alignment.getFirst();

      if (pos < i1 || pos > j2) {
        newAlignments.add(alignment);
      } else if (pos >= i1 && pos <= i2) {
        newAlignments.add(new Pair<Integer, Integer>(pos + j2 - i2, alignment.getSecond()));
      } else if (pos > i2 && pos < j1) {
        newAlignments.add(new Pair<Integer, Integer>(pos + diff, alignment.getSecond()));
      } else if (pos >= j1 && pos <= j2) {
        newAlignments.add(new Pair<Integer, Integer>(pos - j1 + i1, alignment.getSecond()));
      } else {
        System.err.println("Error in Alignment.swap()");
        System.exit(1);
      }
    }

    sureAlignments = newAlignments;
  }
Beispiel #2
0
 Pair Calculate(int u) {
   BigInteger ans;
   Pair temp;
   temp = new Pair();
   if (g[u].size() == 0) {
     temp.x = BigInteger.ONE;
     temp.n = 1;
     return temp;
   }
   if (g[u].size() == 1) {
     temp = Calculate((int) g[u].get(0));
     temp.n++;
     return temp;
   }
   int p, q;
   temp = Calculate((int) g[u].get(0));
   ans = temp.x;
   p = temp.n;
   temp = Calculate((int) g[u].get(1));
   ans = ans.multiply(temp.x);
   q = temp.n;
   ans = ans.multiply(C[p + q][p]);
   temp.x = ans;
   temp.n = p + q + 1;
   return temp;
 }
Beispiel #3
0
 public Set<Integer> getAlignedSources(int targetPosition) {
   Set<Integer> sources = new HashSet<Integer>();
   for (Pair<Integer, Integer> alignment : sureAlignments) {
     if (alignment.getFirst() == targetPosition) {
       sources.add(alignment.getSecond());
     }
   }
   return sources;
 }
Beispiel #4
0
 public int getAlignedTarget(int sourcePosition) {
   for (Pair<Integer, Integer> alignment : sureAlignments) {
     if (alignment.getSecond() == sourcePosition) {
       return alignment.getFirst();
     }
   }
   System.err.println("nothing aligned with " + sourcePosition);
   return -999;
 }
Beispiel #5
0
 public Pair decode() {
   Pair ret;
   if (cursor < output.size()) {
     ret = (Pair) output.get(cursor++);
     ret.setValid(true);
   } else {
     ret = new Pair(0, 'a', false);
   }
   return ret;
 }
Beispiel #6
0
  private void shiftAlignments(int targetPosition, boolean up, int changeto) {

    Set<Pair<Integer, Integer>> newAlignments = new HashSet<Pair<Integer, Integer>>();

    for (Pair<Integer, Integer> alignment : sureAlignments) {
      int pos = alignment.getFirst();
      // System.out.println("shift: "+pos);

      if (pos < targetPosition) {
        newAlignments.add(alignment);
      } else if (pos == targetPosition) {
        newAlignments.add(
            new Pair<Integer, Integer>((up ? pos + 1 : changeto), alignment.getSecond()));
      } else if (pos > targetPosition) {
        newAlignments.add(
            new Pair<Integer, Integer>((up ? pos + 1 : pos - 1), alignment.getSecond()));
      }
    }

    sureAlignments = newAlignments;
  }
 public int compare(Pair string1, Pair string2) {
   int ans = string1.toString().compareTo(string2.toString());
   if (ans < 0) {
     return -1;
   } else if (ans == 0) {
     if (string1.idFile < string2.idFile) {
       return -1;
     } else if (string1.idFile > string2.idFile) {
       return 1;
     } else {
       if (string1.idWord < string2.idWord) {
         return -1;
       } else if (string1.idWord > string2.idWord) {
         return 1;
       } else {
         return 0;
       }
     }
   } else {
     return 1;
   }
 }
  /** @param args generator string | number of keys | output directory */
  public static void main(String[] args) {

    /* Ensure there is correct number of arguments */
    if (args.length != 3) {
      System.err.println("Usage:");
      System.err.println(
          "\tjava crypto.ElGamalKeyGenerator [generator string] [number of keys] [output directory]");
      System.exit(0);
    }

    /* Set a default limit value */
    int numKeys = -1;

    /* Parse the first argument as the number of keys, expecting an integer */
    try {
      numKeys = Integer.parseInt(args[1]);
    } catch (Exception e) {
      System.out.println("Expected integer for [number of keys], found \"" + args[1] + "\".");
      e.printStackTrace();
      System.exit(0);
    }

    File dir = null;

    /* Create directory with output path in args[2] if it already does not exist */
    try {
      dir = new File(args[2]);
      if (!dir.exists()) dir.mkdirs();
    } catch (Exception e) {
      System.out.println("Expected path for [output directory], found \"" + args[2] + "\".");
      System.out.println("\tError: " + e.getMessage());
      System.exit(0);
    }

    /* Iterate over the number of keys to be generated  */
    for (int i = 0; i < numKeys; i++) {

      /* Generate ElGamal Private and Public key Pair. */
      Pair<Key> keys = ElGamalCrypto.SINGLETON.generate(args[0]);
      Key publicKey = keys.get1();
      Key privateKey = keys.get2();

      /* Here we parse the keys into an s-expression */
      ASExpression pub = publicKey.toASE();
      ASExpression priv = privateKey.toASE();

      /* The generated pair of public and private keys are stored in the <index>public.key and <index>private.key respectively. */
      File pubFile = new File(dir, i + "public.key");
      File privFile = new File(dir, i + "private.key");

      try {

        /* Create a new output stream for the public key file */
        OutputStream out = new FileOutputStream(pubFile);

        /* Convert the s-expression into Rivest Verbatim format and then write it to the <index>public.key */
        out.write(pub.toVerbatim());
        out.flush();
        out.close();

        /* Create a new output stream for the private key file*/
        out = new FileOutputStream(privFile);

        /* Convert the s-expression into Rivest Verbatim format and then write it to the <index>private.key */
        out.write(priv.toVerbatim());
        out.flush();
        out.close();
      } catch (IOException e) {
        System.err.println("Encountered error writing key files.");
        e.printStackTrace();
        System.exit(0);
      }
    }
  }
Beispiel #9
0
  public void solve() throws Exception {
    P[] ps = new P[N];
    Map<Integer, Set<Integer>> map = new HashMap<Integer, Set<Integer>>();
    for (int i = 0; i < N; i++) {
      P p = new P();
      p.x = sc.nextInt();
      p.y = sc.nextInt();
      ps[i] = p;
      if (!map.containsKey(p.x)) {
        Set<Integer> set = new HashSet<Integer>();
        map.put(p.x, set);
      }
      map.get(p.x).add(p.y);
    }
    Arrays.sort(
        ps,
        new Comparator<P>() {
          @Override
          public int compare(P p1, P p2) {
            if (p1.x != p2.x) {
              return p1.x - p2.x;
            }
            return p1.y - p2.y;
          }
        });

    List<Pair> yp = new ArrayList<Pair>();
    for (int i = 0; i < ps.length - 1; i++) {
      if (ps[i].x != ps[i + 1].x) continue;
      int lidx = i + 1;
      while (lidx + 1 < ps.length && ps[i].x == ps[lidx + 1].x) {
        lidx++;
      }
      Pair pair = new Pair();
      pair.s = ps[i].y;
      pair.l = ps[lidx].y;
      pair.base = ps[i].x;
      yp.add(pair);
      i = lidx;
    }

    Arrays.sort(
        ps,
        new Comparator<P>() {
          @Override
          public int compare(P p1, P p2) {
            if (p1.y != p2.y) {
              return p1.y - p2.y;
            }
            return p1.x - p2.x;
          }
        });
    List<Pair> xp = new ArrayList<Pair>();
    for (int i = 0; i < ps.length - 1; i++) {
      if (ps[i].y != ps[i + 1].y) continue;
      int lidx = i + 1;
      while (lidx + 1 < ps.length && ps[i].y == ps[lidx + 1].y) {
        lidx++;
      }
      Pair pair = new Pair();
      pair.s = ps[i].x;
      pair.l = ps[lidx].x;
      pair.base = ps[i].y;
      xp.add(pair);
      i = lidx;
    }

    int ans = 0;
    for (int i = 0; i < yp.size(); i++) {
      int xnow = yp.get(i).base;
      int sy = yp.get(i).s;
      int ly = yp.get(i).l;
      for (int j = 0; j < xp.size(); j++) {
        int y = xp.get(j).base;
        if (y < sy || ly < y) continue;
        if (xp.get(j).s > xnow || xp.get(j).l < xnow) continue;
        try {
          if (!map.get(xnow).contains(y)) {
            ans++;
            map.get(xnow).add(y);
          }
        } catch (Exception ex) {

        }
      }
    }
    ans += ps.length;
    out.println(ans);
  }
Beispiel #10
0
  /** Generates person names tests from the given archive. @throws IOException */
  public void generatePersonNameQuestions(
      Archive archive,
      NERModel nerModel,
      Collection<EmailDocument> allDocs,
      Lexicon lex,
      int numClues)
      throws IOException, GeneralSecurityException, ClassNotFoundException, ReadContentsException,
          ParseException {
    this.archive = archive;
    questions = new ArrayList<>();
    ArchiveCluer cluer = new ArchiveCluer(null, archive, nerModel, null, lex);

    tabooCluesSet = new LinkedHashSet<>();
    archive.assignThreadIds();

    List<ClueEvaluator> evaluators = getDefaultEvals();

    List<Document> docs = archive.getAllDocs();
    Multimap<Contact, EmailDocument> contactToMessages = LinkedHashMultimap.create();
    Multimap<Contact, Long> contactToThreadIds = LinkedHashMultimap.create();

    // sort by date
    Collections.sort(docs);

    Date earliestDate = null, latestDate = null;
    Map<Contact, Date> contactToLatestDate = new LinkedHashMap<>();

    // compute earliest and latest date across all messages in corpus
    for (Document doc : docs) {
      EmailDocument ed = (EmailDocument) doc;

      if (earliestDate == null || ed.date.before(earliestDate)) earliestDate = ed.date;
      if (latestDate == null || ed.date.after(latestDate)) latestDate = ed.date;
    }
    JSPHelper.log.info(
        "===================\nStarting to generate person names memory questions from "
            + docs.size()
            + " messages with "
            + numClues
            + " questions"
            + ", earliest date = "
            + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(earliestDate)
            + " latest date = "
            + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(latestDate));

    Set<Integer> tabooSentenceHashes = new LinkedHashSet<>();

    // create hashes of all sentences seen at least twice (case insensitive, lower cased)
    {
      Set<Integer> hashesSeen = new LinkedHashSet<>();
      for (Document d : docs) {
        String contents = archive.getContents(d, true);
        String cleanedContents = EmailUtils.cleanupEmailMessage(contents);
        SentenceTokenizer st = new SentenceTokenizer(cleanedContents);
        while (st.hasMoreSentences()) {
          String sentence = st.nextSentence();
          sentence = canonicalizeSentence(sentence);
          int hashCode = sentence.hashCode();
          if (hashesSeen.contains(hashCode)) {
            tabooSentenceHashes.add(hashCode);
            log.info("Marking sentence as taboo: " + sentence);
          } else hashesSeen.add(hashCode);
        }
      }
    }

    // compute contactToLatestDate that contact has been seen on
    for (Document doc : docs) {
      EmailDocument ed = (EmailDocument) doc;
      // discard doc if it is not a sent mail
      if ((ed.sentOrReceived(archive.addressBook) & EmailDocument.SENT_MASK) == 0) continue;

      for (Contact c : ed.getParticipatingContactsExceptOwn(archive.addressBook)) {
        Date currentLatestDate = contactToLatestDate.get(c);
        if (currentLatestDate == null || currentLatestDate.before(ed.date))
          contactToLatestDate.put(c, ed.date);
        contactToMessages.put(c, ed);
        contactToThreadIds.put(c, ed.threadID);
      }
    }

    log.info("We are considering " + contactToLatestDate.size() + " contacts");

    Date currentDate = new Date();
    List<Pair<Date, Date>> intervals =
        computeDateIntervals(earliestDate, currentDate); // go back from current date
    // intervals[0] is the most recent.
    JSPHelper.log.info("done computing " + intervals.size() + " intervals");
    for (Pair<Date, Date> p : intervals)
      JSPHelper.log.info(
          "Interval: "
              + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getFirst())
              + " - "
              + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getSecond()));

    int cluesPerInterval =
        (numClues > 0 && intervals.size() > 0)
            ? (numClues + intervals.size() - 1) / intervals.size()
            : 0;
    JSPHelper.log.info(
        "Will try to generate " + Util.pluralize(cluesPerInterval, "questions") + " per interval");

    Multimap<Integer, Contact> intervalToContacts = LinkedHashMultimap.create();

    // nSent is the number of sentences allowed in a clue text
    int nSent = 2;
    for (Contact c : contactToLatestDate.keySet()) {
      Date lastSeenDate = contactToLatestDate.get(c);

      // which interval does this date belong to? we'll assign this contact in that interval in the
      // intervalToContacts map
      int interval = -1;
      Date intervalStart = null, intervalEnd = null;
      {
        int i = 0;
        for (Pair<Date, Date> p : intervals) {
          intervalStart = p.getFirst();
          intervalEnd = p.getSecond();

          if ((intervalStart.before(lastSeenDate) && intervalEnd.after(lastSeenDate))
              || intervalStart.equals(lastSeenDate)
              || intervalEnd.equals(lastSeenDate)) {
            interval = i;
            break;
          }
          i++;
        }
      }

      if (interval < 0 || interval == intervals.size()) {
        JSPHelper.log.info(
            "What, no interval!? for "
                + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(lastSeenDate));
        continue;
      }

      intervalToContacts.put(interval, c);
    }

    log.info("Interval information (interval 0 is the most recent):");
    for (int interval = 0; interval < intervals.size(); interval++) {
      Collection<Contact> contacts = intervalToContacts.get(interval);
      int nContactsForThisInterval = (contacts == null) ? 0 : contacts.size();
      log.info(
          "In interval "
              + interval
              + " there are "
              + Util.pluralize(nContactsForThisInterval, "candidate contact")
              + " who were last seen in this interval");
    }

    for (int interval = 0; interval < intervals.size(); interval++) {
      Date intervalStart = intervals.get(interval).getFirst();
      Date intervalEnd = intervals.get(interval).getSecond();
      Collection<Contact> candidateContactsForThisInterval = intervalToContacts.get(interval);
      if (candidateContactsForThisInterval == null) {
        log.info("Skipping interval " + interval + " because there are no contacts");
        continue;
      }

      Map<Clue, Contact> clueToContact = new LinkedHashMap<>();
      log.info("=======\nGenerating questions for interval " + interval);

      outer:
      for (Contact c : candidateContactsForThisInterval) {
        String name = c.pickBestName();
        if (name.length() < 2) // could also check if alphanumberic only
        continue outer;

        // ignore contact if name does not contain all alphabets. Even a period is not allowed. only
        // space is allowed.
        for (char ch : name.toCharArray()) {
          if (!Character.isAlphabetic(ch) && !Character.isSpaceChar(ch)) continue outer;
        }

        Clue clue =
            cluer.createPersonNameClue(
                c,
                evaluators,
                nerModel,
                intervalStart,
                intervalEnd,
                nSent,
                archive,
                tabooSentenceHashes);
        if (clue != null) clueToContact.put(clue, c);
      }

      List<Clue> clueList = new ArrayList(clueToContact.keySet());
      Collections.sort(clueList);
      List<Clue> selectedClues = new ArrayList<>();
      for (int i = 0; i < cluesPerInterval && i < clueList.size(); i++) {
        selectedClues.add(clueList.get(i));
      }

      log.info(
          "For interval "
              + interval
              + " selected "
              + selectedClues.size()
              + " contacts out of "
              + clueList.size()
              + " possible candidates.");
      //            for (Clue c: clueList)
      //               log.info ("Clue candidate for " + clueToContact.get(c).pickBestName() + "
      // score = " + c.clueStats.finalScore+ " clue is " + c );
      //          for (Clue c: selectedClues)
      //             log.info ("Selected clue: " + clueToContact.get(c).pickBestName() + " score = "
      // + c.clueStats.finalScore+ " clue is " + c);

      for (Clue selectedClue : selectedClues) {
        Contact c = clueToContact.get(selectedClue);
        String name = c.pickBestName();

        List<Integer> lengthList = Crossword.convertToWord(name).getSecond();
        String lengthDescr = "";
        if (lengthList.size() > 1) lengthDescr += Integer.toString(lengthList.size()) + " words: ";

        for (Integer i : lengthList) {
          lengthDescr += Util.pluralize(i, "letter") + ", ";
        }
        lengthDescr =
            lengthDescr.substring(0, lengthDescr.length() - 2); // subtract the extra comma.

        ClueInfo ci = new ClueInfo();
        ci.lastSeenDate = contactToLatestDate.get(c);
        ci.nMessages = contactToThreadIds.get(c).size();
        ci.nThreads = contactToThreadIds.get(c).size();

        questions.add(new MemoryQuestion(this, name, selectedClue, 1, lengthDescr));
      }
    }

    log.info(questions.size() + " questions generated");

    log.info("Top candidates are:");

    // sort q's by clue score
    Collections.sort(questions);

    //		log.info("Based on clue score, top answers:");
    //		for (MemoryQuestion mq: questions)
    //			log.info (mq.correctAnswer + " times= clue=" + mq.clue.clue);

    int count = 0;
    for (MemoryQuestion mq : questions) {
      mq.setQuestionNum(count++);
    }

    // log the questions as well, just in case we don't get to the final point due to user fatigue
    // or crashes
    logStats("questions.final", false);
  }
Beispiel #11
0
  /**
   * Generates list of questions and stores it in the current instance of MemoryStudy We handle two
   * kinds of questions namely, person names tests and non-person name tests. Non-person name test
   * is a fill in the blank kind where the blank is to be filled with the correct non-person entity
   * to complete the sentence person name test is to guess the person in correspondent list based on
   * some distinctive sentences in the mail
   *
   * @param maxInt - max. number of questions from a interval
   * @throws IOException
   */
  public void generateQuestions(
      Archive archive,
      NERModel nerModel,
      Collection<EmailDocument> allDocs,
      Lexicon lex,
      int maxInt,
      boolean personTest)
      throws IOException, GeneralSecurityException, ClassNotFoundException, ReadContentsException,
          ParseException {
    this.archive = archive;
    if (allDocs == null) allDocs = (Collection) archive.getAllDocs();
    questions = new ArrayList<>();
    ArchiveCluer cluer = new ArchiveCluer(null, archive, nerModel, null, lex);

    Short[] itypes =
        new Short[] {
          FeatureDictionary.BUILDING,
          FeatureDictionary.PLACE,
          FeatureDictionary.RIVER,
          FeatureDictionary.ROAD,
          FeatureDictionary.UNIVERSITY,
          FeatureDictionary.MOUNTAIN,
          FeatureDictionary.AIRPORT,
          FeatureDictionary.ISLAND,
          FeatureDictionary.MUSEUM,
          FeatureDictionary.BRIDGE,
          FeatureDictionary.AIRLINE,
          FeatureDictionary.THEATRE,
          FeatureDictionary.LIBRARY,
          FeatureDictionary.LAWFIRM,
          FeatureDictionary.GOVAGENCY
        };
    double CUTOFF = 0.001;
    tabooCluesSet = new LinkedHashSet<>();
    archive.assignThreadIds();

    List<Document> docs = archive.getAllDocs();
    Map<String, Date> entityToLastDate = new LinkedHashMap<>();
    Multimap<String, EmailDocument> entityToMessages = LinkedHashMultimap.create();
    Multimap<String, Long> entityToThreads = LinkedHashMultimap.create();
    Multimap<String, String> ceToDisplayEntity = LinkedHashMultimap.create();

    int di = 0;

    // sort by date
    Collections.sort(docs);

    Set<String> ownerNames = archive.ownerNames;
    Date earliestDate = null, latestDate = null;
    Set<String> allEntities = new LinkedHashSet<>();
    for (Document doc : docs) {
      EmailDocument ed = (EmailDocument) doc;
      if (earliestDate == null || ed.date.before(earliestDate)) earliestDate = ed.date;
      if (latestDate == null || ed.date.after(latestDate)) latestDate = ed.date;

      List<String> entities = new ArrayList<>();
      if (!personTest) {
        entities.addAll(
            Arrays.asList(archive.getAllNamesInDoc(doc, true))
                .stream()
                .filter(n -> n.typeScore > CUTOFF)
                .map(n -> n.text)
                .collect(Collectors.toList()));
      } else {
        // do not consider mailing lists
        if (ed.sentToMailingLists != null && ed.sentToMailingLists.length > 0) continue;
        // discard doc if it is not a sent mail
        if ((ed.sentOrReceived(archive.addressBook) & EmailDocument.SENT_MASK) == 0) continue;

        List<Address> addrs = new ArrayList<>();
        if (ed.to != null) for (Address addr : ed.to) addrs.add(addr);

        List<String> names = new ArrayList<>();
        for (Address addr : addrs) {
          Contact c = archive.addressBook.lookupByAddress(addr);
          names.add(c.pickBestName());
        }

        for (String name : names) {
          if (!ownerNames.contains(name) && !DictUtils.hasDictionaryWord(name)) {
            entities.add(name);
          }
        }
      }
      allEntities.addAll(entities);

      // get entities
      for (String e : entities) {
        if (Util.nullOrEmpty(e)) continue;
        e = e.replaceAll("^\\W+|\\W+$", "");
        if (e.length() > 10 && e.toUpperCase().equals(e))
          continue; // all upper case, more than 10 letters, you're out.

        String ce = DictUtils.canonicalize(e); // canonicalize
        if (ce == null) {
          JSPHelper.log.info("Dropping entity: " + e);
          continue;
        }

        ceToDisplayEntity.put(ce, e);
        entityToLastDate.put(ce, ed.date);
        entityToMessages.put(ce, ed);
        entityToThreads.put(ce, ed.threadID);
      }

      if ((++di) % 1000 == 0) log.info(di + " of " + docs.size() + " messages processed...<br/>");
    }
    log.info(
        "Considered #"
            + allEntities.size()
            + " unique entities and #"
            + ceToDisplayEntity.size()
            + " good ones in #"
            + docs.size()
            + " docs<br>");
    log.info("Owner Names: " + ownerNames);
    JSPHelper.log.info(
        "Considered #"
            + allEntities.size()
            + " unique entities and #"
            + ceToDisplayEntity.size()
            + " good ones in #"
            + docs.size()
            + "docs");

    JSPHelper.log.info(
        "earliest date = "
            + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(earliestDate));
    JSPHelper.log.info(
        "latest date = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(latestDate));

    Multimap<String, String> tokenToCE = LinkedHashMultimap.create();
    for (String ce : ceToDisplayEntity.keySet()) {
      List<String> tokens = Util.tokenize(ce);
      for (String t : tokens) tokenToCE.put(t, ce);
    }

    // Compute date intervals
    int DAYS_PER_INTERVAL = 30;
    List<Pair<Date, Date>> intervals = new ArrayList<Pair<Date, Date>>();
    {
      JSPHelper.log.info("computing time intervals");
      Date closingDate = latestDate;

      JSPHelper.log.info(
          "closing = " + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(closingDate));
      while (earliestDate.before(closingDate)) {
        Calendar cal = new GregorianCalendar();
        cal.setTime(closingDate); // this is the time of the last sighting of the term
        // scroll to the beginning of this month
        cal.set(Calendar.HOUR_OF_DAY, 23);
        cal.set(Calendar.MINUTE, 59);
        cal.set(Calendar.SECOND, 59);
        Date endDate = cal.getTime();

        cal.add(
            Calendar.DATE,
            (1
                - DAYS_PER_INTERVAL)); // 1- because we want from 0:00 of first date to 23:59 of
                                       // last date
        cal.set(Calendar.HOUR_OF_DAY, 0);
        cal.set(Calendar.MINUTE, 0);
        cal.set(Calendar.SECOND, 0);
        Date startDate = cal.getTime();

        intervals.add(new Pair<Date, Date>(startDate, endDate));
        // ok we got an interval

        // closing date for the next interval is 1 day before endDate
        cal.add(Calendar.DATE, -1);
        closingDate = cal.getTime();
      }
      JSPHelper.log.info("done computing intervals, #time intervals: " + intervals.size());
      for (Pair<Date, Date> p : intervals)
        JSPHelper.log.info(
            "Interval: "
                + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getFirst())
                + " - "
                + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(p.getSecond()));
    }

    // initialize clueInfos to empty lists
    List<ClueInfo> clueInfos[] = new ArrayList[intervals.size()];
    for (int i = 0; i < intervals.size(); i++) {
      clueInfos[i] = new ArrayList<ClueInfo>();
    }

    Map<Integer, Integer> intervalCount = new LinkedHashMap<>();
    // nSent is the number of sentences allowed in a clue text
    int nvalidclues = 0, nSent = 2;
    // generate clueInfos for each entity
    for (String ce : entityToLastDate.keySet()) {
      Date lastSeenDate = entityToLastDate.get(ce);

      // compute displayEntity (which has red for core words) and fullAnswer, which is a simple
      // string
      String fullAnswer = "";
      {
        List<String> tokens = Util.tokenize(ceToDisplayEntity.get(ce).iterator().next());
        for (String t : tokens) {
          if (EnglishDictionary.stopWords.contains(t.toLowerCase())) continue;
          fullAnswer += t + " ";
        }
        fullAnswer = fullAnswer.trim();
      }
      // dont want the answer to be scored low just because it has extra non-word chars in the begin
      // or end
      fullAnswer = fullAnswer.replaceAll("^\\W+|\\W+$", "");

      // which interval does this date belong to?
      int interval = -1;
      Date intervalStart = null, intervalEnd = null;
      {
        int i = 0;
        for (Pair<Date, Date> p : intervals) {
          intervalStart = p.getFirst();
          intervalEnd = p.getSecond();

          if ((intervalStart.before(lastSeenDate) && intervalEnd.after(lastSeenDate))
              || intervalStart.equals(lastSeenDate)
              || intervalEnd.equals(lastSeenDate)) {
            interval = i;
            break;
          }
          i++;
        }
      }
      if (interval < 0 || interval == intervals.size())
        JSPHelper.log.info(
            "What, no interval!? for "
                + edu.stanford.muse.email.CalendarUtil.formatDateForDisplay(lastSeenDate));
      if (!intervalCount.containsKey(interval)) intervalCount.put(interval, 0);
      if (intervalCount.get(interval) > maxInt) continue;
      intervalCount.put(interval, intervalCount.get(interval) + 1);

      List<Integer> lengthList = Crossword.convertToWord(fullAnswer).getSecond();
      String lengthDescr = "";
      if (lengthList.size() > 1) lengthDescr += Integer.toString(lengthList.size()) + " words: ";

      for (Integer i : lengthList) {
        lengthDescr += Util.pluralize(i, "letter") + ", ";
      }
      lengthDescr = lengthDescr.substring(0, lengthDescr.length() - 2); // subtract the extra comma.

      ClueInfo ci = new ClueInfo();
      ci.link = "../browse?term=\"" + fullAnswer + "\"&sort_by=recent&searchType=original";
      ci.lastSeenDate = lastSeenDate;
      ci.nMessages = entityToMessages.get(ce).size();
      ci.nThreads = entityToThreads.get(ce).size();

      // TODO: we are doing default initialisation of evaluators by setting it to null below, it is
      // more appropriate to consider it as an argument for this method
      Clue clue =
          cluer.createClue(
              fullAnswer,
              (personTest
                  ? ArchiveCluer.QuestionType.GUESS_CORRESPONDENT
                  : ArchiveCluer.QuestionType.FILL_IN_THE_BLANK),
              null,
              tabooCluesSet,
              null,
              intervalStart,
              intervalEnd,
              nSent,
              archive);
      if (clue != null) ci.clues = new Clue[] {clue};

      if (ci.clues == null || ci.clues.length == 0 || clue == null) {
        JSPHelper.log.warn("Did not find any clue for: " + fullAnswer);
      } else {
        // is the times value of the clue important?
        questions.add(new MemoryQuestion(this, fullAnswer, clue, 1, lengthDescr));
        nvalidclues++;
        // makes sure that the clue with the same statement is not generated again
        tabooCluesSet.add(clue.clue);
      }
      clueInfos[interval].add(ci);
    }
    log.info("Found valid clues for " + nvalidclues + " answers");
    JSPHelper.log.info("Found valid clues for " + nvalidclues + " answers");

    log.info("Top candidates are:");
    for (MemoryQuestion mq : questions)
      log.info(mq.correctAnswer + " times=" + mq.stats.nMessagesWithAnswer);

    // sort q's by clue score
    Collections.sort(questions);

    //		log.info("Based on clue score, top answers:");
    //		for (MemoryQuestion mq: questions)
    //			log.info (mq.correctAnswer + " times= clue=" + mq.clue.clue);

    // now we have up to 2*N questions, sorted by cluescore.
    // drop ones that are prefix/suffix of another, and cap to N
    int prev_size = questions.size();

    int new_size = questions.size();

    //	log.info ("#questions before prefix-suffix elim: " + prev_size + " after: " + new_size);

    int count = 0;
    for (MemoryQuestion mq : questions) {
      mq.setQuestionNum(count++);
    }

    // log the questions as well, just in case we don't get to the final point due to user fatigue
    // or crashes
    logStats("questions.final", false);
  }
Beispiel #12
0
 /** writes out csv stats as an encrypted file in RESULTS_DIR/<userid>/filename */
 public void logStats(String filename, boolean nullClues) {
   Indexer.IndexStats stats = archive.getIndexStats();
   StringBuilder statsLog = new StringBuilder();
   Pair<String, String> indexStats = Util.fieldsToCSV(stats, true);
   Pair<String, String> addressBookStats = Util.fieldsToCSV(archive.addressBook.getStats(), true);
   Pair<String, String> studyStats = Util.fieldsToCSV(stats, true);
   Pair<String, String> archiveStats = Util.fieldsToCSV(archive.stats, true);
   statsLog.append(
       "STUDYSTATS-1: "
           + studyStats.getFirst()
           + indexStats.getFirst()
           + addressBookStats.getFirst()
           + archiveStats.getFirst()
           + "\n");
   statsLog.append(
       "STUDYSTATS-2: "
           + studyStats.getSecond()
           + indexStats.getSecond()
           + addressBookStats.getSecond()
           + archiveStats.getSecond()
           + "\n");
   int idx = 1;
   for (MemoryQuestion mq : this.getQuestions()) {
     if (nullClues) mq.clue.clue = null;
     Pair<String, String> p = Util.fieldsToCSV(mq.clue.clueStats, true);
     Pair<String, String> p1 = Util.fieldsToCSV(mq.stats, true);
     if (idx == 1)
       statsLog.append(
           "QUESTIONSTATS-header: "
               + p.getFirst()
               + ','
               + p1.getFirst()
               + "correct answer, user answer, user answer before hint, clue"
               + "\n");
     //			statsLog.append("QUESTIONSTATS-2: " + p.getSecond()  + ',' + p1.getSecond() +
     // mq.correctAnswer + "," + mq.userAnswer + "," + mq.userAnswerBeforeHint + "," +
     // mq.clue.clue.replaceAll(",", " ") + "\n");
     statsLog.append(
         "QUESTIONSTATS-2: "
             + p.getSecond()
             + ','
             + p1.getSecond()
             + mq.correctAnswer
             + ","
             + mq.userAnswer
             + ","
             + mq.userAnswerBeforeHint
             + "\n");
     idx = idx + 1;
   }
   String RESULTS_DIR =
       System.getProperty("user.home")
           + File.separator
           + "results"
           + File.separator
           + this.stats.userid;
   new File(RESULTS_DIR).mkdirs();
   String file = RESULTS_DIR + File.separator + filename;
   try {
     CryptoUtils.writeEncryptedBytes(statsLog.toString().getBytes("UTF-8"), file);
   } catch (UnsupportedEncodingException e) {
     Util.print_exception(e, log);
   } catch (Exception e) {
     Util.print_exception("NOC ERROR: encryption failed!", e, log);
   }
   log.info(statsLog);
 }
Beispiel #13
0
 @Override
 public int compare(Pair<Integer, Double> o1, Pair<Integer, Double> o2) {
   if (o1.getSecond() > o2.getSecond()) return -1;
   else if (o1.getSecond() < o2.getSecond()) return 1;
   else return 0;
 }
Beispiel #14
0
  public static void main(String[] args) {
    ArrayList<Pair> values = new ArrayList<Pair>();
    Pair pair = new Pair();
    // set pair values:

    System.out.println("enter number of pairs: ");
    Scanner input = new Scanner(System.in);
    int numPair = input.nextInt();

    for (int i = 0; i < numPair; i++) {
      pair.inputPair();
      pair.sortPair();
      values.add(new Pair(pair.getMin(), pair.getMax()));
    }

    for (Pair value : values) {
      System.out.println(" [" + value.getMin() + ", " + value.getMax() + "] ");
    }

    Collections.sort(values, FIRST_LIST);
    System.out.println("After sorting: ");
    for (Pair i : values) System.out.println("[" + i.getMin() + ", " + i.getMax() + "] ");
  }
  /**
   * The core implementation of the search.
   *
   * @param root The root word to search from. Traditionally, this is the root of the sentence.
   * @param candidateFragments The callback for the resulting sentence fragments. This is a
   *     predicate of a triple of values. The return value of the predicate determines whether we
   *     should continue searching. The triple is a triple of
   *     <ol>
   *       <li>The log probability of the sentence fragment, according to the featurizer and the
   *           weights
   *       <li>The features along the path to this fragment. The last element of this is the
   *           features from the most recent step.
   *       <li>The sentence fragment. Because it is relatively expensive to compute the resulting
   *           tree, this is returned as a lazy {@link Supplier}.
   *     </ol>
   *
   * @param classifier The classifier for whether an arc should be on the path to a clause split, a
   *     clause split itself, or neither.
   * @param featurizer The featurizer to use. Make sure this matches the weights!
   * @param actionSpace The action space we are allowed to take. Each action defines a means of
   *     splitting a clause on a dependency boundary.
   */
  protected void search(
      // The root to search from
      IndexedWord root,
      // The output specs
      final Predicate<Triple<Double, List<Counter<String>>, Supplier<SentenceFragment>>>
          candidateFragments,
      // The learning specs
      final Classifier<ClauseSplitter.ClauseClassifierLabel, String> classifier,
      Map<String, ? extends List<String>> hardCodedSplits,
      final Function<Triple<State, Action, State>, Counter<String>> featurizer,
      final Collection<Action> actionSpace,
      final int maxTicks) {
    // (the fringe)
    PriorityQueue<Pair<State, List<Counter<String>>>> fringe = new FixedPrioritiesPriorityQueue<>();
    // (avoid duplicate work)
    Set<IndexedWord> seenWords = new HashSet<>();

    State firstState =
        new State(null, null, -9000, null, x -> {}, true); // First state is implicitly "done"
    fringe.add(Pair.makePair(firstState, new ArrayList<>(0)), -0.0);
    int ticks = 0;

    while (!fringe.isEmpty()) {
      if (++ticks > maxTicks) {
        //        System.err.println("WARNING! Timed out on search with " + ticks + " ticks");
        return;
      }
      // Useful variables
      double logProbSoFar = fringe.getPriority();
      assert logProbSoFar <= 0.0;
      Pair<State, List<Counter<String>>> lastStatePair = fringe.removeFirst();
      State lastState = lastStatePair.first;
      List<Counter<String>> featuresSoFar = lastStatePair.second;
      IndexedWord rootWord = lastState.edge == null ? root : lastState.edge.getDependent();

      // Register thunk
      if (lastState.isDone) {
        if (!candidateFragments.test(
            Triple.makeTriple(
                logProbSoFar,
                featuresSoFar,
                () -> {
                  SemanticGraph copy = new SemanticGraph(tree);
                  lastState
                      .thunk
                      .andThen(
                          x -> {
                            // Add the extra edges back in, if they don't break the tree-ness of the
                            // extraction
                            for (IndexedWord newTreeRoot : x.getRoots()) {
                              if (newTreeRoot != null) { // what a strange thing to have happen...
                                for (SemanticGraphEdge extraEdge :
                                    extraEdgesByGovernor.get(newTreeRoot)) {
                                  assert Util.isTree(x);
                                  //noinspection unchecked
                                  addSubtree(
                                      x,
                                      newTreeRoot,
                                      extraEdge.getRelation().toString(),
                                      tree,
                                      extraEdge.getDependent(),
                                      tree.getIncomingEdgesSorted(newTreeRoot));
                                  assert Util.isTree(x);
                                }
                              }
                            }
                          })
                      .accept(copy);
                  return new SentenceFragment(copy, assumedTruth, false);
                }))) {
          break;
        }
      }

      // Find relevant auxilliary terms
      SemanticGraphEdge subjOrNull = null;
      SemanticGraphEdge objOrNull = null;
      for (SemanticGraphEdge auxEdge : tree.outgoingEdgeIterable(rootWord)) {
        String relString = auxEdge.getRelation().toString();
        if (relString.contains("obj")) {
          objOrNull = auxEdge;
        } else if (relString.contains("subj")) {
          subjOrNull = auxEdge;
        }
      }

      // Iterate over children
      // For each outgoing edge...
      for (SemanticGraphEdge outgoingEdge : tree.outgoingEdgeIterable(rootWord)) {
        // Prohibit indirect speech verbs from splitting off clauses
        // (e.g., 'said', 'think')
        // This fires if the governor is an indirect speech verb, and the outgoing edge is a ccomp
        if (outgoingEdge.getRelation().toString().equals("ccomp")
            && ((outgoingEdge.getGovernor().lemma() != null
                    && INDIRECT_SPEECH_LEMMAS.contains(outgoingEdge.getGovernor().lemma()))
                || INDIRECT_SPEECH_LEMMAS.contains(outgoingEdge.getGovernor().word()))) {
          continue;
        }
        // Get some variables
        String outgoingEdgeRelation = outgoingEdge.getRelation().toString();
        List<String> forcedArcOrder = hardCodedSplits.get(outgoingEdgeRelation);
        if (forcedArcOrder == null && outgoingEdgeRelation.contains(":")) {
          forcedArcOrder =
              hardCodedSplits.get(
                  outgoingEdgeRelation.substring(0, outgoingEdgeRelation.indexOf(":")) + ":*");
        }
        boolean doneForcedArc = false;
        // For each action...
        for (Action action :
            (forcedArcOrder == null ? actionSpace : orderActions(actionSpace, forcedArcOrder))) {
          // Check the prerequisite
          if (!action.prerequisitesMet(tree, outgoingEdge)) {
            continue;
          }
          if (forcedArcOrder != null && doneForcedArc) {
            break;
          }
          // 1. Compute the child state
          Optional<State> candidate =
              action.applyTo(tree, lastState, outgoingEdge, subjOrNull, objOrNull);
          if (candidate.isPresent()) {
            double logProbability;
            ClauseClassifierLabel bestLabel;
            Counter<String> features =
                featurizer.apply(Triple.makeTriple(lastState, action, candidate.get()));
            if (forcedArcOrder != null && !doneForcedArc) {
              logProbability = 0.0;
              bestLabel = ClauseClassifierLabel.CLAUSE_SPLIT;
              doneForcedArc = true;
            } else if (features.containsKey("__undocumented_junit_no_classifier")) {
              logProbability = Double.NEGATIVE_INFINITY;
              bestLabel = ClauseClassifierLabel.CLAUSE_INTERM;
            } else {
              Counter<ClauseClassifierLabel> scores = classifier.scoresOf(new RVFDatum<>(features));
              if (scores.size() > 0) {
                Counters.logNormalizeInPlace(scores);
              }
              String rel = outgoingEdge.getRelation().toString();
              if ("nsubj".equals(rel) || "dobj".equals(rel)) {
                scores.remove(
                    ClauseClassifierLabel.NOT_A_CLAUSE); // Always at least yield on nsubj and dobj
              }
              logProbability = Counters.max(scores, Double.NEGATIVE_INFINITY);
              bestLabel = Counters.argmax(scores, (x, y) -> 0, ClauseClassifierLabel.CLAUSE_SPLIT);
            }

            if (bestLabel != ClauseClassifierLabel.NOT_A_CLAUSE) {
              Pair<State, List<Counter<String>>> childState =
                  Pair.makePair(
                      candidate.get().withIsDone(bestLabel),
                      new ArrayList<Counter<String>>(featuresSoFar) {
                        {
                          add(features);
                        }
                      });
              // 2. Register the child state
              if (!seenWords.contains(childState.first.edge.getDependent())) {
                //            System.err.println("  pushing " + action.signature() + " with " +
                // argmax.first.edge);
                fringe.add(childState, logProbability);
              }
            }
          }
        }
      }

      seenWords.add(rootWord);
    }
    //    System.err.println("Search finished in " + ticks + " ticks and " + classifierEvals + "
    // classifier evaluations.");
  }